Apply by doing:
    cd /usr/src
    patch -p0 < 013_tcp.patch

Rebuild your kernel.

Update headers.
    make includes

Then rebuild and install sysctl:
    cd sbin/sysctl
    make depend
    make
    make install

Index: sys/netinet/tcp_input.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_input.c,v retrieving revision 1.132 retrieving revision 1.132.2.2 diff -u -p -r1.132 -r1.132.2.2 --- sys/netinet/tcp_input.c 9 Jul 2003 22:03:16 -0000 1.132 +++ sys/netinet/tcp_input.c 3 Mar 2004 08:37:05 -0000 1.132.2.2 @@ -200,11 +200,23 @@ tcp_reass(tp, th, m, tlen) * Allocate a new queue entry, before we throw away any data. * If we can't, just drop the packet. XXX */ - tiqe = pool_get(&ipqent_pool, PR_NOWAIT); + tiqe = pool_get(&tcpqe_pool, PR_NOWAIT); if (tiqe == NULL) { - tcpstat.tcps_rcvmemdrop++; - m_freem(m); - return (0); + tiqe = LIST_FIRST(&tp->segq); + if (tiqe != NULL && th->th_seq == tp->rcv_nxt) { + /* Reuse last entry since new segment fills a hole */ + while ((p = LIST_NEXT(tiqe, ipqe_q)) != NULL) + tiqe = p; + m_freem(tiqe->ipqe_m); + LIST_REMOVE(tiqe, ipqe_q); + } + if (tiqe == NULL || th->th_seq != tp->rcv_nxt) { + /* Flush fragments for this connection */ + tcp_freeq(tp); + tcpstat.tcps_rcvmemdrop++; + m_freem(m); + return (0); + } } /* @@ -231,7 +243,7 @@ tcp_reass(tp, th, m, tlen) tcpstat.tcps_rcvduppack++; tcpstat.tcps_rcvdupbyte += *tlen; m_freem(m); - pool_put(&ipqent_pool, tiqe); + pool_put(&tcpqe_pool, tiqe); return (0); } m_adj(m, i); @@ -261,7 +273,7 @@ tcp_reass(tp, th, m, tlen) nq = q->ipqe_q.le_next; m_freem(q->ipqe_m); LIST_REMOVE(q, ipqe_q); - pool_put(&ipqent_pool, q); + pool_put(&tcpqe_pool, q); } /* Insert the new fragment queue entry into place. 
*/ @@ -297,7 +309,7 @@ present: m_freem(q->ipqe_m); else sbappendstream(&so->so_rcv, q->ipqe_m); - pool_put(&ipqent_pool, q); + pool_put(&tcpqe_pool, q); q = nq; } while (q != NULL && q->ipqe_tcp->th_seq == tp->rcv_nxt); sorwakeup(so); @@ -1380,8 +1392,10 @@ findpcb: tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } + tcp_reass_lock(tp); (void) tcp_reass(tp, (struct tcphdr *)0, (struct mbuf *)0, &tlen); + tcp_reass_unlock(tp); /* * if we didn't have to retransmit the SYN, * use its rtt as our initial srtt & rtt var. @@ -1640,8 +1654,10 @@ trimthenstep6: tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } + tcp_reass_lock(tp); (void) tcp_reass(tp, (struct tcphdr *)0, (struct mbuf *)0, &tlen); + tcp_reass_unlock(tp); tp->snd_wl1 = th->th_seq - 1; /* fall into ... */ @@ -2144,8 +2160,10 @@ dodata: /* XXX */ */ if ((tlen || (tiflags & TH_FIN)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { + tcp_reass_lock(tp); if (th->th_seq == tp->rcv_nxt && tp->segq.lh_first == NULL && tp->t_state == TCPS_ESTABLISHED) { + tcp_reass_unlock(tp); TCP_SETUP_ACK(tp, tiflags); tp->rcv_nxt += tlen; tiflags = th->th_flags & TH_FIN; @@ -2162,6 +2180,7 @@ dodata: /* XXX */ } else { m_adj(m, hdroptlen); tiflags = tcp_reass(tp, th, m, &tlen); + tcp_reass_unlock(tp); tp->t_flags |= TF_ACKNOW; } #ifdef TCP_SACK Index: sys/netinet/tcp_subr.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_subr.c,v retrieving revision 1.68 retrieving revision 1.68.2.2 diff -u -p -r1.68 -r1.68.2.2 --- sys/netinet/tcp_subr.c 9 Jul 2003 22:03:16 -0000 1.68 +++ sys/netinet/tcp_subr.c 3 Mar 2004 08:37:05 -0000 1.68.2.2 @@ -139,17 +139,18 @@ u_int32_t tcp_now; #endif int tcbhashsize = TCBHASHSIZE; +int tcp_reass_limit = NMBCLUSTERS / 2; /* hardlimit for tcpqe_pool */ + #ifdef INET6 extern int ip6_defhlim; #endif /* INET6 */ struct pool tcpcb_pool; +struct pool tcpqe_pool; #ifdef TCP_SACK struct pool sackhl_pool; #endif 
-int tcp_freeq(struct tcpcb *); - struct tcpstat tcpstat; /* tcp statistics */ tcp_seq tcp_iss; @@ -164,6 +165,9 @@ tcp_init() #endif /* TCP_COMPAT_42 */ pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl", NULL); + pool_init(&tcpqe_pool, sizeof(struct ipqent), 0, 0, 0, "tcpqepl", + NULL); + pool_sethardlimit(&tcpqe_pool, tcp_reass_limit, NULL, 0); #ifdef TCP_SACK pool_init(&sackhl_pool, sizeof(struct sackhole), 0, 0, 0, "sackhlpl", NULL); @@ -669,7 +673,9 @@ tcp_close(struct tcpcb *tp) #endif /* RTV_RTT */ /* free the reassembly queue, if any */ + tcp_reass_lock(tp); tcp_freeq(tp); + tcp_reass_unlock(tp); tcp_canceltimers(tp); TCP_CLEAR_DELACK(tp); @@ -702,7 +708,7 @@ tcp_freeq(struct tcpcb *tp) while ((qe = LIST_FIRST(&tp->segq)) != NULL) { LIST_REMOVE(qe, ipqe_q); m_freem(qe->ipqe_m); - pool_put(&ipqent_pool, qe); + pool_put(&tcpqe_pool, qe); rv = 1; } return (rv); @@ -711,7 +717,20 @@ tcp_freeq(struct tcpcb *tp) void tcp_drain() { - + struct inpcb *inp; + + /* called at splimp() */ + CIRCLEQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) { + struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; + + if (tp != NULL) { + if (tcp_reass_lock_try(tp) == 0) + continue; + if (tcp_freeq(tp)) + tcpstat.tcps_conndrained++; + tcp_reass_unlock(tp); + } + } } /* Index: sys/netinet/tcp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.71 retrieving revision 1.71.2.1 diff -u -p -r1.71 -r1.71.2.1 --- sys/netinet/tcp_usrreq.c 9 Jun 2003 07:40:25 -0000 1.71 +++ sys/netinet/tcp_usrreq.c 3 Mar 2004 08:37:05 -0000 1.71.2.1 @@ -873,6 +873,7 @@ tcp_sysctl(name, namelen, oldp, oldlenp, void *newp; size_t newlen; { + int error, nval; /* All sysctl names at this level are terminal. 
*/ if (namelen != 1) @@ -927,6 +928,18 @@ tcp_sysctl(name, namelen, oldp, oldlenp, return (sysctl_int(oldp, oldlenp, newp, newlen, &tcp_do_ecn)); #endif + case TCPCTL_REASS_LIMIT: + nval = tcp_reass_limit; + error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); + if (error) + return (error); + if (nval != tcp_reass_limit) { + error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); + if (error) + return (error); + tcp_reass_limit = nval; + } + return (0); default: return (ENOPROTOOPT); } Index: sys/netinet/tcp_var.h =================================================================== RCS file: /cvs/src/sys/netinet/tcp_var.h,v retrieving revision 1.51 retrieving revision 1.51.2.3 diff -u -p -r1.51 -r1.51.2.3 --- sys/netinet/tcp_var.h 9 Jun 2003 07:40:25 -0000 1.51 +++ sys/netinet/tcp_var.h 4 Mar 2004 03:28:09 -0000 1.51.2.3 @@ -82,6 +82,7 @@ struct tcpcb { #define TF_SEND_CWR 0x00020000 /* send CWR in next seg */ #define TF_DISABLE_ECN 0x00040000 /* disable ECN for this connection */ #endif +#define TF_REASSLOCK 0x00080000 /* reassembling or draining */ struct mbuf *t_template; /* skeletal packet for transmit */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ @@ -205,6 +206,35 @@ do { \ timeout_del(&(tp)->t_delack_to); \ } \ } while (/*CONSTCOND*/0) + +static __inline int tcp_reass_lock_try(struct tcpcb *); +static __inline void tcp_reass_unlock(struct tcpcb *); +#define tcp_reass_lock(tp) tcp_reass_lock_try(tp) + +static __inline int +tcp_reass_lock_try(struct tcpcb *tp) +{ + int s; + + s = splimp(); + if (tp->t_flags & TF_REASSLOCK) { + splx(s); + return (0); + } + tp->t_flags |= TF_REASSLOCK; + splx(s); + return (1); +} + +static __inline void +tcp_reass_unlock(struct tcpcb *tp) +{ + int s; + + s = splimp(); + tp->t_flags &= ~TF_REASSLOCK; + splx(s); +} #endif /* _KERNEL */ /* @@ -319,6 +349,8 @@ struct tcpstat { u_int32_t tcps_cwr_ecn; /* # of cwnd reduced by ecn */ u_int32_t tcps_cwr_frecovery; /* # of cwnd reduced by fastrecovery */ u_int32_t 
tcps_cwr_timeout; /* # of cwnd reduced by timeout */ + + u_int64_t tcps_conndrained; /* # of connections drained */ }; /* @@ -339,7 +371,8 @@ struct tcpstat { #define TCPCTL_RSTPPSLIMIT 12 /* RST pps limit */ #define TCPCTL_ACK_ON_PUSH 13 /* ACK immediately on PUSH */ #define TCPCTL_ECN 14 /* RFC3168 ECN */ -#define TCPCTL_MAXID 15 +#define TCPCTL_REASS_LIMIT 15 /* max entries for tcp reass queues */ +#define TCPCTL_MAXID 16 #define TCPCTL_NAMES { \ { 0, 0 }, \ @@ -357,6 +390,7 @@ struct tcpstat { { "rstppslimit", CTLTYPE_INT }, \ { "ackonpush", CTLTYPE_INT }, \ { "ecn", CTLTYPE_INT }, \ + { "reasslimit", CTLTYPE_INT }, \ } struct tcp_ident_mapping { @@ -377,10 +411,14 @@ extern struct pool sackhl_pool; #endif extern int tcp_do_ecn; /* RFC3168 ECN enabled/disabled? */ +extern struct pool tcpqe_pool; +extern int tcp_reass_limit; /* max entries for tcp reass queues */ + int tcp_attach(struct socket *); void tcp_canceltimers(struct tcpcb *); struct tcpcb * tcp_close(struct tcpcb *); +int tcp_freeq(struct tcpcb *); #if defined(INET6) && !defined(TCP6) void tcp6_ctlinput(int, struct sockaddr *, void *); #endif