Apply by doing: cd /usr/src patch -p0 < 018_tcp.patch Rebuild your kernel. Update headers. make includes Then rebuild and install sysctl: cd sbin/sysctl make depend make make install Index: sys/netinet/tcp_input.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_input.c,v retrieving revision 1.125 retrieving revision 1.125.2.2 diff -u -p -r1.125 -r1.125.2.2 --- sys/netinet/tcp_input.c 14 Feb 2003 17:54:46 -0000 1.125 +++ sys/netinet/tcp_input.c 3 Mar 2004 08:40:07 -0000 1.125.2.2 @@ -204,11 +204,23 @@ tcp_reass(tp, th, m, tlen) * Allocate a new queue entry, before we throw away any data. * If we can't, just drop the packet. XXX */ - tiqe = pool_get(&ipqent_pool, PR_NOWAIT); + tiqe = pool_get(&tcpqe_pool, PR_NOWAIT); if (tiqe == NULL) { - tcpstat.tcps_rcvmemdrop++; - m_freem(m); - return (0); + tiqe = LIST_FIRST(&tp->segq); + if (tiqe != NULL && th->th_seq == tp->rcv_nxt) { + /* Reuse last entry since new segment fills a hole */ + while ((p = LIST_NEXT(tiqe, ipqe_q)) != NULL) + tiqe = p; + m_freem(tiqe->ipqe_m); + LIST_REMOVE(tiqe, ipqe_q); + } + if (tiqe == NULL || th->th_seq != tp->rcv_nxt) { + /* Flush fragments for this connection */ + tcp_freeq(tp); + tcpstat.tcps_rcvmemdrop++; + m_freem(m); + return (0); + } } /* @@ -235,7 +247,7 @@ tcp_reass(tp, th, m, tlen) tcpstat.tcps_rcvduppack++; tcpstat.tcps_rcvdupbyte += *tlen; m_freem(m); - pool_put(&ipqent_pool, tiqe); + pool_put(&tcpqe_pool, tiqe); return (0); } m_adj(m, i); @@ -265,7 +277,7 @@ tcp_reass(tp, th, m, tlen) nq = q->ipqe_q.le_next; m_freem(q->ipqe_m); LIST_REMOVE(q, ipqe_q); - pool_put(&ipqent_pool, q); + pool_put(&tcpqe_pool, q); } /* Insert the new fragment queue entry into place. */ @@ -301,7 +313,7 @@ present: m_freem(q->ipqe_m); else sbappendstream(&so->so_rcv, q->ipqe_m); - pool_put(&ipqent_pool, q); + pool_put(&tcpqe_pool, q); q = nq; } while (q != NULL && q->ipqe_tcp->th_seq == tp->rcv_nxt); sorwakeup(so); @@ -1388,8 +1400,10 @@ findpcb: tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } + tcp_reass_lock(tp); (void) tcp_reass(tp, (struct tcphdr *)0, (struct mbuf *)0, &tlen); + tcp_reass_unlock(tp); /* * if we didn't have to retransmit the SYN, * use its rtt as our initial srtt & rtt var. @@ -1648,8 +1662,10 @@ trimthenstep6: tp->snd_scale = tp->requested_s_scale; tp->rcv_scale = tp->request_r_scale; } + tcp_reass_lock(tp); (void) tcp_reass(tp, (struct tcphdr *)0, (struct mbuf *)0, &tlen); + tcp_reass_unlock(tp); tp->snd_wl1 = th->th_seq - 1; /* fall into ... */ @@ -2152,8 +2168,10 @@ dodata: /* XXX */ */ if ((tlen || (tiflags & TH_FIN)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { + tcp_reass_lock(tp); if (th->th_seq == tp->rcv_nxt && tp->segq.lh_first == NULL && tp->t_state == TCPS_ESTABLISHED) { + tcp_reass_unlock(tp); TCP_SETUP_ACK(tp, tiflags); tp->rcv_nxt += tlen; tiflags = th->th_flags & TH_FIN; @@ -2170,6 +2188,7 @@ dodata: /* XXX */ } else { m_adj(m, hdroptlen); tiflags = tcp_reass(tp, th, m, &tlen); + tcp_reass_unlock(tp); tp->t_flags |= TF_ACKNOW; } #ifdef TCP_SACK Index: sys/netinet/tcp_subr.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_subr.c,v retrieving revision 1.65 retrieving revision 1.65.4.2 diff -u -p -r1.65 -r1.65.4.2 --- sys/netinet/tcp_subr.c 28 Aug 2002 15:43:03 -0000 1.65 +++ sys/netinet/tcp_subr.c 3 Mar 2004 08:40:07 -0000 1.65.4.2 @@ -141,17 +141,18 @@ int tcp_do_ecn = 0; /* RFC3168 ECN enab #endif int tcbhashsize = TCBHASHSIZE; +int tcp_reass_limit = NMBCLUSTERS / 2; /* hardlimit for tcpqe_pool */ + #ifdef INET6 extern int ip6_defhlim; #endif /* INET6 */ struct pool tcpcb_pool; +struct pool tcpqe_pool; #ifdef TCP_SACK struct pool sackhl_pool; #endif -int tcp_freeq(struct tcpcb *); - struct tcpstat tcpstat; /* tcp statistics */ /* @@ -165,6 +166,9 @@ tcp_init() #endif /* TCP_COMPAT_42 */ pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl", NULL); + pool_init(&tcpqe_pool, sizeof(struct ipqent), 0, 0, 0, "tcpqepl", + NULL); + pool_sethardlimit(&tcpqe_pool, tcp_reass_limit, NULL, 0); #ifdef TCP_SACK pool_init(&sackhl_pool, sizeof(struct sackhole), 0, 0, 0, "sackhlpl", NULL); @@ -670,7 +674,9 @@ tcp_close(struct tcpcb *tp) #endif /* RTV_RTT */ /* free the reassembly queue, if any */ + tcp_reass_lock(tp); tcp_freeq(tp); + tcp_reass_unlock(tp); tcp_canceltimers(tp); TCP_CLEAR_DELACK(tp); @@ -703,7 +709,7 @@ tcp_freeq(struct tcpcb *tp) while ((qe = LIST_FIRST(&tp->segq)) != NULL) { LIST_REMOVE(qe, ipqe_q); m_freem(qe->ipqe_m); - pool_put(&ipqent_pool, qe); + pool_put(&tcpqe_pool, qe); rv = 1; } return (rv); @@ -712,7 +718,20 @@ tcp_freeq(struct tcpcb *tp) void tcp_drain() { - + struct inpcb *inp; + + /* called at splimp() */ + CIRCLEQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) { + struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; + + if (tp != NULL) { + if (tcp_reass_lock_try(tp) == 0) + continue; + if (tcp_freeq(tp)) + tcpstat.tcps_conndrained++; + tcp_reass_unlock(tp); + } + } } /* Index: sys/netinet/tcp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.68 retrieving revision 1.68.2.1 diff -u -p -r1.68 -r1.68.2.1 --- sys/netinet/tcp_usrreq.c 12 Feb 2003 14:41:08 -0000 1.68 +++ sys/netinet/tcp_usrreq.c 3 Mar 2004 08:40:07 -0000 1.68.2.1 @@ -877,6 +877,7 @@ tcp_sysctl(name, namelen, oldp, oldlenp, void *newp; size_t newlen; { + int error, nval; /* All sysctl names at this level are terminal. */ if (namelen != 1) @@ -931,6 +932,18 @@ tcp_sysctl(name, namelen, oldp, oldlenp, return (sysctl_int(oldp, oldlenp, newp, newlen, &tcp_do_ecn)); #endif + case TCPCTL_REASS_LIMIT: + nval = tcp_reass_limit; + error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); + if (error) + return (error); + if (nval != tcp_reass_limit) { + error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); + if (error) + return (error); + tcp_reass_limit = nval; + } + return (0); default: return (ENOPROTOOPT); } Index: sys/netinet/tcp_var.h =================================================================== RCS file: /cvs/src/sys/netinet/tcp_var.h,v retrieving revision 1.45 retrieving revision 1.45.2.3 diff -u -p -r1.45 -r1.45.2.3 --- sys/netinet/tcp_var.h 12 Feb 2003 14:41:08 -0000 1.45 +++ sys/netinet/tcp_var.h 4 Mar 2004 03:35:15 -0000 1.45.2.3 @@ -86,6 +86,7 @@ struct tcpcb { #define TF_SEND_CWR 0x00020000 /* send CWR in next seg */ #define TF_DISABLE_ECN 0x00040000 /* disable ECN for this connection */ #endif +#define TF_REASSLOCK 0x00080000 /* reassembling or draining */ struct mbuf *t_template; /* skeletal packet for transmit */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ @@ -209,6 +210,35 @@ do { \ timeout_del(&(tp)->t_delack_to); \ } \ } while (/*CONSTCOND*/0) + +static __inline int tcp_reass_lock_try(struct tcpcb *); +static __inline void tcp_reass_unlock(struct tcpcb *); +#define tcp_reass_lock(tp) tcp_reass_lock_try(tp) + +static __inline int +tcp_reass_lock_try(struct tcpcb *tp) +{ + int s; + + s = splimp(); + if (tp->t_flags & TF_REASSLOCK) { + splx(s); + return (0); + } + tp->t_flags |= TF_REASSLOCK; + splx(s); + return (1); +} + +static __inline void +tcp_reass_unlock(struct tcpcb *tp) +{ + int s; + + s = splimp(); + tp->t_flags &= ~TF_REASSLOCK; + splx(s); +} #endif /* _KERNEL */ /* @@ -323,6 +353,8 @@ struct tcpstat { u_int32_t tcps_cwr_ecn; /* # of cwnd reduced by ecn */ u_int32_t tcps_cwr_frecovery; /* # of cwnd reduced by fastrecovery */ u_int32_t tcps_cwr_timeout; /* # of cwnd reduced by timeout */ + + u_int64_t tcps_conndrained; /* # of connections drained */ }; /* @@ -343,7 +375,8 @@ struct tcpstat { #define TCPCTL_RSTPPSLIMIT 12 /* RST pps limit */ #define TCPCTL_ACK_ON_PUSH 13 /* ACK immediately on PUSH */ #define TCPCTL_ECN 14 /* RFC3168 ECN */ -#define TCPCTL_MAXID 15 +#define TCPCTL_REASS_LIMIT 15 /* max entries for tcp reass queues */ +#define TCPCTL_MAXID 16 #define TCPCTL_NAMES { \ { 0, 0 }, \ @@ -361,6 +394,7 @@ struct tcpstat { { "rstppslimit", CTLTYPE_INT }, \ { "ackonpush", CTLTYPE_INT }, \ { "ecn", CTLTYPE_INT }, \ + { "reasslimit", CTLTYPE_INT }, \ } struct tcp_ident_mapping { @@ -381,10 +415,14 @@ extern struct pool sackhl_pool; #endif extern int tcp_do_ecn; /* RFC3168 ECN enabled/disabled? */ +extern struct pool tcpqe_pool; +extern int tcp_reass_limit; /* max entries for tcp reass queues */ + int tcp_attach(struct socket *); void tcp_canceltimers(struct tcpcb *); struct tcpcb * tcp_close(struct tcpcb *); +int tcp_freeq(struct tcpcb *); #if defined(INET6) && !defined(TCP6) void tcp6_ctlinput(int, struct sockaddr *, void *); #endif