tcp_sack.c source code [src/src/sys/netinet/tcp_sack.c]

1	/*	$NetBSD: tcp_sack.c,v 1.32 2015/08/24 22:21:26 pooka Exp $	*/
2
3	/*
4	* Copyright (c) 2005 The NetBSD Foundation, Inc.
5	* All rights reserved.
6	*
7	* This code is derived from software contributed to The NetBSD Foundation
8	* by Kentaro A. Kurahone.
9	*
10	* Redistribution and use in source and binary forms, with or without
11	* modification, are permitted provided that the following conditions
12	* are met:
13	* 1. Redistributions of source code must retain the above copyright
14	* notice, this list of conditions and the following disclaimer.
15	* 2. Redistributions in binary form must reproduce the above copyright
16	* notice, this list of conditions and the following disclaimer in the
17	* documentation and/or other materials provided with the distribution.
18	*
19	* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20	* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21	* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29	* POSSIBILITY OF SUCH DAMAGE.
30	*/
31
32	/*
33	* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
34	* The Regents of the University of California. All rights reserved.
35	*
36	* Redistribution and use in source and binary forms, with or without
37	* modification, are permitted provided that the following conditions
38	* are met:
39	* 1. Redistributions of source code must retain the above copyright
40	* notice, this list of conditions and the following disclaimer.
41	* 2. Redistributions in binary form must reproduce the above copyright
42	* notice, this list of conditions and the following disclaimer in the
43	* documentation and/or other materials provided with the distribution.
44	* 4. Neither the name of the University nor the names of its contributors
45	* may be used to endorse or promote products derived from this software
46	* without specific prior written permission.
47	*
48	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58	* SUCH DAMAGE.
59	*
60	* @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
61	* $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.2 2004/12/25 23:02:57 rwatson Exp $
62	*/
63
64	/*
65	* @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
66	*
67	* NRL grants permission for redistribution and use in source and binary
68	* forms, with or without modification, of the software and documentation
69	* created at NRL provided that the following conditions are met:
70	*
71	* 1. Redistributions of source code must retain the above copyright
72	* notice, this list of conditions and the following disclaimer.
73	* 2. Redistributions in binary form must reproduce the above copyright
74	* notice, this list of conditions and the following disclaimer in the
75	* documentation and/or other materials provided with the distribution.
76	* 3. All advertising materials mentioning features or use of this software
77	* must display the following acknowledgements:
78	* This product includes software developed by the University of
79	* California, Berkeley and its contributors.
80	* This product includes software developed at the Information
81	* Technology Division, US Naval Research Laboratory.
82	* 4. Neither the name of the NRL nor the names of its contributors
83	* may be used to endorse or promote products derived from this software
84	* without specific prior written permission.
85	*
86	* THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87	* IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88	* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89	* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
90	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94	* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95	* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97	*
98	* The views and conclusions contained in the software and documentation
99	* are those of the authors and should not be interpreted as representing
100	* official policies, either expressed or implied, of the US Naval
101	* Research Laboratory (NRL).
102	*/
103
104	#include <sys/cdefs.h>
105	__KERNEL_RCSID(`0`, "$NetBSD: tcp_sack.c,v 1.32 2015/08/24 22:21:26 pooka Exp $");
106
107	#ifdef _KERNEL_OPT
108	#include "opt_inet.h"
109	#include "opt_inet_csum.h"
110	#include "opt_tcp_debug.h"
111	#include "opt_ddb.h"
112	#endif
113
114	#include <sys/param.h>
115	#include <sys/systm.h>
116	#include <sys/mbuf.h>
117	#include <sys/protosw.h>
118	#include <sys/socket.h>
119	#include <sys/socketvar.h>
120	#include <sys/errno.h>
121	#include <sys/syslog.h>
122	#include <sys/pool.h>
123	#include <sys/domain.h>
124	#include <sys/kernel.h>
125
126	#include <net/if.h>
127	#include <net/route.h>
128	#include <net/if_types.h>
129
130	#include <netinet/in.h>
131	#include <netinet/in_systm.h>
132	#include <netinet/ip.h>
133	#include <netinet/in_pcb.h>
134	#include <netinet/in_var.h>
135	#include <netinet/ip_var.h>
136
137	#ifdef INET6
138	#ifndef INET
139	#include <netinet/in.h>
140	#endif
141	#include <netinet/ip6.h>
142	#include <netinet6/ip6_var.h>
143	#include <netinet6/in6_pcb.h>
144	#include <netinet6/ip6_var.h>
145	#include <netinet6/in6_var.h>
146	#include <netinet/icmp6.h>
147	#include <netinet6/nd6.h>
148	#endif
149
150	#ifndef INET6
151	/* always need ip6.h for IP6_EXTHDR_GET */
152	#include <netinet/ip6.h>
153	#endif
154
155	#include <netinet/tcp.h>
156	#include <netinet/tcp_fsm.h>
157	#include <netinet/tcp_seq.h>
158	#include <netinet/tcp_timer.h>
159	#include <netinet/tcp_var.h>
160	#include <netinet/tcpip.h>
161	#include <netinet/tcp_debug.h>
162
163	/ SACK block pool. /
164	static struct pool sackhole_pool;
165
166	void
167	tcp_sack_init(void)
168	{
169
170	pool_init(&sackhole_pool, sizeof(struct sackhole), `0`, `0`, `0`,
171	"sackholepl", NULL, IPL_SOFTNET);
172	}
173
174	static struct sackhole *
175	sack_allochole(struct tcpcb *tp)
176	{
177	struct sackhole *hole;
178
179	if (tp->snd_numholes >= tcp_sack_tp_maxholes \|\|
180	tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
181	return NULL;
182	}
183	hole = pool_get(&sackhole_pool, PR_NOWAIT);
184	if (hole == NULL) {
185	return NULL;
186	}
187	tp->snd_numholes++;
188	tcp_sack_globalholes++;
189
190	return hole;
191	}
192
193	static struct sackhole *
194	sack_inserthole(struct tcpcb *tp, tcp_seq start, tcp_seq end,
195	struct sackhole *prev)
196	{
197	struct sackhole *hole;
198
199	hole = sack_allochole(tp);
200	if (hole == NULL) {
201	return NULL;
202	}
203	hole->start = hole->rxmit = start;
204	hole->end = end;
205	if (prev != NULL) {
206	TAILQ_INSERT_AFTER(&tp->snd_holes, prev, hole, sackhole_q);
207	} else {
208	TAILQ_INSERT_TAIL(&tp->snd_holes, hole, sackhole_q);
209	}
210	return hole;
211	}
212
213	static struct sackhole *
214	sack_removehole(struct tcpcb tp, struct* sackhole *hole)
215	{
216	struct sackhole *next;
217
218	next = TAILQ_NEXT(hole, sackhole_q);
219	tp->snd_numholes--;
220	tcp_sack_globalholes--;
221	TAILQ_REMOVE(&tp->snd_holes, hole, sackhole_q);
222	pool_put(&sackhole_pool, hole);
223
224	return next;
225	}
226
227	/*
228	* tcp_new_dsack: record the reception of a duplicated segment.
229	*/
230
231	void
232	tcp_new_dsack(struct tcpcb *tp, tcp_seq seq, u_int32_t len)
233	{
234
235	if (TCP_SACK_ENABLED(tp)) {
236	tp->rcv_dsack_block.left = seq;
237	tp->rcv_dsack_block.right = seq + len;
238	tp->rcv_sack_flags \|= TCPSACK_HAVED;
239	}
240	}
241
242	/*
243	* tcp_sack_option: parse the given SACK option and update the scoreboard.
244	*/
245
246	void
247	tcp_sack_option(struct tcpcb tp, const* struct tcphdr th, const* u_char *cp,
248	int optlen)
249	{
250	struct sackblk
251	t_sack_block[(MAX_TCPOPTLEN - `2`) / (sizeof(u_int32_t) * `2`)];
252	struct sackblk *sack = NULL;
253	struct sackhole *cur = NULL;
254	struct sackhole *tmp = NULL;
255	const char *lp = cp + `2`;
256	int i, j, num_sack_blks;
257	tcp_seq left, right, acked;
258
259	/*
260	* If we aren't processing SACK responses, this is not an ACK
261	* or the peer sends us a sack option with invalid length, don't
262	* update the scoreboard.
263	*/
264	if (!TCP_SACK_ENABLED(tp) \|\| ((th->th_flags & TH_ACK) == `0`) \|\|
265	(optlen % `8` != `2` \|\| optlen < `10`)) {
266	return;
267	}
268
269	/*
270	* If we don't want any SACK holes to be allocated, just return.
271	*/
272	if (tcp_sack_globalmaxholes == `0` \|\| tcp_sack_tp_maxholes == `0`) {
273	return;
274	}
275
276	/ If the ACK is outside [snd_una, snd_max], ignore the SACK options. /
277	if (SEQ_LT(th->th_ack, tp->snd_una) \|\| SEQ_GT(th->th_ack, tp->snd_max))
278	return;
279
280	/*
281	* Extract SACK blocks.
282	*
283	* Note that t_sack_block is sorted so that we only need to do
284	* one pass over the sequence number space. (SACK "fast-path")
285	*/
286	num_sack_blks = optlen / `8`;
287	acked = (SEQ_GT(th->th_ack, tp->snd_una)) ? th->th_ack : tp->snd_una;
288	for (i = `0`; i < num_sack_blks; i++, lp += sizeof(uint32_t) * `2`) {
289	memcpy(&left, lp, sizeof(uint32_t));
290	memcpy(&right, lp + sizeof(uint32_t), sizeof(uint32_t));
291	left = ntohl(left);
292	right = ntohl(right);
293
294	if (SEQ_LEQ(right, acked) \|\| SEQ_GT(right, tp->snd_max) \|\|
295	SEQ_GEQ(left, right)) {
296	/ SACK entry that's old, or invalid. /
297	i--;
298	num_sack_blks--;
299	continue;
300	}
301
302	/ Insertion sort. /
303	for (j = i; (j > `0`) && SEQ_LT(left, t_sack_block[j - `1`].left);
304	j--) {
305	t_sack_block[j].left = t_sack_block[j - `1`].left;
306	t_sack_block[j].right = t_sack_block[j - `1`].right;
307	}
308	t_sack_block[j].left = left;
309	t_sack_block[j].right = right;
310	}
311
312	/ Update the scoreboard. /
313	cur = TAILQ_FIRST(&tp->snd_holes);
314	for (i = `0`; i < num_sack_blks; i++) {
315	sack = &t_sack_block[i];
316	/*
317	* FACK TCP. Update snd_fack so we can enter Fast
318	* Recovery early.
319	*/
320	if (SEQ_GEQ(sack->right, tp->snd_fack))
321	tp->snd_fack = sack->right;
322
323	if (TAILQ_EMPTY(&tp->snd_holes)) {
324	/ First hole. /
325	cur = sack_inserthole(tp, th->th_ack, sack->left, NULL);
326	if (cur == NULL) {
327	/ ENOBUFS, bail out/
328	return;
329	}
330	tp->rcv_lastsack = sack->right;
331	continue; / With next sack block /
332	}
333
334	/ Go through the list of holes. /
335	while (cur) {
336	if (SEQ_LEQ(sack->right, cur->start))
337	/ SACKs data before the current hole /
338	break; / No use going through more holes /
339
340	if (SEQ_GEQ(sack->left, cur->end)) {
341	/ SACKs data beyond the current hole /
342	cur = TAILQ_NEXT(cur, sackhole_q);
343	continue;
344	}
345
346	if (SEQ_LEQ(sack->left, cur->start)) {
347	/ Data acks at least the beginning of hole /
348	if (SEQ_GEQ(sack->right, cur->end)) {
349	/ Acks entire hole, so delete hole /
350	cur = sack_removehole(tp, cur);
351	break;
352	}
353
354	/ Otherwise, move start of hole forward /
355	cur->start = sack->right;
356	cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
357	break;
358	}
359
360	if (SEQ_GEQ(sack->right, cur->end)) {
361	/ Move end of hole backward. /
362	cur->end = sack->left;
363	cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
364	cur = TAILQ_NEXT(cur, sackhole_q);
365	break;
366	}
367
368	if (SEQ_LT(cur->start, sack->left) &&
369	SEQ_GT(cur->end, sack->right)) {
370	/*
371	* ACKs some data in middle of a hole; need to
372	* split current hole
373	*/
374	tmp = sack_inserthole(tp, sack->right, cur->end,
375	cur);
376	if (tmp == NULL) {
377	return;
378	}
379	tmp->rxmit = SEQ_MAX(cur->rxmit, tmp->start);
380	cur->end = sack->left;
381	cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
382	cur = tmp;
383	break;
384	}
385	}
386
387	/ At this point, we have reached the tail of the list. /
388	if (SEQ_LT(tp->rcv_lastsack, sack->left)) {
389	/*
390	* Need to append new hole at end.
391	*/
392	cur = sack_inserthole(tp, tp->rcv_lastsack, sack->left,
393	NULL);
394	if (cur == NULL) {
395	return;
396	}
397	}
398	if (SEQ_LT(tp->rcv_lastsack, sack->right)) {
399	tp->rcv_lastsack = sack->right;
400	}
401	}
402	}
403
404	/*
405	* tcp_del_sackholes: remove holes covered by a cumulative ACK.
406	*/
407
408	void
409	tcp_del_sackholes(struct tcpcb tp, const* struct tcphdr *th)
410	{
411	/ Max because this could be an older ack that just arrived. /
412	tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
413	th->th_ack : tp->snd_una;
414	struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
415
416	while (cur) {
417	if (SEQ_LEQ(cur->end, lastack)) {
418	cur = sack_removehole(tp, cur);
419	} else if (SEQ_LT(cur->start, lastack)) {
420	cur->start = lastack;
421	if (SEQ_LT(cur->rxmit, cur->start))
422	cur->rxmit = cur->start;
423	break;
424	} else
425	break;
426	}
427	}
428
429	/*
430	* tcp_free_sackholes: clear the scoreboard.
431	*/
432
433	void
434	tcp_free_sackholes(struct tcpcb *tp)
435	{
436	struct sackhole *sack;
437
438	/ Free up the SACK hole list. /
439	while ((sack = TAILQ_FIRST(&tp->snd_holes)) != NULL) {
440	sack_removehole(tp, sack);
441	}
442	KASSERT(tp->snd_numholes == `0`);
443	}
444
445	/*
446	* Returns pointer to a sackhole if there are any pending retransmissions;
447	* NULL otherwise.
448	*/
449	struct sackhole *
450	tcp_sack_output(struct tcpcb tp, int* *sack_bytes_rexmt)
451	{
452	struct sackhole *cur = NULL;
453
454	if (!TCP_SACK_ENABLED(tp))
455	return (NULL);
456
457	*sack_bytes_rexmt = `0`;
458	TAILQ_FOREACH(cur, &tp->snd_holes, sackhole_q) {
459	if (SEQ_LT(cur->rxmit, cur->end)) {
460	if (SEQ_LT(cur->rxmit, tp->snd_una)) {
461	/ old SACK hole /
462	continue;
463	}
464	*sack_bytes_rexmt += (cur->rxmit - cur->start);
465	break;
466	}
467	*sack_bytes_rexmt += (cur->rxmit - cur->start);
468	}
469
470	return (cur);
471	}
472
473	/*
474	* After a timeout, the SACK list may be rebuilt. This SACK information
475	* should be used to avoid retransmitting SACKed data. This function
476	* traverses the SACK list to see if snd_nxt should be moved forward.
477	*/
478	void
479	tcp_sack_adjust(struct tcpcb *tp)
480	{
481	struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
482	struct sackhole *n = NULL;
483
484	if (TAILQ_EMPTY(&tp->snd_holes))
485	return; / No holes /
486	if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
487	return; / We're already beyond any SACKed blocks /
488
489	/*
490	* Two cases for which we want to advance snd_nxt:
491	* i) snd_nxt lies between end of one hole and beginning of another
492	* ii) snd_nxt lies between end of last hole and rcv_lastsack
493	*/
494	while ((n = TAILQ_NEXT(cur, sackhole_q)) != NULL) {
495	if (SEQ_LT(tp->snd_nxt, cur->end))
496	return;
497	if (SEQ_GEQ(tp->snd_nxt, n->start))
498	cur = n;
499	else {
500	tp->snd_nxt = n->start;
501	return;
502	}
503	}
504	if (SEQ_LT(tp->snd_nxt, cur->end))
505	return;
506	tp->snd_nxt = tp->rcv_lastsack;
507
508	return;
509	}
510
511	/*
512	* tcp_sack_numblks: return the number of SACK blocks to send.
513	*/
514
515	int
516	tcp_sack_numblks(const struct tcpcb *tp)
517	{
518	int numblks;
519
520	if (!TCP_SACK_ENABLED(tp)) {
521	return `0`;
522	}
523
524	numblks = (((tp->rcv_sack_flags & TCPSACK_HAVED) != `0`) ? `1` : `0`) +
525	tp->t_segqlen;
526
527	if (numblks == `0`) {
528	return `0`;
529	}
530
531	if (numblks > TCP_SACK_MAX) {
532	numblks = TCP_SACK_MAX;
533	}
534
535	return numblks;
536	}
537
#if defined(DDB)
void sack_dump(const struct tcpcb *);

/*
 * sack_dump: print the SACK state of tp (only built when DDB is defined).
 *
 * Prints snd_una, snd_max, rcv_lastsack, snd_fack and the hole count,
 * followed by one line per scoreboard entry showing its start, end and
 * rxmit values.
 */
void
sack_dump(const struct tcpcb *tp)
{
	const struct sackhole *hole;

	printf("snd_una=%" PRIu32 ", snd_max=%" PRIu32 "\n",
	    tp->snd_una, tp->snd_max);
	printf("rcv_lastsack=%" PRIu32 ", snd_fack=%" PRIu32 "\n",
	    tp->rcv_lastsack, tp->snd_fack);
	printf("numholes=%d\n", tp->snd_numholes);

	/* Walk the scoreboard from head to tail. */
	for (hole = TAILQ_FIRST(&tp->snd_holes); hole != NULL;
	    hole = TAILQ_NEXT(hole, sackhole_q)) {
		printf("\t%" PRIu32 "-%" PRIu32 ", rxmit=%" PRIu32 "\n",
		    hole->start, hole->end, hole->rxmit);
	}
}
#endif /* defined(DDB) */
557

Browse the source code of src/src/sys/netinet/tcp_sack.c