aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp/ccids/ccid2.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp/ccids/ccid2.c')
-rw-r--r--net/dccp/ccids/ccid2.c289
1 files changed, 108 insertions, 181 deletions
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be1..d850e291f87c 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
25 */ 25 */
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include "../feat.h" 27#include "../feat.h"
28#include "../ccid.h"
29#include "../dccp.h"
30#include "ccid2.h" 28#include "ccid2.h"
31 29
32 30
33#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 31#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
34static int ccid2_debug; 32static int ccid2_debug;
35#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) 33#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
36
37static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
38{
39 int len = 0;
40 int pipe = 0;
41 struct ccid2_seq *seqp = hc->tx_seqh;
42
43 /* there is data in the chain */
44 if (seqp != hc->tx_seqt) {
45 seqp = seqp->ccid2s_prev;
46 len++;
47 if (!seqp->ccid2s_acked)
48 pipe++;
49
50 while (seqp != hc->tx_seqt) {
51 struct ccid2_seq *prev = seqp->ccid2s_prev;
52
53 len++;
54 if (!prev->ccid2s_acked)
55 pipe++;
56
57 /* packets are sent sequentially */
58 BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
59 prev->ccid2s_seq ) >= 0);
60 BUG_ON(time_before(seqp->ccid2s_sent,
61 prev->ccid2s_sent));
62
63 seqp = prev;
64 }
65 }
66
67 BUG_ON(pipe != hc->tx_pipe);
68 ccid2_pr_debug("len of chain=%d\n", len);
69
70 do {
71 seqp = seqp->ccid2s_prev;
72 len++;
73 } while (seqp != hc->tx_seqh);
74
75 ccid2_pr_debug("total len=%d\n", len);
76 BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
77}
78#else 34#else
79#define ccid2_pr_debug(format, a...) 35#define ccid2_pr_debug(format, a...)
80#define ccid2_hc_tx_check_sanity(hc)
81#endif 36#endif
82 37
83static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) 38static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -156,19 +111,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
156 dp->dccps_l_ack_ratio = val; 111 dp->dccps_l_ack_ratio = val;
157} 112}
158 113
159static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
160{
161 ccid2_pr_debug("change SRTT to %ld\n", val);
162 hc->tx_srtt = val;
163}
164
165static void ccid2_start_rto_timer(struct sock *sk);
166
167static void ccid2_hc_tx_rto_expire(unsigned long data) 114static void ccid2_hc_tx_rto_expire(unsigned long data)
168{ 115{
169 struct sock *sk = (struct sock *)data; 116 struct sock *sk = (struct sock *)data;
170 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 117 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
171 long s;
172 118
173 bh_lock_sock(sk); 119 bh_lock_sock(sk);
174 if (sock_owned_by_user(sk)) { 120 if (sock_owned_by_user(sk)) {
@@ -178,23 +124,19 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
178 124
179 ccid2_pr_debug("RTO_EXPIRE\n"); 125 ccid2_pr_debug("RTO_EXPIRE\n");
180 126
181 ccid2_hc_tx_check_sanity(hc);
182
183 /* back-off timer */ 127 /* back-off timer */
184 hc->tx_rto <<= 1; 128 hc->tx_rto <<= 1;
129 if (hc->tx_rto > DCCP_RTO_MAX)
130 hc->tx_rto = DCCP_RTO_MAX;
185 131
186 s = hc->tx_rto / HZ; 132 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
187 if (s > 60)
188 hc->tx_rto = 60 * HZ;
189
190 ccid2_start_rto_timer(sk);
191 133
192 /* adjust pipe, cwnd etc */ 134 /* adjust pipe, cwnd etc */
193 hc->tx_ssthresh = hc->tx_cwnd / 2; 135 hc->tx_ssthresh = hc->tx_cwnd / 2;
194 if (hc->tx_ssthresh < 2) 136 if (hc->tx_ssthresh < 2)
195 hc->tx_ssthresh = 2; 137 hc->tx_ssthresh = 2;
196 hc->tx_cwnd = 1; 138 hc->tx_cwnd = 1;
197 hc->tx_pipe = 0; 139 hc->tx_pipe = 0;
198 140
199 /* clear state about stuff we sent */ 141 /* clear state about stuff we sent */
200 hc->tx_seqt = hc->tx_seqh; 142 hc->tx_seqt = hc->tx_seqh;
@@ -204,23 +146,12 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
204 hc->tx_rpseq = 0; 146 hc->tx_rpseq = 0;
205 hc->tx_rpdupack = -1; 147 hc->tx_rpdupack = -1;
206 ccid2_change_l_ack_ratio(sk, 1); 148 ccid2_change_l_ack_ratio(sk, 1);
207 ccid2_hc_tx_check_sanity(hc);
208out: 149out:
209 bh_unlock_sock(sk); 150 bh_unlock_sock(sk);
210 sock_put(sk); 151 sock_put(sk);
211} 152}
212 153
213static void ccid2_start_rto_timer(struct sock *sk) 154static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
214{
215 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
216
217 ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
218
219 BUG_ON(timer_pending(&hc->tx_rtotimer));
220 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
221}
222
223static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
224{ 155{
225 struct dccp_sock *dp = dccp_sk(sk); 156 struct dccp_sock *dp = dccp_sk(sk);
226 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 157 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
@@ -230,7 +161,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
230 161
231 hc->tx_seqh->ccid2s_seq = dp->dccps_gss; 162 hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
232 hc->tx_seqh->ccid2s_acked = 0; 163 hc->tx_seqh->ccid2s_acked = 0;
233 hc->tx_seqh->ccid2s_sent = jiffies; 164 hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
234 165
235 next = hc->tx_seqh->ccid2s_next; 166 next = hc->tx_seqh->ccid2s_next;
236 /* check if we need to alloc more space */ 167 /* check if we need to alloc more space */
@@ -296,23 +227,20 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
296 } 227 }
297#endif 228#endif
298 229
299 /* setup RTO timer */ 230 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
300 if (!timer_pending(&hc->tx_rtotimer))
301 ccid2_start_rto_timer(sk);
302 231
303#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 232#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
304 do { 233 do {
305 struct ccid2_seq *seqp = hc->tx_seqt; 234 struct ccid2_seq *seqp = hc->tx_seqt;
306 235
307 while (seqp != hc->tx_seqh) { 236 while (seqp != hc->tx_seqh) {
308 ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", 237 ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
309 (unsigned long long)seqp->ccid2s_seq, 238 (unsigned long long)seqp->ccid2s_seq,
310 seqp->ccid2s_acked, seqp->ccid2s_sent); 239 seqp->ccid2s_acked, seqp->ccid2s_sent);
311 seqp = seqp->ccid2s_next; 240 seqp = seqp->ccid2s_next;
312 } 241 }
313 } while (0); 242 } while (0);
314 ccid2_pr_debug("=========\n"); 243 ccid2_pr_debug("=========\n");
315 ccid2_hc_tx_check_sanity(hc);
316#endif 244#endif
317} 245}
318 246
@@ -378,17 +306,87 @@ out_invalid_option:
378 return -1; 306 return -1;
379} 307}
380 308
381static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) 309/**
310 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
311 * This code is almost identical with TCP's tcp_rtt_estimator(), since
312 * - it has a higher sampling frequency (recommended by RFC 1323),
313 * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
314 * - it is simple (cf. more complex proposals such as Eifel timer or research
315 * which suggests that the gain should be set according to window size),
316 * - in tests it was found to work well with CCID2 [gerrit].
317 */
318static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
382{ 319{
383 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 320 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
321 long m = mrtt ? : 1;
384 322
385 sk_stop_timer(sk, &hc->tx_rtotimer); 323 if (hc->tx_srtt == 0) {
386 ccid2_pr_debug("deleted RTO timer\n"); 324 /* First measurement m */
325 hc->tx_srtt = m << 3;
326 hc->tx_mdev = m << 1;
327
328 hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
329 hc->tx_rttvar = hc->tx_mdev_max;
330
331 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
332 } else {
333 /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
334 m -= (hc->tx_srtt >> 3);
335 hc->tx_srtt += m;
336
337 /* Similarly, update scaled mdev with regard to |m| */
338 if (m < 0) {
339 m = -m;
340 m -= (hc->tx_mdev >> 2);
341 /*
342 * This neutralises RTO increase when RTT < SRTT - mdev
343 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
344 * in Linux TCP", USENIX 2002, pp. 49-62).
345 */
346 if (m > 0)
347 m >>= 3;
348 } else {
349 m -= (hc->tx_mdev >> 2);
350 }
351 hc->tx_mdev += m;
352
353 if (hc->tx_mdev > hc->tx_mdev_max) {
354 hc->tx_mdev_max = hc->tx_mdev;
355 if (hc->tx_mdev_max > hc->tx_rttvar)
356 hc->tx_rttvar = hc->tx_mdev_max;
357 }
358
359 /*
360 * Decay RTTVAR at most once per flight, exploiting that
361 * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
362 * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
363 * GAR is a useful bound for FlightSize = pipe.
364 * AWL is probably too low here, as it over-estimates pipe.
365 */
366 if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
367 if (hc->tx_mdev_max < hc->tx_rttvar)
368 hc->tx_rttvar -= (hc->tx_rttvar -
369 hc->tx_mdev_max) >> 2;
370 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
371 hc->tx_mdev_max = tcp_rto_min(sk);
372 }
373 }
374
375 /*
376 * Set RTO from SRTT and RTTVAR
377 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
378 * This agrees with RFC 4341, 5:
379 * "Because DCCP does not retransmit data, DCCP does not require
380 * TCP's recommended minimum timeout of one second".
381 */
382 hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
383
384 if (hc->tx_rto > DCCP_RTO_MAX)
385 hc->tx_rto = DCCP_RTO_MAX;
387} 386}
388 387
389static inline void ccid2_new_ack(struct sock *sk, 388static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
390 struct ccid2_seq *seqp, 389 unsigned int *maxincr)
391 unsigned int *maxincr)
392{ 390{
393 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 391 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
394 392
@@ -402,93 +400,27 @@ static inline void ccid2_new_ack(struct sock *sk,
402 hc->tx_cwnd += 1; 400 hc->tx_cwnd += 1;
403 hc->tx_packets_acked = 0; 401 hc->tx_packets_acked = 0;
404 } 402 }
405 403 /*
406 /* update RTO */ 404 * FIXME: RTT is sampled several times per acknowledgment (for each
407 if (hc->tx_srtt == -1 || 405 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
408 time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { 406 * This causes the RTT to be over-estimated, since the older entries
409 unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; 407 * in the Ack Vector have earlier sending times.
410 int s; 408 * The cleanest solution is to not use the ccid2s_sent field at all
411 409 * and instead use DCCP timestamps: requires changes in other places.
412 /* first measurement */ 410 */
413 if (hc->tx_srtt == -1) { 411 ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
414 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
415 r, jiffies,
416 (unsigned long long)seqp->ccid2s_seq);
417 ccid2_change_srtt(hc, r);
418 hc->tx_rttvar = r >> 1;
419 } else {
420 /* RTTVAR */
421 long tmp = hc->tx_srtt - r;
422 long srtt;
423
424 if (tmp < 0)
425 tmp *= -1;
426
427 tmp >>= 2;
428 hc->tx_rttvar *= 3;
429 hc->tx_rttvar >>= 2;
430 hc->tx_rttvar += tmp;
431
432 /* SRTT */
433 srtt = hc->tx_srtt;
434 srtt *= 7;
435 srtt >>= 3;
436 tmp = r >> 3;
437 srtt += tmp;
438 ccid2_change_srtt(hc, srtt);
439 }
440 s = hc->tx_rttvar << 2;
441 /* clock granularity is 1 when based on jiffies */
442 if (!s)
443 s = 1;
444 hc->tx_rto = hc->tx_srtt + s;
445
446 /* must be at least a second */
447 s = hc->tx_rto / HZ;
448 /* DCCP doesn't require this [but I like it cuz my code sux] */
449#if 1
450 if (s < 1)
451 hc->tx_rto = HZ;
452#endif
453 /* max 60 seconds */
454 if (s > 60)
455 hc->tx_rto = HZ * 60;
456
457 hc->tx_lastrtt = jiffies;
458
459 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
460 hc->tx_srtt, hc->tx_rttvar,
461 hc->tx_rto, HZ, r);
462 }
463
464 /* we got a new ack, so re-start RTO timer */
465 ccid2_hc_tx_kill_rto_timer(sk);
466 ccid2_start_rto_timer(sk);
467}
468
469static void ccid2_hc_tx_dec_pipe(struct sock *sk)
470{
471 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
472
473 if (hc->tx_pipe == 0)
474 DCCP_BUG("pipe == 0");
475 else
476 hc->tx_pipe--;
477
478 if (hc->tx_pipe == 0)
479 ccid2_hc_tx_kill_rto_timer(sk);
480} 412}
481 413
482static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) 414static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
483{ 415{
484 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 416 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
485 417
486 if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { 418 if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
487 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); 419 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
488 return; 420 return;
489 } 421 }
490 422
491 hc->tx_last_cong = jiffies; 423 hc->tx_last_cong = ccid2_time_stamp;
492 424
493 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; 425 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
494 hc->tx_ssthresh = max(hc->tx_cwnd, 2U); 426 hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
@@ -510,7 +442,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
510 int done = 0; 442 int done = 0;
511 unsigned int maxincr = 0; 443 unsigned int maxincr = 0;
512 444
513 ccid2_hc_tx_check_sanity(hc);
514 /* check reverse path congestion */ 445 /* check reverse path congestion */
515 seqno = DCCP_SKB_CB(skb)->dccpd_seq; 446 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
516 447
@@ -620,7 +551,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
620 seqp->ccid2s_acked = 1; 551 seqp->ccid2s_acked = 1;
621 ccid2_pr_debug("Got ack for %llu\n", 552 ccid2_pr_debug("Got ack for %llu\n",
622 (unsigned long long)seqp->ccid2s_seq); 553 (unsigned long long)seqp->ccid2s_seq);
623 ccid2_hc_tx_dec_pipe(sk); 554 hc->tx_pipe--;
624 } 555 }
625 if (seqp == hc->tx_seqt) { 556 if (seqp == hc->tx_seqt) {
626 done = 1; 557 done = 1;
@@ -677,7 +608,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
677 * one ack vector. 608 * one ack vector.
678 */ 609 */
679 ccid2_congestion_event(sk, seqp); 610 ccid2_congestion_event(sk, seqp);
680 ccid2_hc_tx_dec_pipe(sk); 611 hc->tx_pipe--;
681 } 612 }
682 if (seqp == hc->tx_seqt) 613 if (seqp == hc->tx_seqt)
683 break; 614 break;
@@ -695,7 +626,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
695 hc->tx_seqt = hc->tx_seqt->ccid2s_next; 626 hc->tx_seqt = hc->tx_seqt->ccid2s_next;
696 } 627 }
697 628
698 ccid2_hc_tx_check_sanity(hc); 629 /* restart RTO timer if not all outstanding data has been acked */
630 if (hc->tx_pipe == 0)
631 sk_stop_timer(sk, &hc->tx_rtotimer);
632 else
633 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
699} 634}
700 635
701static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 636static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -707,12 +642,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
707 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ 642 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
708 hc->tx_ssthresh = ~0U; 643 hc->tx_ssthresh = ~0U;
709 644
710 /* 645 /* Use larger initial windows (RFC 4341, section 5). */
711 * RFC 4341, 5: "The cwnd parameter is initialized to at most four 646 hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
712 * packets for new connections, following the rules from [RFC3390]".
713 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
714 */
715 hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
716 647
717 /* Make sure that Ack Ratio is enabled and within bounds. */ 648 /* Make sure that Ack Ratio is enabled and within bounds. */
718 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); 649 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
@@ -723,15 +654,11 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
723 if (ccid2_hc_tx_alloc_seq(hc)) 654 if (ccid2_hc_tx_alloc_seq(hc))
724 return -ENOMEM; 655 return -ENOMEM;
725 656
726 hc->tx_rto = 3 * HZ; 657 hc->tx_rto = DCCP_TIMEOUT_INIT;
727 ccid2_change_srtt(hc, -1);
728 hc->tx_rttvar = -1;
729 hc->tx_rpdupack = -1; 658 hc->tx_rpdupack = -1;
730 hc->tx_last_cong = jiffies; 659 hc->tx_last_cong = ccid2_time_stamp;
731 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 660 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
732 (unsigned long)sk); 661 (unsigned long)sk);
733
734 ccid2_hc_tx_check_sanity(hc);
735 return 0; 662 return 0;
736} 663}
737 664
@@ -740,7 +667,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
740 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 667 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
741 int i; 668 int i;
742 669
743 ccid2_hc_tx_kill_rto_timer(sk); 670 sk_stop_timer(sk, &hc->tx_rtotimer);
744 671
745 for (i = 0; i < hc->tx_seqbufc; i++) 672 for (i = 0; i < hc->tx_seqbufc; i++)
746 kfree(hc->tx_seqbuf[i]); 673 kfree(hc->tx_seqbuf[i]);