diff options
Diffstat (limited to 'net/dccp/ccids/ccid2.c')
-rw-r--r-- | net/dccp/ccids/ccid2.c | 169 |
1 files changed, 93 insertions, 76 deletions
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index f7f5069b1e84..7af3106c1f94 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c | |||
@@ -113,19 +113,12 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) | |||
113 | dp->dccps_l_ack_ratio = val; | 113 | dp->dccps_l_ack_ratio = val; |
114 | } | 114 | } |
115 | 115 | ||
116 | static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val) | ||
117 | { | ||
118 | ccid2_pr_debug("change SRTT to %ld\n", val); | ||
119 | hc->tx_srtt = val; | ||
120 | } | ||
121 | |||
122 | static void ccid2_start_rto_timer(struct sock *sk); | 116 | static void ccid2_start_rto_timer(struct sock *sk); |
123 | 117 | ||
124 | static void ccid2_hc_tx_rto_expire(unsigned long data) | 118 | static void ccid2_hc_tx_rto_expire(unsigned long data) |
125 | { | 119 | { |
126 | struct sock *sk = (struct sock *)data; | 120 | struct sock *sk = (struct sock *)data; |
127 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 121 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
128 | long s; | ||
129 | 122 | ||
130 | bh_lock_sock(sk); | 123 | bh_lock_sock(sk); |
131 | if (sock_owned_by_user(sk)) { | 124 | if (sock_owned_by_user(sk)) { |
@@ -137,10 +130,8 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) | |||
137 | 130 | ||
138 | /* back-off timer */ | 131 | /* back-off timer */ |
139 | hc->tx_rto <<= 1; | 132 | hc->tx_rto <<= 1; |
140 | 133 | if (hc->tx_rto > DCCP_RTO_MAX) | |
141 | s = hc->tx_rto / HZ; | 134 | hc->tx_rto = DCCP_RTO_MAX; |
142 | if (s > 60) | ||
143 | hc->tx_rto = 60 * HZ; | ||
144 | 135 | ||
145 | ccid2_start_rto_timer(sk); | 136 | ccid2_start_rto_timer(sk); |
146 | 137 | ||
@@ -168,7 +159,7 @@ static void ccid2_start_rto_timer(struct sock *sk) | |||
168 | { | 159 | { |
169 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 160 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
170 | 161 | ||
171 | ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto); | 162 | ccid2_pr_debug("setting RTO timeout=%u\n", hc->tx_rto); |
172 | 163 | ||
173 | BUG_ON(timer_pending(&hc->tx_rtotimer)); | 164 | BUG_ON(timer_pending(&hc->tx_rtotimer)); |
174 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); | 165 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); |
@@ -339,9 +330,86 @@ static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) | |||
339 | ccid2_pr_debug("deleted RTO timer\n"); | 330 | ccid2_pr_debug("deleted RTO timer\n"); |
340 | } | 331 | } |
341 | 332 | ||
342 | static inline void ccid2_new_ack(struct sock *sk, | 333 | /** |
343 | struct ccid2_seq *seqp, | 334 | * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm |
344 | unsigned int *maxincr) | 335 | * This code is almost identical with TCP's tcp_rtt_estimator(), since |
336 | * - it has a higher sampling frequency (recommended by RFC 1323), | ||
337 | * - the RTO does not collapse into RTT due to RTTVAR going towards zero, | ||
338 | * - it is simple (cf. more complex proposals such as Eifel timer or research | ||
339 | * which suggests that the gain should be set according to window size), | ||
340 | * - in tests it was found to work well with CCID2 [gerrit]. | ||
341 | */ | ||
342 | static void ccid2_rtt_estimator(struct sock *sk, const long mrtt) | ||
343 | { | ||
344 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
345 | long m = mrtt ? : 1; | ||
346 | |||
347 | if (hc->tx_srtt == 0) { | ||
348 | /* First measurement m */ | ||
349 | hc->tx_srtt = m << 3; | ||
350 | hc->tx_mdev = m << 1; | ||
351 | |||
352 | hc->tx_mdev_max = max(TCP_RTO_MIN, hc->tx_mdev); | ||
353 | hc->tx_rttvar = hc->tx_mdev_max; | ||
354 | hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss; | ||
355 | } else { | ||
356 | /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */ | ||
357 | m -= (hc->tx_srtt >> 3); | ||
358 | hc->tx_srtt += m; | ||
359 | |||
360 | /* Similarly, update scaled mdev with regard to |m| */ | ||
361 | if (m < 0) { | ||
362 | m = -m; | ||
363 | m -= (hc->tx_mdev >> 2); | ||
364 | /* | ||
365 | * This neutralises RTO increase when RTT < SRTT - mdev | ||
366 | * (see P. Sarolahti, A. Kuznetsov,"Congestion Control | ||
367 | * in Linux TCP", USENIX 2002, pp. 49-62). | ||
368 | */ | ||
369 | if (m > 0) | ||
370 | m >>= 3; | ||
371 | } else { | ||
372 | m -= (hc->tx_mdev >> 2); | ||
373 | } | ||
374 | hc->tx_mdev += m; | ||
375 | |||
376 | if (hc->tx_mdev > hc->tx_mdev_max) { | ||
377 | hc->tx_mdev_max = hc->tx_mdev; | ||
378 | if (hc->tx_mdev_max > hc->tx_rttvar) | ||
379 | hc->tx_rttvar = hc->tx_mdev_max; | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * Decay RTTVAR at most once per flight, exploiting that | ||
384 | * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2) | ||
385 | * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1) | ||
386 | * GAR is a useful bound for FlightSize = pipe. | ||
387 | * AWL is probably too low here, as it over-estimates pipe. | ||
388 | */ | ||
389 | if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) { | ||
390 | if (hc->tx_mdev_max < hc->tx_rttvar) | ||
391 | hc->tx_rttvar -= (hc->tx_rttvar - | ||
392 | hc->tx_mdev_max) >> 2; | ||
393 | hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss; | ||
394 | hc->tx_mdev_max = TCP_RTO_MIN; | ||
395 | } | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * Set RTO from SRTT and RTTVAR | ||
400 | * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms. | ||
401 | * This agrees with RFC 4341, 5: | ||
402 | * "Because DCCP does not retransmit data, DCCP does not require | ||
403 | * TCP's recommended minimum timeout of one second". | ||
404 | */ | ||
405 | hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar; | ||
406 | |||
407 | if (hc->tx_rto > DCCP_RTO_MAX) | ||
408 | hc->tx_rto = DCCP_RTO_MAX; | ||
409 | } | ||
410 | |||
411 | static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, | ||
412 | unsigned int *maxincr) | ||
345 | { | 413 | { |
346 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 414 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
347 | 415 | ||
@@ -355,64 +423,15 @@ static inline void ccid2_new_ack(struct sock *sk, | |||
355 | hc->tx_cwnd += 1; | 423 | hc->tx_cwnd += 1; |
356 | hc->tx_packets_acked = 0; | 424 | hc->tx_packets_acked = 0; |
357 | } | 425 | } |
358 | 426 | /* | |
359 | /* update RTO */ | 427 | * FIXME: RTT is sampled several times per acknowledgment (for each |
360 | if (hc->tx_srtt == -1 || | 428 | * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). |
361 | time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { | 429 | * This causes the RTT to be over-estimated, since the older entries |
362 | unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; | 430 | * in the Ack Vector have earlier sending times. |
363 | int s; | 431 | * The cleanest solution is to not use the ccid2s_sent field at all |
364 | 432 | * and instead use DCCP timestamps: requires changes in other places. | |
365 | /* first measurement */ | 433 | */ |
366 | if (hc->tx_srtt == -1) { | 434 | ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent); |
367 | ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", | ||
368 | r, jiffies, | ||
369 | (unsigned long long)seqp->ccid2s_seq); | ||
370 | ccid2_change_srtt(hc, r); | ||
371 | hc->tx_rttvar = r >> 1; | ||
372 | } else { | ||
373 | /* RTTVAR */ | ||
374 | long tmp = hc->tx_srtt - r; | ||
375 | long srtt; | ||
376 | |||
377 | if (tmp < 0) | ||
378 | tmp *= -1; | ||
379 | |||
380 | tmp >>= 2; | ||
381 | hc->tx_rttvar *= 3; | ||
382 | hc->tx_rttvar >>= 2; | ||
383 | hc->tx_rttvar += tmp; | ||
384 | |||
385 | /* SRTT */ | ||
386 | srtt = hc->tx_srtt; | ||
387 | srtt *= 7; | ||
388 | srtt >>= 3; | ||
389 | tmp = r >> 3; | ||
390 | srtt += tmp; | ||
391 | ccid2_change_srtt(hc, srtt); | ||
392 | } | ||
393 | s = hc->tx_rttvar << 2; | ||
394 | /* clock granularity is 1 when based on jiffies */ | ||
395 | if (!s) | ||
396 | s = 1; | ||
397 | hc->tx_rto = hc->tx_srtt + s; | ||
398 | |||
399 | /* must be at least a second */ | ||
400 | s = hc->tx_rto / HZ; | ||
401 | /* DCCP doesn't require this [but I like it cuz my code sux] */ | ||
402 | #if 1 | ||
403 | if (s < 1) | ||
404 | hc->tx_rto = HZ; | ||
405 | #endif | ||
406 | /* max 60 seconds */ | ||
407 | if (s > 60) | ||
408 | hc->tx_rto = HZ * 60; | ||
409 | |||
410 | hc->tx_lastrtt = jiffies; | ||
411 | |||
412 | ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", | ||
413 | hc->tx_srtt, hc->tx_rttvar, | ||
414 | hc->tx_rto, HZ, r); | ||
415 | } | ||
416 | } | 435 | } |
417 | 436 | ||
418 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) | 437 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) |
@@ -662,9 +681,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
662 | if (ccid2_hc_tx_alloc_seq(hc)) | 681 | if (ccid2_hc_tx_alloc_seq(hc)) |
663 | return -ENOMEM; | 682 | return -ENOMEM; |
664 | 683 | ||
665 | hc->tx_rto = 3 * HZ; | 684 | hc->tx_rto = DCCP_TIMEOUT_INIT; |
666 | ccid2_change_srtt(hc, -1); | ||
667 | hc->tx_rttvar = -1; | ||
668 | hc->tx_rpdupack = -1; | 685 | hc->tx_rpdupack = -1; |
669 | hc->tx_last_cong = jiffies; | 686 | hc->tx_last_cong = jiffies; |
670 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, | 687 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, |