diff options
Diffstat (limited to 'net/dccp')
-rw-r--r-- | net/dccp/ccids/ccid2.c | 171 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.h | 18 |
2 files changed, 107 insertions, 82 deletions
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index fa074d442065..22753fd98698 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c | |||
@@ -110,12 +110,6 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) | |||
110 | dp->dccps_l_ack_ratio = val; | 110 | dp->dccps_l_ack_ratio = val; |
111 | } | 111 | } |
112 | 112 | ||
113 | static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) | ||
114 | { | ||
115 | ccid2_pr_debug("change SRTT to %ld\n", val); | ||
116 | hctx->srtt = val; | ||
117 | } | ||
118 | |||
119 | static void ccid2_start_rto_timer(struct sock *sk); | 113 | static void ccid2_start_rto_timer(struct sock *sk); |
120 | 114 | ||
121 | static void ccid2_hc_tx_rto_expire(unsigned long data) | 115 | static void ccid2_hc_tx_rto_expire(unsigned long data) |
@@ -123,7 +117,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) | |||
123 | struct sock *sk = (struct sock *)data; | 117 | struct sock *sk = (struct sock *)data; |
124 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 118 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
125 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); | 119 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); |
126 | long s; | ||
127 | 120 | ||
128 | bh_lock_sock(sk); | 121 | bh_lock_sock(sk); |
129 | if (sock_owned_by_user(sk)) { | 122 | if (sock_owned_by_user(sk)) { |
@@ -135,10 +128,8 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) | |||
135 | 128 | ||
136 | /* back-off timer */ | 129 | /* back-off timer */ |
137 | hctx->rto <<= 1; | 130 | hctx->rto <<= 1; |
138 | 131 | if (hctx->rto > DCCP_RTO_MAX) | |
139 | s = hctx->rto / HZ; | 132 | hctx->rto = DCCP_RTO_MAX; |
140 | if (s > 60) | ||
141 | hctx->rto = 60 * HZ; | ||
142 | 133 | ||
143 | /* adjust pipe, cwnd etc */ | 134 | /* adjust pipe, cwnd etc */ |
144 | hctx->ssthresh = hctx->cwnd / 2; | 135 | hctx->ssthresh = hctx->cwnd / 2; |
@@ -279,9 +270,87 @@ static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) | |||
279 | ccid2_pr_debug("deleted RTO timer\n"); | 270 | ccid2_pr_debug("deleted RTO timer\n"); |
280 | } | 271 | } |
281 | 272 | ||
282 | static inline void ccid2_new_ack(struct sock *sk, | 273 | /** |
283 | struct ccid2_seq *seqp, | 274 | * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm |
284 | unsigned int *maxincr) | 275 | * This code is almost identical with TCP's tcp_rtt_estimator(), since |
276 | * - it has a higher sampling frequency (recommended by RFC 1323), | ||
277 | * - the RTO does not collapse into RTT due to RTTVAR going towards zero, | ||
278 | * - it is simple (cf. more complex proposals such as Eifel timer or research | ||
279 | * which suggests that the gain should be set according to window size), | ||
280 | * - in tests it was found to work well with CCID2 [gerrit]. | ||
281 | */ | ||
282 | static void ccid2_rtt_estimator(struct sock *sk, const long mrtt) | ||
283 | { | ||
284 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | ||
285 | long m = mrtt ? : 1; | ||
286 | |||
287 | if (hctx->srtt == 0) { | ||
288 | /* First measurement m */ | ||
289 | hctx->srtt = m << 3; | ||
290 | hctx->mdev = m << 1; | ||
291 | |||
292 | hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev); | ||
293 | hctx->rttvar = hctx->mdev_max; | ||
294 | hctx->rtt_seq = dccp_sk(sk)->dccps_gss; | ||
295 | } else { | ||
296 | /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */ | ||
297 | m -= (hctx->srtt >> 3); | ||
298 | hctx->srtt += m; | ||
299 | |||
300 | /* Similarly, update scaled mdev with regard to |m| */ | ||
301 | if (m < 0) { | ||
302 | m = -m; | ||
303 | m -= (hctx->mdev >> 2); | ||
304 | /* | ||
305 | * This neutralises RTO increase when RTT < SRTT - mdev | ||
306 | * (see P. Sarolahti, A. Kuznetsov,"Congestion Control | ||
307 | * in Linux TCP", USENIX 2002, pp. 49-62). | ||
308 | */ | ||
309 | if (m > 0) | ||
310 | m >>= 3; | ||
311 | } else { | ||
312 | m -= (hctx->mdev >> 2); | ||
313 | } | ||
314 | hctx->mdev += m; | ||
315 | |||
316 | if (hctx->mdev > hctx->mdev_max) { | ||
317 | hctx->mdev_max = hctx->mdev; | ||
318 | if (hctx->mdev_max > hctx->rttvar) | ||
319 | hctx->rttvar = hctx->mdev_max; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * Decay RTTVAR at most once per flight, exploiting that | ||
324 | * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2) | ||
325 | * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1) | ||
326 | * GAR is a useful bound for FlightSize = pipe, AWL is probably | ||
327 | * too low as it over-estimates pipe. | ||
328 | */ | ||
329 | if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) { | ||
330 | if (hctx->mdev_max < hctx->rttvar) | ||
331 | hctx->rttvar -= (hctx->rttvar - | ||
332 | hctx->mdev_max) >> 2; | ||
333 | hctx->rtt_seq = dccp_sk(sk)->dccps_gss; | ||
334 | hctx->mdev_max = TCP_RTO_MIN; | ||
335 | } | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Set RTO from SRTT and RTTVAR | ||
340 | * Clock granularity is ignored since the minimum error for RTTVAR is | ||
341 | * clamped to 50msec (corresponding to HZ=20). This leads to a minimum | ||
342 | * RTO of 200msec. This agrees with TCP and RFC 4341, 5.: "Because DCCP | ||
343 | * does not retransmit data, DCCP does not require TCP's recommended | ||
344 | * minimum timeout of one second". | ||
345 | */ | ||
346 | hctx->rto = (hctx->srtt >> 3) + hctx->rttvar; | ||
347 | |||
348 | if (hctx->rto > DCCP_RTO_MAX) | ||
349 | hctx->rto = DCCP_RTO_MAX; | ||
350 | } | ||
351 | |||
352 | static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, | ||
353 | unsigned int *maxincr) | ||
285 | { | 354 | { |
286 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 355 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
287 | 356 | ||
@@ -295,64 +364,15 @@ static inline void ccid2_new_ack(struct sock *sk, | |||
295 | hctx->cwnd += 1; | 364 | hctx->cwnd += 1; |
296 | hctx->packets_acked = 0; | 365 | hctx->packets_acked = 0; |
297 | } | 366 | } |
298 | 367 | /* | |
299 | /* update RTO */ | 368 | * FIXME: RTT is sampled several times per acknowledgment (for each |
300 | if (hctx->srtt == -1 || | 369 | * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). |
301 | time_after(jiffies, hctx->lastrtt + hctx->srtt)) { | 370 | * This causes the RTT to be over-estimated, since the older entries |
302 | unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; | 371 | * in the Ack Vector have earlier sending times. |
303 | int s; | 372 | * The cleanest solution is to not use the ccid2s_sent field at all |
304 | 373 | * and instead use DCCP timestamps - need to be resolved at some time. | |
305 | /* first measurement */ | 374 | */ |
306 | if (hctx->srtt == -1) { | 375 | ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent); |
307 | ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", | ||
308 | r, jiffies, | ||
309 | (unsigned long long)seqp->ccid2s_seq); | ||
310 | ccid2_change_srtt(hctx, r); | ||
311 | hctx->rttvar = r >> 1; | ||
312 | } else { | ||
313 | /* RTTVAR */ | ||
314 | long tmp = hctx->srtt - r; | ||
315 | long srtt; | ||
316 | |||
317 | if (tmp < 0) | ||
318 | tmp *= -1; | ||
319 | |||
320 | tmp >>= 2; | ||
321 | hctx->rttvar *= 3; | ||
322 | hctx->rttvar >>= 2; | ||
323 | hctx->rttvar += tmp; | ||
324 | |||
325 | /* SRTT */ | ||
326 | srtt = hctx->srtt; | ||
327 | srtt *= 7; | ||
328 | srtt >>= 3; | ||
329 | tmp = r >> 3; | ||
330 | srtt += tmp; | ||
331 | ccid2_change_srtt(hctx, srtt); | ||
332 | } | ||
333 | s = hctx->rttvar << 2; | ||
334 | /* clock granularity is 1 when based on jiffies */ | ||
335 | if (!s) | ||
336 | s = 1; | ||
337 | hctx->rto = hctx->srtt + s; | ||
338 | |||
339 | /* must be at least a second */ | ||
340 | s = hctx->rto / HZ; | ||
341 | /* DCCP doesn't require this [but I like it cuz my code sux] */ | ||
342 | #if 1 | ||
343 | if (s < 1) | ||
344 | hctx->rto = HZ; | ||
345 | #endif | ||
346 | /* max 60 seconds */ | ||
347 | if (s > 60) | ||
348 | hctx->rto = HZ * 60; | ||
349 | |||
350 | hctx->lastrtt = jiffies; | ||
351 | |||
352 | ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", | ||
353 | hctx->srtt, hctx->rttvar, | ||
354 | hctx->rto, HZ, r); | ||
355 | } | ||
356 | } | 376 | } |
357 | 377 | ||
358 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) | 378 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) |
@@ -579,8 +599,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
579 | if (hctx->pipe == 0) | 599 | if (hctx->pipe == 0) |
580 | sk_stop_timer(sk, &hctx->rtotimer); | 600 | sk_stop_timer(sk, &hctx->rtotimer); |
581 | else | 601 | else |
582 | sk_reset_timer(sk, &hctx->rtotimer, | 602 | sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); |
583 | jiffies + hctx->rto); | ||
584 | done: | 603 | done: |
585 | /* check if incoming Acks allow pending packets to be sent */ | 604 | /* check if incoming Acks allow pending packets to be sent */ |
586 | if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx)) | 605 | if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx)) |
@@ -613,9 +632,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
613 | if (ccid2_hc_tx_alloc_seq(hctx)) | 632 | if (ccid2_hc_tx_alloc_seq(hctx)) |
614 | return -ENOMEM; | 633 | return -ENOMEM; |
615 | 634 | ||
616 | hctx->rto = 3 * HZ; | 635 | hctx->rto = DCCP_TIMEOUT_INIT; |
617 | ccid2_change_srtt(hctx, -1); | ||
618 | hctx->rttvar = -1; | ||
619 | hctx->rpdupack = -1; | 636 | hctx->rpdupack = -1; |
620 | hctx->last_cong = jiffies; | 637 | hctx->last_cong = jiffies; |
621 | setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); | 638 | setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); |
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index d5900dd5d4f4..8b7a2dee2f6d 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h | |||
@@ -44,7 +44,12 @@ struct ccid2_seq { | |||
44 | * | 44 | * |
45 | * @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 | 45 | * @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 |
46 | * @packets_acked: Ack counter for deriving cwnd growth (RFC 3465) | 46 | * @packets_acked: Ack counter for deriving cwnd growth (RFC 3465) |
47 | * @lastrtt: time RTT was last measured | 47 | * @srtt: smoothed RTT estimate, scaled by 2^3 |
48 | * @mdev: smoothed RTT variation, scaled by 2^2 | ||
49 | * @mdev_max: maximum of @mdev during one flight | ||
50 | * @rttvar: moving average/maximum of @mdev_max | ||
51 | * @rto: RTO value deriving from SRTT and RTTVAR (RFC 2988) | ||
52 | * @rtt_seq: to decay RTTVAR at most once per flight | ||
48 | * @rpseq: last consecutive seqno | 53 | * @rpseq: last consecutive seqno |
49 | * @rpdupack: dupacks since rpseq | 54 | * @rpdupack: dupacks since rpseq |
50 | * @av_chunks: list of Ack Vectors received on current skb | 55 | * @av_chunks: list of Ack Vectors received on current skb |
@@ -58,10 +63,13 @@ struct ccid2_hc_tx_sock { | |||
58 | int seqbufc; | 63 | int seqbufc; |
59 | struct ccid2_seq *seqh; | 64 | struct ccid2_seq *seqh; |
60 | struct ccid2_seq *seqt; | 65 | struct ccid2_seq *seqt; |
61 | long rto; | 66 | /* RTT measurement: variables/principles are the same as in TCP */ |
62 | long srtt; | 67 | u32 srtt, |
63 | long rttvar; | 68 | mdev, |
64 | unsigned long lastrtt; | 69 | mdev_max, |
70 | rttvar, | ||
71 | rto; | ||
72 | u64 rtt_seq:48; | ||
65 | struct timer_list rtotimer; | 73 | struct timer_list rtotimer; |
66 | u64 rpseq; | 74 | u64 rpseq; |
67 | int rpdupack; | 75 | int rpdupack; |