aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp/ccids
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp/ccids')
-rw-r--r--net/dccp/ccids/ccid2.c169
-rw-r--r--net/dccp/ccids/ccid2.h20
2 files changed, 108 insertions, 81 deletions
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index f7f5069b1e84..7af3106c1f94 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -113,19 +113,12 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
113 dp->dccps_l_ack_ratio = val; 113 dp->dccps_l_ack_ratio = val;
114} 114}
115 115
116static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
117{
118 ccid2_pr_debug("change SRTT to %ld\n", val);
119 hc->tx_srtt = val;
120}
121
122static void ccid2_start_rto_timer(struct sock *sk); 116static void ccid2_start_rto_timer(struct sock *sk);
123 117
124static void ccid2_hc_tx_rto_expire(unsigned long data) 118static void ccid2_hc_tx_rto_expire(unsigned long data)
125{ 119{
126 struct sock *sk = (struct sock *)data; 120 struct sock *sk = (struct sock *)data;
127 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 121 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
128 long s;
129 122
130 bh_lock_sock(sk); 123 bh_lock_sock(sk);
131 if (sock_owned_by_user(sk)) { 124 if (sock_owned_by_user(sk)) {
@@ -137,10 +130,8 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
137 130
138 /* back-off timer */ 131 /* back-off timer */
139 hc->tx_rto <<= 1; 132 hc->tx_rto <<= 1;
140 133 if (hc->tx_rto > DCCP_RTO_MAX)
141 s = hc->tx_rto / HZ; 134 hc->tx_rto = DCCP_RTO_MAX;
142 if (s > 60)
143 hc->tx_rto = 60 * HZ;
144 135
145 ccid2_start_rto_timer(sk); 136 ccid2_start_rto_timer(sk);
146 137
@@ -168,7 +159,7 @@ static void ccid2_start_rto_timer(struct sock *sk)
168{ 159{
169 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 160 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
170 161
171 ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto); 162 ccid2_pr_debug("setting RTO timeout=%u\n", hc->tx_rto);
172 163
173 BUG_ON(timer_pending(&hc->tx_rtotimer)); 164 BUG_ON(timer_pending(&hc->tx_rtotimer));
174 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); 165 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
@@ -339,9 +330,86 @@ static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
339 ccid2_pr_debug("deleted RTO timer\n"); 330 ccid2_pr_debug("deleted RTO timer\n");
340} 331}
341 332
342static inline void ccid2_new_ack(struct sock *sk, 333/**
343 struct ccid2_seq *seqp, 334 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
344 unsigned int *maxincr) 335 * This code is almost identical with TCP's tcp_rtt_estimator(), since
336 * - it has a higher sampling frequency (recommended by RFC 1323),
337 * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
338 * - it is simple (cf. more complex proposals such as Eifel timer or research
339 * which suggests that the gain should be set according to window size),
340 * - in tests it was found to work well with CCID2 [gerrit].
341 */
342static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
343{
344 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
345 long m = mrtt ? : 1;
346
347 if (hc->tx_srtt == 0) {
348 /* First measurement m */
349 hc->tx_srtt = m << 3;
350 hc->tx_mdev = m << 1;
351
352 hc->tx_mdev_max = max(TCP_RTO_MIN, hc->tx_mdev);
353 hc->tx_rttvar = hc->tx_mdev_max;
354 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
355 } else {
356 /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
357 m -= (hc->tx_srtt >> 3);
358 hc->tx_srtt += m;
359
360 /* Similarly, update scaled mdev with regard to |m| */
361 if (m < 0) {
362 m = -m;
363 m -= (hc->tx_mdev >> 2);
364 /*
365 * This neutralises RTO increase when RTT < SRTT - mdev
366 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
367 * in Linux TCP", USENIX 2002, pp. 49-62).
368 */
369 if (m > 0)
370 m >>= 3;
371 } else {
372 m -= (hc->tx_mdev >> 2);
373 }
374 hc->tx_mdev += m;
375
376 if (hc->tx_mdev > hc->tx_mdev_max) {
377 hc->tx_mdev_max = hc->tx_mdev;
378 if (hc->tx_mdev_max > hc->tx_rttvar)
379 hc->tx_rttvar = hc->tx_mdev_max;
380 }
381
382 /*
383 * Decay RTTVAR at most once per flight, exploiting that
384 * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
385 * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
386 * GAR is a useful bound for FlightSize = pipe.
387 * AWL is probably too low here, as it over-estimates pipe.
388 */
389 if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
390 if (hc->tx_mdev_max < hc->tx_rttvar)
391 hc->tx_rttvar -= (hc->tx_rttvar -
392 hc->tx_mdev_max) >> 2;
393 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
394 hc->tx_mdev_max = TCP_RTO_MIN;
395 }
396 }
397
398 /*
399 * Set RTO from SRTT and RTTVAR
400 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
401 * This agrees with RFC 4341, 5:
402 * "Because DCCP does not retransmit data, DCCP does not require
403 * TCP's recommended minimum timeout of one second".
404 */
405 hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
406
407 if (hc->tx_rto > DCCP_RTO_MAX)
408 hc->tx_rto = DCCP_RTO_MAX;
409}
410
411static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
412 unsigned int *maxincr)
345{ 413{
346 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 414 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
347 415
@@ -355,64 +423,15 @@ static inline void ccid2_new_ack(struct sock *sk,
355 hc->tx_cwnd += 1; 423 hc->tx_cwnd += 1;
356 hc->tx_packets_acked = 0; 424 hc->tx_packets_acked = 0;
357 } 425 }
358 426 /*
359 /* update RTO */ 427 * FIXME: RTT is sampled several times per acknowledgment (for each
360 if (hc->tx_srtt == -1 || 428 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
361 time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { 429 * This causes the RTT to be over-estimated, since the older entries
362 unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; 430 * in the Ack Vector have earlier sending times.
363 int s; 431 * The cleanest solution is to not use the ccid2s_sent field at all
364 432 * and instead use DCCP timestamps: requires changes in other places.
365 /* first measurement */ 433 */
366 if (hc->tx_srtt == -1) { 434 ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent);
367 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
368 r, jiffies,
369 (unsigned long long)seqp->ccid2s_seq);
370 ccid2_change_srtt(hc, r);
371 hc->tx_rttvar = r >> 1;
372 } else {
373 /* RTTVAR */
374 long tmp = hc->tx_srtt - r;
375 long srtt;
376
377 if (tmp < 0)
378 tmp *= -1;
379
380 tmp >>= 2;
381 hc->tx_rttvar *= 3;
382 hc->tx_rttvar >>= 2;
383 hc->tx_rttvar += tmp;
384
385 /* SRTT */
386 srtt = hc->tx_srtt;
387 srtt *= 7;
388 srtt >>= 3;
389 tmp = r >> 3;
390 srtt += tmp;
391 ccid2_change_srtt(hc, srtt);
392 }
393 s = hc->tx_rttvar << 2;
394 /* clock granularity is 1 when based on jiffies */
395 if (!s)
396 s = 1;
397 hc->tx_rto = hc->tx_srtt + s;
398
399 /* must be at least a second */
400 s = hc->tx_rto / HZ;
401 /* DCCP doesn't require this [but I like it cuz my code sux] */
402#if 1
403 if (s < 1)
404 hc->tx_rto = HZ;
405#endif
406 /* max 60 seconds */
407 if (s > 60)
408 hc->tx_rto = HZ * 60;
409
410 hc->tx_lastrtt = jiffies;
411
412 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
413 hc->tx_srtt, hc->tx_rttvar,
414 hc->tx_rto, HZ, r);
415 }
416} 435}
417 436
418static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) 437static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
@@ -662,9 +681,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
662 if (ccid2_hc_tx_alloc_seq(hc)) 681 if (ccid2_hc_tx_alloc_seq(hc))
663 return -ENOMEM; 682 return -ENOMEM;
664 683
665 hc->tx_rto = 3 * HZ; 684 hc->tx_rto = DCCP_TIMEOUT_INIT;
666 ccid2_change_srtt(hc, -1);
667 hc->tx_rttvar = -1;
668 hc->tx_rpdupack = -1; 685 hc->tx_rpdupack = -1;
669 hc->tx_last_cong = jiffies; 686 hc->tx_last_cong = jiffies;
670 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 687 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1ec6a30103bb..b017843ba44d 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -42,7 +42,12 @@ struct ccid2_seq {
42 * struct ccid2_hc_tx_sock - CCID2 TX half connection 42 * struct ccid2_hc_tx_sock - CCID2 TX half connection
43 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 43 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
44 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) 44 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
45 * @tx_lastrtt: time RTT was last measured 45 * @tx_srtt: smoothed RTT estimate, scaled by 2^3
46 * @tx_mdev: smoothed RTT variation, scaled by 2^2
47 * @tx_mdev_max: maximum of @mdev during one flight
48 * @tx_rttvar: moving average/maximum of @mdev_max
49 * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
50 * @tx_rtt_seq: to decay RTTVAR at most once per flight
46 * @tx_rpseq: last consecutive seqno 51 * @tx_rpseq: last consecutive seqno
47 * @tx_rpdupack: dupacks since rpseq 52 * @tx_rpdupack: dupacks since rpseq
48 */ 53 */
@@ -55,11 +60,16 @@ struct ccid2_hc_tx_sock {
55 int tx_seqbufc; 60 int tx_seqbufc;
56 struct ccid2_seq *tx_seqh; 61 struct ccid2_seq *tx_seqh;
57 struct ccid2_seq *tx_seqt; 62 struct ccid2_seq *tx_seqt;
58 long tx_rto; 63
59 long tx_srtt; 64 /* RTT measurement: variables/principles are the same as in TCP */
60 long tx_rttvar; 65 u32 tx_srtt,
61 unsigned long tx_lastrtt; 66 tx_mdev,
67 tx_mdev_max,
68 tx_rttvar,
69 tx_rto;
70 u64 tx_rtt_seq:48;
62 struct timer_list tx_rtotimer; 71 struct timer_list tx_rtotimer;
72
63 u64 tx_rpseq; 73 u64 tx_rpseq;
64 int tx_rpdupack; 74 int tx_rpdupack;
65 unsigned long tx_last_cong; 75 unsigned long tx_last_cong;