Diffstat (limited to 'net/dccp/ccids')
-rw-r--r--  net/dccp/ccids/Kconfig                31
-rw-r--r--  net/dccp/ccids/ccid2.c               444
-rw-r--r--  net/dccp/ccids/ccid2.h                42
-rw-r--r--  net/dccp/ccids/ccid3.c               268
-rw-r--r--  net/dccp/ccids/ccid3.h                51
-rw-r--r--  net/dccp/ccids/lib/loss_interval.c     2
-rw-r--r--  net/dccp/ccids/lib/packet_history.c   39
-rw-r--r--  net/dccp/ccids/lib/packet_history.h   22
-rw-r--r--  net/dccp/ccids/lib/tfrc.h              1
-rw-r--r--  net/dccp/ccids/lib/tfrc_equation.c    14
10 files changed, 350 insertions, 564 deletions
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8408398cd44e..0581143cb800 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
47 47
48 If in doubt, say N. 48 If in doubt, say N.
49 49
50config IP_DCCP_CCID3_RTO
51 int "Use higher bound for nofeedback timer"
52 default 100
53 depends on IP_DCCP_CCID3 && EXPERIMENTAL
54 ---help---
55 Use higher lower bound for nofeedback timer expiration.
56
57 The TFRC nofeedback timer normally expires after the maximum of 4
58 RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
59 with a small RTT this can mean a high processing load and reduced
60 performance, since then the nofeedback timer is triggered very
61 frequently.
62
63 This option enables to set a higher lower bound for the nofeedback
64 value. Values in units of milliseconds can be set here.
65
66 A value of 0 disables this feature by enforcing the value specified
67 in RFC 3448. The following values have been suggested as bounds for
68 experimental use:
69 * 16-20ms to match the typical multimedia inter-frame interval
70 * 100ms as a reasonable compromise [default]
71 * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
72
73 The default of 100ms is a compromise between a large value for
74 efficient DCCP implementations, and a small value to avoid disrupting
75 the network in times of congestion.
76
77 The purpose of the nofeedback timer is to slow DCCP down when there
78 is serious network congestion: experimenting with larger values should
79 therefore not be performed on WANs.
80
81config IP_DCCP_TFRC_LIB 50config IP_DCCP_TFRC_LIB
82 def_bool y if IP_DCCP_CCID3 51 def_bool y if IP_DCCP_CCID3
83 52
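
Aside: the option removed above imposed a lower bound on the RFC 3448, 4.3 nofeedback timeout (the ccid3.c change further below replaces it with the per-route tcp_rto_min() value). A minimal userspace sketch of that bound; function name and parameters are illustrative only:

#include <stdint.h>
typedef uint32_t u32;

/* RFC 3448, 4.3: the nofeedback timer expires after max(4*R, 2*s/X);
 * rto_min_us is the configurable lower bound discussed above. */
static u32 nofeedback_timeout_us(u32 rtt_us, u32 t_ipi_us, u32 rto_min_us)
{
        u32 t_nfb = 4 * rtt_us > 2 * t_ipi_us ? 4 * rtt_us : 2 * t_ipi_us;

        return t_nfb > rto_min_us ? t_nfb : rto_min_us;
}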
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be1..fadecd20d75b 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
25 */ 25 */
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include "../feat.h" 27#include "../feat.h"
28#include "../ccid.h"
29#include "../dccp.h"
30#include "ccid2.h" 28#include "ccid2.h"
31 29
32 30
33#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 31#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
34static int ccid2_debug; 32static int ccid2_debug;
35#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) 33#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
36
37static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
38{
39 int len = 0;
40 int pipe = 0;
41 struct ccid2_seq *seqp = hc->tx_seqh;
42
43 /* there is data in the chain */
44 if (seqp != hc->tx_seqt) {
45 seqp = seqp->ccid2s_prev;
46 len++;
47 if (!seqp->ccid2s_acked)
48 pipe++;
49
50 while (seqp != hc->tx_seqt) {
51 struct ccid2_seq *prev = seqp->ccid2s_prev;
52
53 len++;
54 if (!prev->ccid2s_acked)
55 pipe++;
56
57 /* packets are sent sequentially */
58 BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
59 prev->ccid2s_seq ) >= 0);
60 BUG_ON(time_before(seqp->ccid2s_sent,
61 prev->ccid2s_sent));
62
63 seqp = prev;
64 }
65 }
66
67 BUG_ON(pipe != hc->tx_pipe);
68 ccid2_pr_debug("len of chain=%d\n", len);
69
70 do {
71 seqp = seqp->ccid2s_prev;
72 len++;
73 } while (seqp != hc->tx_seqh);
74
75 ccid2_pr_debug("total len=%d\n", len);
76 BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
77}
78#else 34#else
79#define ccid2_pr_debug(format, a...) 35#define ccid2_pr_debug(format, a...)
80#define ccid2_hc_tx_check_sanity(hc)
81#endif 36#endif
82 37
83static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) 38static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -123,12 +78,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
123 78
124static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) 79static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
125{ 80{
126 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 81 if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
127 82 return CCID_PACKET_WILL_DEQUEUE_LATER;
128 if (hc->tx_pipe < hc->tx_cwnd) 83 return CCID_PACKET_SEND_AT_ONCE;
129 return 0;
130
131 return 1; /* XXX CCID should dequeue when ready instead of polling */
132} 84}
133 85
134static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) 86static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
@@ -156,19 +108,11 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
156 dp->dccps_l_ack_ratio = val; 108 dp->dccps_l_ack_ratio = val;
157} 109}
158 110
159static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
160{
161 ccid2_pr_debug("change SRTT to %ld\n", val);
162 hc->tx_srtt = val;
163}
164
165static void ccid2_start_rto_timer(struct sock *sk);
166
167static void ccid2_hc_tx_rto_expire(unsigned long data) 111static void ccid2_hc_tx_rto_expire(unsigned long data)
168{ 112{
169 struct sock *sk = (struct sock *)data; 113 struct sock *sk = (struct sock *)data;
170 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 114 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
171 long s; 115 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
172 116
173 bh_lock_sock(sk); 117 bh_lock_sock(sk);
174 if (sock_owned_by_user(sk)) { 118 if (sock_owned_by_user(sk)) {
@@ -178,23 +122,17 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
178 122
179 ccid2_pr_debug("RTO_EXPIRE\n"); 123 ccid2_pr_debug("RTO_EXPIRE\n");
180 124
181 ccid2_hc_tx_check_sanity(hc);
182
183 /* back-off timer */ 125 /* back-off timer */
184 hc->tx_rto <<= 1; 126 hc->tx_rto <<= 1;
185 127 if (hc->tx_rto > DCCP_RTO_MAX)
186 s = hc->tx_rto / HZ; 128 hc->tx_rto = DCCP_RTO_MAX;
187 if (s > 60)
188 hc->tx_rto = 60 * HZ;
189
190 ccid2_start_rto_timer(sk);
191 129
192 /* adjust pipe, cwnd etc */ 130 /* adjust pipe, cwnd etc */
193 hc->tx_ssthresh = hc->tx_cwnd / 2; 131 hc->tx_ssthresh = hc->tx_cwnd / 2;
194 if (hc->tx_ssthresh < 2) 132 if (hc->tx_ssthresh < 2)
195 hc->tx_ssthresh = 2; 133 hc->tx_ssthresh = 2;
196 hc->tx_cwnd = 1; 134 hc->tx_cwnd = 1;
197 hc->tx_pipe = 0; 135 hc->tx_pipe = 0;
198 136
199 /* clear state about stuff we sent */ 137 /* clear state about stuff we sent */
200 hc->tx_seqt = hc->tx_seqh; 138 hc->tx_seqt = hc->tx_seqh;
@@ -204,23 +142,18 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
204 hc->tx_rpseq = 0; 142 hc->tx_rpseq = 0;
205 hc->tx_rpdupack = -1; 143 hc->tx_rpdupack = -1;
206 ccid2_change_l_ack_ratio(sk, 1); 144 ccid2_change_l_ack_ratio(sk, 1);
207 ccid2_hc_tx_check_sanity(hc); 145
146 /* if we were blocked before, we may now send cwnd=1 packet */
147 if (sender_was_blocked)
148 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
149 /* restart backed-off timer */
150 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
208out: 151out:
209 bh_unlock_sock(sk); 152 bh_unlock_sock(sk);
210 sock_put(sk); 153 sock_put(sk);
211} 154}
212 155
213static void ccid2_start_rto_timer(struct sock *sk) 156static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
214{
215 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
216
217 ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
218
219 BUG_ON(timer_pending(&hc->tx_rtotimer));
220 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
221}
222
223static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
224{ 157{
225 struct dccp_sock *dp = dccp_sk(sk); 158 struct dccp_sock *dp = dccp_sk(sk);
226 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 159 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
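
Aside: the timer handling in ccid2_hc_tx_rto_expire() above is plain exponential backoff with an upper cap. A standalone sketch, assuming DCCP_RTO_MAX = 64 * HZ as defined in net/dccp/dccp.h (treat the exact value as an assumption here):

#include <stdint.h>
typedef uint32_t u32;
#define HZ            250            /* example kernel config            */
#define DCCP_RTO_MAX  (64u * HZ)     /* assumed upper bound, in jiffies  */

static u32 backoff_rto(u32 rto)
{
        rto <<= 1;                   /* double the timeout per expiry    */
        return rto > DCCP_RTO_MAX ? DCCP_RTO_MAX : rto;
}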
@@ -230,7 +163,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
230 163
231 hc->tx_seqh->ccid2s_seq = dp->dccps_gss; 164 hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
232 hc->tx_seqh->ccid2s_acked = 0; 165 hc->tx_seqh->ccid2s_acked = 0;
233 hc->tx_seqh->ccid2s_sent = jiffies; 166 hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
234 167
235 next = hc->tx_seqh->ccid2s_next; 168 next = hc->tx_seqh->ccid2s_next;
236 /* check if we need to alloc more space */ 169 /* check if we need to alloc more space */
@@ -296,99 +229,104 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
296 } 229 }
297#endif 230#endif
298 231
299 /* setup RTO timer */ 232 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
300 if (!timer_pending(&hc->tx_rtotimer))
301 ccid2_start_rto_timer(sk);
302 233
303#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 234#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
304 do { 235 do {
305 struct ccid2_seq *seqp = hc->tx_seqt; 236 struct ccid2_seq *seqp = hc->tx_seqt;
306 237
307 while (seqp != hc->tx_seqh) { 238 while (seqp != hc->tx_seqh) {
308 ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", 239 ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
309 (unsigned long long)seqp->ccid2s_seq, 240 (unsigned long long)seqp->ccid2s_seq,
310 seqp->ccid2s_acked, seqp->ccid2s_sent); 241 seqp->ccid2s_acked, seqp->ccid2s_sent);
311 seqp = seqp->ccid2s_next; 242 seqp = seqp->ccid2s_next;
312 } 243 }
313 } while (0); 244 } while (0);
314 ccid2_pr_debug("=========\n"); 245 ccid2_pr_debug("=========\n");
315 ccid2_hc_tx_check_sanity(hc);
316#endif 246#endif
317} 247}
318 248
319/* XXX Lame code duplication! 249/**
320 * returns -1 if none was found. 250 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
321 * else returns the next offset to use in the function call. 251 * This code is almost identical with TCP's tcp_rtt_estimator(), since
252 * - it has a higher sampling frequency (recommended by RFC 1323),
253 * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
254 * - it is simple (cf. more complex proposals such as Eifel timer or research
255 * which suggests that the gain should be set according to window size),
256 * - in tests it was found to work well with CCID2 [gerrit].
322 */ 257 */
323static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, 258static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
324 unsigned char **vec, unsigned char *veclen)
325{ 259{
326 const struct dccp_hdr *dh = dccp_hdr(skb); 260 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
327 unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); 261 long m = mrtt ? : 1;
328 unsigned char *opt_ptr; 262
329 const unsigned char *opt_end = (unsigned char *)dh + 263 if (hc->tx_srtt == 0) {
330 (dh->dccph_doff * 4); 264 /* First measurement m */
331 unsigned char opt, len; 265 hc->tx_srtt = m << 3;
332 unsigned char *value; 266 hc->tx_mdev = m << 1;
333 267
334 BUG_ON(offset < 0); 268 hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
335 options += offset; 269 hc->tx_rttvar = hc->tx_mdev_max;
336 opt_ptr = options; 270
337 if (opt_ptr >= opt_end) 271 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
338 return -1; 272 } else {
339 273 /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
340 while (opt_ptr != opt_end) { 274 m -= (hc->tx_srtt >> 3);
341 opt = *opt_ptr++; 275 hc->tx_srtt += m;
342 len = 0; 276
343 value = NULL; 277 /* Similarly, update scaled mdev with regard to |m| */
344 278 if (m < 0) {
345 /* Check if this isn't a single byte option */ 279 m = -m;
346 if (opt > DCCPO_MAX_RESERVED) { 280 m -= (hc->tx_mdev >> 2);
347 if (opt_ptr == opt_end)
348 goto out_invalid_option;
349
350 len = *opt_ptr++;
351 if (len < 3)
352 goto out_invalid_option;
353 /* 281 /*
354 * Remove the type and len fields, leaving 282 * This neutralises RTO increase when RTT < SRTT - mdev
355 * just the value size 283 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
284 * in Linux TCP", USENIX 2002, pp. 49-62).
356 */ 285 */
357 len -= 2; 286 if (m > 0)
358 value = opt_ptr; 287 m >>= 3;
359 opt_ptr += len; 288 } else {
289 m -= (hc->tx_mdev >> 2);
290 }
291 hc->tx_mdev += m;
360 292
361 if (opt_ptr > opt_end) 293 if (hc->tx_mdev > hc->tx_mdev_max) {
362 goto out_invalid_option; 294 hc->tx_mdev_max = hc->tx_mdev;
295 if (hc->tx_mdev_max > hc->tx_rttvar)
296 hc->tx_rttvar = hc->tx_mdev_max;
363 } 297 }
364 298
365 switch (opt) { 299 /*
366 case DCCPO_ACK_VECTOR_0: 300 * Decay RTTVAR at most once per flight, exploiting that
367 case DCCPO_ACK_VECTOR_1: 301 * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
368 *vec = value; 302 * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
369 *veclen = len; 303 * GAR is a useful bound for FlightSize = pipe.
370 return offset + (opt_ptr - options); 304 * AWL is probably too low here, as it over-estimates pipe.
305 */
306 if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
307 if (hc->tx_mdev_max < hc->tx_rttvar)
308 hc->tx_rttvar -= (hc->tx_rttvar -
309 hc->tx_mdev_max) >> 2;
310 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
311 hc->tx_mdev_max = tcp_rto_min(sk);
371 } 312 }
372 } 313 }
373 314
374 return -1; 315 /*
375 316 * Set RTO from SRTT and RTTVAR
376out_invalid_option: 317 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
377 DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); 318 * This agrees with RFC 4341, 5:
378 return -1; 319 * "Because DCCP does not retransmit data, DCCP does not require
379} 320 * TCP's recommended minimum timeout of one second".
380 321 */
381static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) 322 hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
382{
383 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
384 323
385 sk_stop_timer(sk, &hc->tx_rtotimer); 324 if (hc->tx_rto > DCCP_RTO_MAX)
386 ccid2_pr_debug("deleted RTO timer\n"); 325 hc->tx_rto = DCCP_RTO_MAX;
387} 326}
388 327
389static inline void ccid2_new_ack(struct sock *sk, 328static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
390 struct ccid2_seq *seqp, 329 unsigned int *maxincr)
391 unsigned int *maxincr)
392{ 330{
393 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 331 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
394 332
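
A compact userspace rendering of ccid2_rtt_estimator() above, keeping the same fixpoint scaling (SRTT stored as srtt*8, mdev as mdev*4) but omitting the mdev_max/rttvar decay and the Sarolahti/Kuznetsov damping step for brevity; rto_min stands in for tcp_rto_min(sk):

#include <stdint.h>
typedef uint32_t u32;

struct rtt_est { u32 srtt8, mdev4; };    /* srtt << 3, mdev << 2 */

static u32 rtt_sample(struct rtt_est *e, long m, u32 rto_min)
{
        u32 rto;

        if (m <= 0)
                m = 1;                   /* never use a zero sample */
        if (e->srtt8 == 0) {             /* first measurement       */
                e->srtt8 = m << 3;
                e->mdev4 = m << 1;       /* initial mdev = m/2      */
        } else {
                m -= (e->srtt8 >> 3);    /* m' = m - srtt           */
                e->srtt8 += m;           /* srtt += m'/8            */
                if (m < 0)
                        m = -m;
                m -= (e->mdev4 >> 2);    /* |m'| - mdev             */
                e->mdev4 += m;           /* mdev += (|m'|-mdev)/4   */
        }
        rto = (e->srtt8 >> 3) + e->mdev4;    /* RTO = srtt + 4*mdev */
        return rto < rto_min ? rto_min : rto;
}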
@@ -402,93 +340,27 @@ static inline void ccid2_new_ack(struct sock *sk,
402 hc->tx_cwnd += 1; 340 hc->tx_cwnd += 1;
403 hc->tx_packets_acked = 0; 341 hc->tx_packets_acked = 0;
404 } 342 }
405 343 /*
406 /* update RTO */ 344 * FIXME: RTT is sampled several times per acknowledgment (for each
407 if (hc->tx_srtt == -1 || 345 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
408 time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { 346 * This causes the RTT to be over-estimated, since the older entries
409 unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; 347 * in the Ack Vector have earlier sending times.
410 int s; 348 * The cleanest solution is to not use the ccid2s_sent field at all
411 349 * and instead use DCCP timestamps: requires changes in other places.
412 /* first measurement */ 350 */
413 if (hc->tx_srtt == -1) { 351 ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
414 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
415 r, jiffies,
416 (unsigned long long)seqp->ccid2s_seq);
417 ccid2_change_srtt(hc, r);
418 hc->tx_rttvar = r >> 1;
419 } else {
420 /* RTTVAR */
421 long tmp = hc->tx_srtt - r;
422 long srtt;
423
424 if (tmp < 0)
425 tmp *= -1;
426
427 tmp >>= 2;
428 hc->tx_rttvar *= 3;
429 hc->tx_rttvar >>= 2;
430 hc->tx_rttvar += tmp;
431
432 /* SRTT */
433 srtt = hc->tx_srtt;
434 srtt *= 7;
435 srtt >>= 3;
436 tmp = r >> 3;
437 srtt += tmp;
438 ccid2_change_srtt(hc, srtt);
439 }
440 s = hc->tx_rttvar << 2;
441 /* clock granularity is 1 when based on jiffies */
442 if (!s)
443 s = 1;
444 hc->tx_rto = hc->tx_srtt + s;
445
446 /* must be at least a second */
447 s = hc->tx_rto / HZ;
448 /* DCCP doesn't require this [but I like it cuz my code sux] */
449#if 1
450 if (s < 1)
451 hc->tx_rto = HZ;
452#endif
453 /* max 60 seconds */
454 if (s > 60)
455 hc->tx_rto = HZ * 60;
456
457 hc->tx_lastrtt = jiffies;
458
459 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
460 hc->tx_srtt, hc->tx_rttvar,
461 hc->tx_rto, HZ, r);
462 }
463
464 /* we got a new ack, so re-start RTO timer */
465 ccid2_hc_tx_kill_rto_timer(sk);
466 ccid2_start_rto_timer(sk);
467}
468
469static void ccid2_hc_tx_dec_pipe(struct sock *sk)
470{
471 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
472
473 if (hc->tx_pipe == 0)
474 DCCP_BUG("pipe == 0");
475 else
476 hc->tx_pipe--;
477
478 if (hc->tx_pipe == 0)
479 ccid2_hc_tx_kill_rto_timer(sk);
480} 352}
481 353
482static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) 354static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
483{ 355{
484 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 356 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
485 357
486 if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { 358 if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
487 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); 359 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
488 return; 360 return;
489 } 361 }
490 362
491 hc->tx_last_cong = jiffies; 363 hc->tx_last_cong = ccid2_time_stamp;
492 364
493 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; 365 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
494 hc->tx_ssthresh = max(hc->tx_cwnd, 2U); 366 hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
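
The halving in ccid2_congestion_event() uses GCC's binary `x ?: y` extension (x if non-zero, else y); written out together with the ssthresh clamp from the following line:

#include <stdint.h>
typedef uint32_t u32;

/* on a congestion event (loss or ECN mark), applied once per RTT */
static void congestion_event(u32 *cwnd, u32 *ssthresh)
{
        *cwnd     = *cwnd / 2 ? *cwnd / 2 : 1;  /* halve, floor at 1 */
        *ssthresh = *cwnd > 2 ? *cwnd : 2;      /* max(cwnd, 2)      */
}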
@@ -498,19 +370,31 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
498 ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); 370 ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
499} 371}
500 372
373static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
374 u8 option, u8 *optval, u8 optlen)
375{
376 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
377
378 switch (option) {
379 case DCCPO_ACK_VECTOR_0:
380 case DCCPO_ACK_VECTOR_1:
381 return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
382 option - DCCPO_ACK_VECTOR_0);
383 }
384 return 0;
385}
386
501static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 387static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
502{ 388{
503 struct dccp_sock *dp = dccp_sk(sk); 389 struct dccp_sock *dp = dccp_sk(sk);
504 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 390 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
391 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
392 struct dccp_ackvec_parsed *avp;
505 u64 ackno, seqno; 393 u64 ackno, seqno;
506 struct ccid2_seq *seqp; 394 struct ccid2_seq *seqp;
507 unsigned char *vector;
508 unsigned char veclen;
509 int offset = 0;
510 int done = 0; 395 int done = 0;
511 unsigned int maxincr = 0; 396 unsigned int maxincr = 0;
512 397
513 ccid2_hc_tx_check_sanity(hc);
514 /* check reverse path congestion */ 398 /* check reverse path congestion */
515 seqno = DCCP_SKB_CB(skb)->dccpd_seq; 399 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
516 400
@@ -541,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
541 } 425 }
542 426
543 /* check forward path congestion */ 427 /* check forward path congestion */
544 /* still didn't send out new data packets */ 428 if (dccp_packet_without_ack(skb))
545 if (hc->tx_seqh == hc->tx_seqt)
546 return; 429 return;
547 430
548 switch (DCCP_SKB_CB(skb)->dccpd_type) { 431 /* still didn't send out new data packets */
549 case DCCP_PKT_ACK: 432 if (hc->tx_seqh == hc->tx_seqt)
550 case DCCP_PKT_DATAACK: 433 goto done;
551 break;
552 default:
553 return;
554 }
555 434
556 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; 435 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
557 if (after48(ackno, hc->tx_high_ack)) 436 if (after48(ackno, hc->tx_high_ack))
@@ -575,16 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
575 maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); 454 maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
576 455
577 /* go through all ack vectors */ 456 /* go through all ack vectors */
578 while ((offset = ccid2_ackvector(sk, skb, offset, 457 list_for_each_entry(avp, &hc->tx_av_chunks, node) {
579 &vector, &veclen)) != -1) {
580 /* go through this ack vector */ 458 /* go through this ack vector */
581 while (veclen--) { 459 for (; avp->len--; avp->vec++) {
582 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; 460 u64 ackno_end_rl = SUB48(ackno,
583 u64 ackno_end_rl = SUB48(ackno, rl); 461 dccp_ackvec_runlen(avp->vec));
584 462
585 ccid2_pr_debug("ackvec start:%llu end:%llu\n", 463 ccid2_pr_debug("ackvec %llu |%u,%u|\n",
586 (unsigned long long)ackno, 464 (unsigned long long)ackno,
587 (unsigned long long)ackno_end_rl); 465 dccp_ackvec_state(avp->vec) >> 6,
466 dccp_ackvec_runlen(avp->vec));
588 /* if the seqno we are analyzing is larger than the 467 /* if the seqno we are analyzing is larger than the
589 * current ackno, then move towards the tail of our 468 * current ackno, then move towards the tail of our
590 * seqnos. 469 * seqnos.
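
Aside: dccp_ackvec_runlen()/dccp_ackvec_state() used above unpack the Ack Vector cell format of RFC 4340, 11.4 — a 2-bit state in the top bits and a 6-bit run length below it. An illustrative (not the kernel's exact) pair of accessors:

#include <stdint.h>
typedef uint8_t u8;

#define AV_MAX_RUNLEN 0x3F            /* low 6 bits: run length   */

static u8 av_runlen(const u8 *cell) { return *cell & AV_MAX_RUNLEN; }
static u8 av_state(const u8 *cell)  { return *cell >> 6;            }
/* state: 0 = received, 1 = received ECN-marked, 3 = not yet received */

(The in-kernel dccp_ackvec_state() leaves the state bits 7-6 in place unshifted, hence the `>> 6` in the debug printout above.)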
@@ -603,24 +482,22 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
603 * run length 482 * run length
604 */ 483 */
605 while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { 484 while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
606 const u8 state = *vector & 485 const u8 state = dccp_ackvec_state(avp->vec);
607 DCCP_ACKVEC_STATE_MASK;
608 486
609 /* new packet received or marked */ 487 /* new packet received or marked */
610 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && 488 if (state != DCCPAV_NOT_RECEIVED &&
611 !seqp->ccid2s_acked) { 489 !seqp->ccid2s_acked) {
612 if (state == 490 if (state == DCCPAV_ECN_MARKED)
613 DCCP_ACKVEC_STATE_ECN_MARKED) {
614 ccid2_congestion_event(sk, 491 ccid2_congestion_event(sk,
615 seqp); 492 seqp);
616 } else 493 else
617 ccid2_new_ack(sk, seqp, 494 ccid2_new_ack(sk, seqp,
618 &maxincr); 495 &maxincr);
619 496
620 seqp->ccid2s_acked = 1; 497 seqp->ccid2s_acked = 1;
621 ccid2_pr_debug("Got ack for %llu\n", 498 ccid2_pr_debug("Got ack for %llu\n",
622 (unsigned long long)seqp->ccid2s_seq); 499 (unsigned long long)seqp->ccid2s_seq);
623 ccid2_hc_tx_dec_pipe(sk); 500 hc->tx_pipe--;
624 } 501 }
625 if (seqp == hc->tx_seqt) { 502 if (seqp == hc->tx_seqt) {
626 done = 1; 503 done = 1;
@@ -632,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
632 break; 509 break;
633 510
634 ackno = SUB48(ackno_end_rl, 1); 511 ackno = SUB48(ackno_end_rl, 1);
635 vector++;
636 } 512 }
637 if (done) 513 if (done)
638 break; 514 break;
@@ -677,7 +553,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
677 * one ack vector. 553 * one ack vector.
678 */ 554 */
679 ccid2_congestion_event(sk, seqp); 555 ccid2_congestion_event(sk, seqp);
680 ccid2_hc_tx_dec_pipe(sk); 556 hc->tx_pipe--;
681 } 557 }
682 if (seqp == hc->tx_seqt) 558 if (seqp == hc->tx_seqt)
683 break; 559 break;
@@ -695,7 +571,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
695 hc->tx_seqt = hc->tx_seqt->ccid2s_next; 571 hc->tx_seqt = hc->tx_seqt->ccid2s_next;
696 } 572 }
697 573
698 ccid2_hc_tx_check_sanity(hc); 574 /* restart RTO timer if not all outstanding data has been acked */
575 if (hc->tx_pipe == 0)
576 sk_stop_timer(sk, &hc->tx_rtotimer);
577 else
578 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
579done:
580 /* check if incoming Acks allow pending packets to be sent */
581 if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
582 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
583 dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
584}
585
586/*
587 * Convert RFC 3390 larger initial window into an equivalent number of packets.
588 * This is based on the numbers specified in RFC 5681, 3.1.
589 */
590static inline u32 rfc3390_bytes_to_packets(const u32 smss)
591{
592 return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
699} 593}
700 594
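
The mapping in rfc3390_bytes_to_packets() follows RFC 5681, 3.1 (IW = min(4*SMSS, max(2*SMSS, 4380 bytes)), expressed in packets); a quick self-check:

#include <assert.h>
#include <stdint.h>
typedef uint32_t u32;

static u32 rfc3390_bytes_to_packets(u32 smss)
{
        return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
}

int main(void)
{
        assert(rfc3390_bytes_to_packets(536)  == 4);  /* small MSS    */
        assert(rfc3390_bytes_to_packets(1460) == 3);  /* Ethernet MSS */
        assert(rfc3390_bytes_to_packets(4352) == 2);  /* jumbo frames */
        return 0;
}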
701static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 595static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -707,12 +601,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
707 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ 601 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
708 hc->tx_ssthresh = ~0U; 602 hc->tx_ssthresh = ~0U;
709 603
710 /* 604 /* Use larger initial windows (RFC 4341, section 5). */
711 * RFC 4341, 5: "The cwnd parameter is initialized to at most four 605 hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
712 * packets for new connections, following the rules from [RFC3390]".
713 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
714 */
715 hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
716 606
717 /* Make sure that Ack Ratio is enabled and within bounds. */ 607 /* Make sure that Ack Ratio is enabled and within bounds. */
718 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); 608 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
@@ -723,15 +613,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
723 if (ccid2_hc_tx_alloc_seq(hc)) 613 if (ccid2_hc_tx_alloc_seq(hc))
724 return -ENOMEM; 614 return -ENOMEM;
725 615
726 hc->tx_rto = 3 * HZ; 616 hc->tx_rto = DCCP_TIMEOUT_INIT;
727 ccid2_change_srtt(hc, -1);
728 hc->tx_rttvar = -1;
729 hc->tx_rpdupack = -1; 617 hc->tx_rpdupack = -1;
730 hc->tx_last_cong = jiffies; 618 hc->tx_last_cong = ccid2_time_stamp;
731 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 619 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
732 (unsigned long)sk); 620 (unsigned long)sk);
733 621 INIT_LIST_HEAD(&hc->tx_av_chunks);
734 ccid2_hc_tx_check_sanity(hc);
735 return 0; 622 return 0;
736} 623}
737 624
@@ -740,7 +627,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
740 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 627 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
741 int i; 628 int i;
742 629
743 ccid2_hc_tx_kill_rto_timer(sk); 630 sk_stop_timer(sk, &hc->tx_rtotimer);
744 631
745 for (i = 0; i < hc->tx_seqbufc; i++) 632 for (i = 0; i < hc->tx_seqbufc; i++)
746 kfree(hc->tx_seqbuf[i]); 633 kfree(hc->tx_seqbuf[i]);
@@ -765,16 +652,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
765} 652}
766 653
767struct ccid_operations ccid2_ops = { 654struct ccid_operations ccid2_ops = {
768 .ccid_id = DCCPC_CCID2, 655 .ccid_id = DCCPC_CCID2,
769 .ccid_name = "TCP-like", 656 .ccid_name = "TCP-like",
770 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), 657 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
771 .ccid_hc_tx_init = ccid2_hc_tx_init, 658 .ccid_hc_tx_init = ccid2_hc_tx_init,
772 .ccid_hc_tx_exit = ccid2_hc_tx_exit, 659 .ccid_hc_tx_exit = ccid2_hc_tx_exit,
773 .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, 660 .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
774 .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, 661 .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
775 .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, 662 .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
776 .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), 663 .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
777 .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, 664 .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
665 .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
778}; 666};
779 667
780#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 668#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1ec6a30103bb..e9985dafc2c7 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
18#ifndef _DCCP_CCID2_H_ 18#ifndef _DCCP_CCID2_H_
19#define _DCCP_CCID2_H_ 19#define _DCCP_CCID2_H_
20 20
21#include <linux/dccp.h>
22#include <linux/timer.h> 21#include <linux/timer.h>
23#include <linux/types.h> 22#include <linux/types.h>
24#include "../ccid.h" 23#include "../ccid.h"
24#include "../dccp.h"
25
26/*
27 * CCID-2 timestamping faces the same issues as TCP timestamping.
28 * Hence we reuse/share as much of the code as possible.
29 */
30#define ccid2_time_stamp tcp_time_stamp
31
25/* NUMDUPACK parameter from RFC 4341, p. 6 */ 32/* NUMDUPACK parameter from RFC 4341, p. 6 */
26#define NUMDUPACK 3 33#define NUMDUPACK 3
27 34
28struct sock;
29
30struct ccid2_seq { 35struct ccid2_seq {
31 u64 ccid2s_seq; 36 u64 ccid2s_seq;
32 unsigned long ccid2s_sent; 37 u32 ccid2s_sent;
33 int ccid2s_acked; 38 int ccid2s_acked;
34 struct ccid2_seq *ccid2s_prev; 39 struct ccid2_seq *ccid2s_prev;
35 struct ccid2_seq *ccid2s_next; 40 struct ccid2_seq *ccid2s_next;
@@ -42,9 +47,15 @@ struct ccid2_seq {
42 * struct ccid2_hc_tx_sock - CCID2 TX half connection 47 * struct ccid2_hc_tx_sock - CCID2 TX half connection
43 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 48 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
44 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) 49 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
45 * @tx_lastrtt: time RTT was last measured 50 * @tx_srtt: smoothed RTT estimate, scaled by 2^3
51 * @tx_mdev: smoothed RTT variation, scaled by 2^2
52 * @tx_mdev_max: maximum of @mdev during one flight
53 * @tx_rttvar: moving average/maximum of @mdev_max
54 * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
55 * @tx_rtt_seq: to decay RTTVAR at most once per flight
46 * @tx_rpseq: last consecutive seqno 56 * @tx_rpseq: last consecutive seqno
47 * @tx_rpdupack: dupacks since rpseq 57 * @tx_rpdupack: dupacks since rpseq
58 * @tx_av_chunks: list of Ack Vectors received on current skb
48 */ 59 */
49struct ccid2_hc_tx_sock { 60struct ccid2_hc_tx_sock {
50 u32 tx_cwnd; 61 u32 tx_cwnd;
@@ -55,17 +66,28 @@ struct ccid2_hc_tx_sock {
55 int tx_seqbufc; 66 int tx_seqbufc;
56 struct ccid2_seq *tx_seqh; 67 struct ccid2_seq *tx_seqh;
57 struct ccid2_seq *tx_seqt; 68 struct ccid2_seq *tx_seqt;
58 long tx_rto; 69
59 long tx_srtt; 70 /* RTT measurement: variables/principles are the same as in TCP */
60 long tx_rttvar; 71 u32 tx_srtt,
61 unsigned long tx_lastrtt; 72 tx_mdev,
73 tx_mdev_max,
74 tx_rttvar,
75 tx_rto;
76 u64 tx_rtt_seq:48;
62 struct timer_list tx_rtotimer; 77 struct timer_list tx_rtotimer;
78
63 u64 tx_rpseq; 79 u64 tx_rpseq;
64 int tx_rpdupack; 80 int tx_rpdupack;
65 unsigned long tx_last_cong; 81 u32 tx_last_cong;
66 u64 tx_high_ack; 82 u64 tx_high_ack;
83 struct list_head tx_av_chunks;
67}; 84};
68 85
86static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
87{
88 return hc->tx_pipe >= hc->tx_cwnd;
89}
90
69struct ccid2_hc_rx_sock { 91struct ccid2_hc_rx_sock {
70 int rx_data; 92 int rx_data;
71}; 93};
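
ccid2_cwnd_network_limited() above is the pivot of the new dequeueing protocol: send_packet() answers CCID_PACKET_WILL_DEQUEUE_LATER while pipe >= cwnd, and the Ack/RTO paths schedule dccps_xmitlet only on the blocked-to-unblocked transition. A userspace sketch of that pattern (names illustrative):

#include <stdbool.h>
#include <stdint.h>
typedef uint32_t u32;

struct tx_state { u32 pipe, cwnd; };

static bool network_limited(const struct tx_state *t)
{
        return t->pipe >= t->cwnd;
}

/* ack path: wake the transmit queue only on a blocked->unblocked edge */
static bool ack_received(struct tx_state *t, u32 newly_acked)
{
        bool was_blocked = network_limited(t);

        t->pipe -= newly_acked;
        return was_blocked && !network_limited(t);  /* => schedule xmitlet */
}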
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 95f752986497..3d604e1349c0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -54,7 +54,6 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
54 [TFRC_SSTATE_NO_SENT] = "NO_SENT", 54 [TFRC_SSTATE_NO_SENT] = "NO_SENT",
55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", 55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
56 [TFRC_SSTATE_FBACK] = "FBACK", 56 [TFRC_SSTATE_FBACK] = "FBACK",
57 [TFRC_SSTATE_TERM] = "TERM",
58 }; 57 };
59 58
60 return ccid3_state_names[state]; 59 return ccid3_state_names[state];
@@ -91,19 +90,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
91 return scaled_div(w_init << 6, hc->tx_rtt); 90 return scaled_div(w_init << 6, hc->tx_rtt);
92} 91}
93 92
94/* 93/**
95 * Recalculate t_ipi and delta (should be called whenever X changes) 94 * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst
95 * This respects the granularity of X_inst (64 * bytes/second).
96 */ 96 */
97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) 97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
98{ 98{
99 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
100 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); 99 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
101 100
102 /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ 101 ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
103 hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); 102 hc->tx_s, (unsigned)(hc->tx_x >> 6));
104
105 ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi,
106 hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6));
107} 103}
108 104
109static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) 105static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
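
Sketch of the arithmetic in ccid3_update_send_interval(), assuming scaled_div32(a, b) = a * 10^6 / b as in ccids/lib/tfrc.h: with X kept in units of 64 bytes/second, t_ipi comes out in microseconds.

#include <stdint.h>
typedef uint32_t u32;
typedef uint64_t u64;

static u32 send_interval_us(u32 s_bytes, u64 x_64bps)   /* X in 64*B/s */
{
        return (u32)(((u64)s_bytes << 6) * 1000000 / x_64bps);
}
/* e.g. s = 1000 B at X = 1 MB/s (x_64bps = 64000000) -> t_ipi = 1000 us */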
@@ -211,16 +207,19 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
211 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, 207 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
212 ccid3_tx_state_name(hc->tx_state)); 208 ccid3_tx_state_name(hc->tx_state));
213 209
210 /* Ignore and do not restart after leaving the established state */
211 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
212 goto out;
213
214 /* Reset feedback state to "no feedback received" */
214 if (hc->tx_state == TFRC_SSTATE_FBACK) 215 if (hc->tx_state == TFRC_SSTATE_FBACK)
215 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 216 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
216 else if (hc->tx_state != TFRC_SSTATE_NO_FBACK)
217 goto out;
218 217
219 /* 218 /*
220 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 219 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
220 * RTO is 0 if and only if no feedback has been received yet.
221 */ 221 */
222 if (hc->tx_t_rto == 0 || /* no feedback received yet */ 222 if (hc->tx_t_rto == 0 || hc->tx_p == 0) {
223 hc->tx_p == 0) {
224 223
225 /* halve send rate directly */ 224 /* halve send rate directly */
226 hc->tx_x = max(hc->tx_x / 2, 225 hc->tx_x = max(hc->tx_x / 2,
@@ -256,7 +255,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
256 * Set new timeout for the nofeedback timer. 255 * Set new timeout for the nofeedback timer.
257 * See comments in packet_recv() regarding the value of t_RTO. 256 * See comments in packet_recv() regarding the value of t_RTO.
258 */ 257 */
259 if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */ 258 if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */
260 t_nfb = TFRC_INITIAL_TIMEOUT; 259 t_nfb = TFRC_INITIAL_TIMEOUT;
261 else 260 else
262 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); 261 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);
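
The "halve send rate directly" branch above is bounded below by one packet per t_mbi = 64 seconds (TFRC_T_MBI); as a standalone sketch:

#include <stdint.h>
typedef uint64_t u64;
#define T_MBI 64                             /* backoff interval, seconds */

/* X and the result are in units of 64 * bytes/second, as in CCID-3 */
static u64 halve_rate(u64 x, u64 s_bytes)
{
        u64 x_min = (s_bytes << 6) / T_MBI;  /* one s-sized packet / 64 s */
        u64 half  = x / 2;

        return half > x_min ? half : x_min;
}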
@@ -269,11 +268,11 @@ out:
269 sock_put(sk); 268 sock_put(sk);
270} 269}
271 270
272/* 271/**
273 * returns 272 * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets
274 * > 0: delay (in msecs) that should pass before actually sending 273 * @skb: next packet candidate to send on @sk
275 * = 0: can send immediately 274 * This function uses the convention of ccid_packet_dequeue_eval() and
276 * < 0: error condition; do not send packet 275 * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
277 */ 276 */
278static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) 277static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
279{ 278{
@@ -290,8 +289,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
290 if (unlikely(skb->len == 0)) 289 if (unlikely(skb->len == 0))
291 return -EBADMSG; 290 return -EBADMSG;
292 291
293 switch (hc->tx_state) { 292 if (hc->tx_state == TFRC_SSTATE_NO_SENT) {
294 case TFRC_SSTATE_NO_SENT:
295 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + 293 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies +
296 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); 294 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
297 hc->tx_last_win_count = 0; 295 hc->tx_last_win_count = 0;
@@ -326,27 +324,22 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
326 ccid3_update_send_interval(hc); 324 ccid3_update_send_interval(hc);
327 325
328 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 326 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
329 break; 327
330 case TFRC_SSTATE_NO_FBACK: 328 } else {
331 case TFRC_SSTATE_FBACK:
332 delay = ktime_us_delta(hc->tx_t_nom, now); 329 delay = ktime_us_delta(hc->tx_t_nom, now);
333 ccid3_pr_debug("delay=%ld\n", (long)delay); 330 ccid3_pr_debug("delay=%ld\n", (long)delay);
334 /* 331 /*
335 * Scheduling of packet transmissions [RFC 3448, 4.6] 332 * Scheduling of packet transmissions (RFC 5348, 8.3)
336 * 333 *
337 * if (t_now > t_nom - delta) 334 * if (t_now > t_nom - delta)
338 * // send the packet now 335 * // send the packet now
339 * else 336 * else
340 * // send the packet in (t_nom - t_now) milliseconds. 337 * // send the packet in (t_nom - t_now) milliseconds.
341 */ 338 */
342 if (delay - (s64)hc->tx_delta >= 1000) 339 if (delay >= TFRC_T_DELTA)
343 return (u32)delay / 1000L; 340 return (u32)delay / USEC_PER_MSEC;
344 341
345 ccid3_hc_tx_update_win_count(hc, now); 342 ccid3_hc_tx_update_win_count(hc, now);
346 break;
347 case TFRC_SSTATE_TERM:
348 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
349 return -EINVAL;
350 } 343 }
351 344
352 /* prepare to send now (add options etc.) */ 345 /* prepare to send now (add options etc.) */
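
The branch above implements the RFC 5348, 8.3 rule with millisecond reporting; isolated:

#include <stdint.h>
typedef int64_t  s64;
typedef uint32_t u32;
#define T_DELTA_US 1000u     /* TFRC_T_DELTA when HZ >= 500, see ccid3.h */

/* returns 0 to send now, else the delay to wait, in whole milliseconds */
static u32 tx_delay_msecs(s64 t_nom_us, s64 now_us)
{
        s64 delay = t_nom_us - now_us;

        return delay >= (s64)T_DELTA_US ? (u32)(delay / 1000) : 0;
}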
@@ -355,11 +348,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
355 348
356 /* set the nominal send time for the next following packet */ 349 /* set the nominal send time for the next following packet */
357 hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi); 350 hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi);
358 return 0; 351 return CCID_PACKET_SEND_AT_ONCE;
359} 352}
360 353
361static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, 354static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
362 unsigned int len)
363{ 355{
364 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 356 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
365 357
@@ -372,48 +364,34 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
372static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 364static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
373{ 365{
374 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 366 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
375 struct ccid3_options_received *opt_recv; 367 struct tfrc_tx_hist_entry *acked;
376 ktime_t now; 368 ktime_t now;
377 unsigned long t_nfb; 369 unsigned long t_nfb;
378 u32 pinv, r_sample; 370 u32 r_sample;
379 371
380 /* we are only interested in ACKs */ 372 /* we are only interested in ACKs */
381 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || 373 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
382 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) 374 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
383 return; 375 return;
384 /* ... and only in the established state */
385 if (hc->tx_state != TFRC_SSTATE_FBACK &&
386 hc->tx_state != TFRC_SSTATE_NO_FBACK)
387 return;
388
389 opt_recv = &hc->tx_options_received;
390 now = ktime_get_real();
391
392 /* Estimate RTT from history if ACK number is valid */
393 r_sample = tfrc_tx_hist_rtt(hc->tx_hist,
394 DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
395 if (r_sample == 0) {
396 DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
397 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
398 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
399 return;
400 }
401
402 /* Update receive rate in units of 64 * bytes/second */
403 hc->tx_x_recv = opt_recv->ccid3or_receive_rate;
404 hc->tx_x_recv <<= 6;
405
406 /* Update loss event rate (which is scaled by 1e6) */
407 pinv = opt_recv->ccid3or_loss_event_rate;
408 if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
409 hc->tx_p = 0;
410 else /* can not exceed 100% */
411 hc->tx_p = scaled_div(1, pinv);
412 /* 376 /*
413 * Validate new RTT sample and update moving average 377 * Locate the acknowledged packet in the TX history.
378 *
379 * Returning "entry not found" here can for instance happen when
380 * - the host has not sent out anything (e.g. a passive server),
381 * - the Ack is outdated (packet with higher Ack number was received),
382 * - it is a bogus Ack (for a packet not sent on this connection).
414 */ 383 */
415 r_sample = dccp_sample_rtt(sk, r_sample); 384 acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb));
385 if (acked == NULL)
386 return;
387 /* For the sake of RTT sampling, ignore/remove all older entries */
388 tfrc_tx_hist_purge(&acked->next);
389
390 /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
391 now = ktime_get_real();
392 r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
416 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); 393 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9);
394
417 /* 395 /*
418 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 396 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
419 */ 397 */
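
tfrc_ewma(avg, sample, 9) above is, assuming the fixpoint form in ccids/lib/tfrc.h, a 9/10-weighted moving average that adopts the first sample unchanged:

#include <stdint.h>
typedef uint8_t  u8;
typedef uint32_t u32;

static u32 tfrc_ewma(u32 avg, u32 newval, u8 weight)   /* weight out of 10 */
{
        return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
}
/* e.g. avg = 1000 us, sample = 2000 us -> new avg = 1100 us */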
@@ -461,13 +439,12 @@ done_computing_x:
461 sk->sk_write_space(sk); 439 sk->sk_write_space(sk);
462 440
463 /* 441 /*
464 * Update timeout interval for the nofeedback timer. 442 * Update timeout interval for the nofeedback timer. In order to control
465 * We use a configuration option to increase the lower bound. 443 * rate halving on networks with very low RTTs (<= 1 ms), use per-route
466 * This can help avoid triggering the nofeedback timer too 444 * tunable RTAX_RTO_MIN value as the lower bound.
467 * often ('spinning') on LANs with small RTTs.
468 */ 445 */
469 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO * 446 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
470 (USEC_PER_SEC / 1000))); 447 USEC_PER_SEC/HZ * tcp_rto_min(sk));
471 /* 448 /*
472 * Schedule no feedback timer to expire in 449 * Schedule no feedback timer to expire in
473 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) 450 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
@@ -482,66 +459,41 @@ done_computing_x:
482 jiffies + usecs_to_jiffies(t_nfb)); 459 jiffies + usecs_to_jiffies(t_nfb));
483} 460}
484 461
485static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, 462static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
486 unsigned char len, u16 idx, 463 u8 option, u8 *optval, u8 optlen)
487 unsigned char *value)
488{ 464{
489 int rc = 0;
490 const struct dccp_sock *dp = dccp_sk(sk);
491 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 465 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
492 struct ccid3_options_received *opt_recv;
493 __be32 opt_val; 466 __be32 opt_val;
494 467
495 opt_recv = &hc->tx_options_received;
496
497 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
498 opt_recv->ccid3or_seqno = dp->dccps_gsr;
499 opt_recv->ccid3or_loss_event_rate = ~0;
500 opt_recv->ccid3or_loss_intervals_idx = 0;
501 opt_recv->ccid3or_loss_intervals_len = 0;
502 opt_recv->ccid3or_receive_rate = 0;
503 }
504
505 switch (option) { 468 switch (option) {
469 case TFRC_OPT_RECEIVE_RATE:
506 case TFRC_OPT_LOSS_EVENT_RATE: 470 case TFRC_OPT_LOSS_EVENT_RATE:
507 if (unlikely(len != 4)) { 471 /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
508 DCCP_WARN("%s(%p), invalid len %d " 472 if (packet_type == DCCP_PKT_DATA)
509 "for TFRC_OPT_LOSS_EVENT_RATE\n", 473 break;
510 dccp_role(sk), sk, len); 474 if (unlikely(optlen != 4)) {
511 rc = -EINVAL; 475 DCCP_WARN("%s(%p), invalid len %d for %u\n",
512 } else { 476 dccp_role(sk), sk, optlen, option);
513 opt_val = get_unaligned((__be32 *)value); 477 return -EINVAL;
514 opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
515 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
516 dccp_role(sk), sk,
517 opt_recv->ccid3or_loss_event_rate);
518 } 478 }
519 break; 479 opt_val = ntohl(get_unaligned((__be32 *)optval));
520 case TFRC_OPT_LOSS_INTERVALS: 480
521 opt_recv->ccid3or_loss_intervals_idx = idx; 481 if (option == TFRC_OPT_RECEIVE_RATE) {
522 opt_recv->ccid3or_loss_intervals_len = len; 482 /* Receive Rate is kept in units of 64 bytes/second */
523 ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", 483 hc->tx_x_recv = opt_val;
524 dccp_role(sk), sk, 484 hc->tx_x_recv <<= 6;
525 opt_recv->ccid3or_loss_intervals_idx, 485
526 opt_recv->ccid3or_loss_intervals_len);
527 break;
528 case TFRC_OPT_RECEIVE_RATE:
529 if (unlikely(len != 4)) {
530 DCCP_WARN("%s(%p), invalid len %d "
531 "for TFRC_OPT_RECEIVE_RATE\n",
532 dccp_role(sk), sk, len);
533 rc = -EINVAL;
534 } else {
535 opt_val = get_unaligned((__be32 *)value);
536 opt_recv->ccid3or_receive_rate = ntohl(opt_val);
537 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", 486 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
538 dccp_role(sk), sk, 487 dccp_role(sk), sk, opt_val);
539 opt_recv->ccid3or_receive_rate); 488 } else {
489 /* Update the fixpoint Loss Event Rate fraction */
490 hc->tx_p = tfrc_invert_loss_event_rate(opt_val);
491
492 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
493 dccp_role(sk), sk, opt_val);
540 } 494 }
541 break;
542 } 495 }
543 496 return 0;
544 return rc;
545} 497}
546 498
547static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) 499static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -559,42 +511,36 @@ static void ccid3_hc_tx_exit(struct sock *sk)
559{ 511{
560 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 512 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
561 513
562 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
563 sk_stop_timer(sk, &hc->tx_no_feedback_timer); 514 sk_stop_timer(sk, &hc->tx_no_feedback_timer);
564
565 tfrc_tx_hist_purge(&hc->tx_hist); 515 tfrc_tx_hist_purge(&hc->tx_hist);
566} 516}
567 517
568static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) 518static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
569{ 519{
570 struct ccid3_hc_tx_sock *hc; 520 info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto;
571 521 info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt;
572 /* Listen socks doesn't have a private CCID block */
573 if (sk->sk_state == DCCP_LISTEN)
574 return;
575
576 hc = ccid3_hc_tx_sk(sk);
577 info->tcpi_rto = hc->tx_t_rto;
578 info->tcpi_rtt = hc->tx_rtt;
579} 522}
580 523
581static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, 524static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
582 u32 __user *optval, int __user *optlen) 525 u32 __user *optval, int __user *optlen)
583{ 526{
584 const struct ccid3_hc_tx_sock *hc; 527 const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
528 struct tfrc_tx_info tfrc;
585 const void *val; 529 const void *val;
586 530
587 /* Listen socks doesn't have a private CCID block */
588 if (sk->sk_state == DCCP_LISTEN)
589 return -EINVAL;
590
591 hc = ccid3_hc_tx_sk(sk);
592 switch (optname) { 531 switch (optname) {
593 case DCCP_SOCKOPT_CCID_TX_INFO: 532 case DCCP_SOCKOPT_CCID_TX_INFO:
594 if (len < sizeof(hc->tx_tfrc)) 533 if (len < sizeof(tfrc))
595 return -EINVAL; 534 return -EINVAL;
596 len = sizeof(hc->tx_tfrc); 535 tfrc.tfrctx_x = hc->tx_x;
597 val = &hc->tx_tfrc; 536 tfrc.tfrctx_x_recv = hc->tx_x_recv;
537 tfrc.tfrctx_x_calc = hc->tx_x_calc;
538 tfrc.tfrctx_rtt = hc->tx_rtt;
539 tfrc.tfrctx_p = hc->tx_p;
540 tfrc.tfrctx_rto = hc->tx_t_rto;
541 tfrc.tfrctx_ipi = hc->tx_t_ipi;
542 len = sizeof(tfrc);
543 val = &tfrc;
598 break; 544 break;
599 default: 545 default:
600 return -ENOPROTOOPT; 546 return -ENOPROTOOPT;
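
From userspace, the TX info block filled in above is read roughly as follows (a hedged sketch: struct tfrc_tx_info comes from <linux/tfrc.h>, SOL_DCCP is 269, and the socket-option value is an assumption to be checked against <linux/dccp.h>):

#include <linux/tfrc.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef SOL_DCCP
#define SOL_DCCP 269
#endif
#ifndef DCCP_SOCKOPT_CCID_TX_INFO
#define DCCP_SOCKOPT_CCID_TX_INFO 192   /* assumed value */
#endif

static void dump_tx_info(int fd)
{
        struct tfrc_tx_info info;
        socklen_t len = sizeof(info);

        if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID_TX_INFO, &info, &len) == 0)
                printf("X=%llu B/s rtt=%u us rto=%u us\n",
                       (unsigned long long)(info.tfrctx_x >> 6),  /* X is in 64*B/s */
                       info.tfrctx_rtt, info.tfrctx_rto);
}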
@@ -624,7 +570,6 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
624 static const char *const ccid3_rx_state_names[] = { 570 static const char *const ccid3_rx_state_names[] = {
625 [TFRC_RSTATE_NO_DATA] = "NO_DATA", 571 [TFRC_RSTATE_NO_DATA] = "NO_DATA",
626 [TFRC_RSTATE_DATA] = "DATA", 572 [TFRC_RSTATE_DATA] = "DATA",
627 [TFRC_RSTATE_TERM] = "TERM",
628 }; 573 };
629 574
630 return ccid3_rx_state_names[state]; 575 return ccid3_rx_state_names[state];
@@ -650,14 +595,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
650{ 595{
651 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 596 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
652 struct dccp_sock *dp = dccp_sk(sk); 597 struct dccp_sock *dp = dccp_sk(sk);
653 ktime_t now; 598 ktime_t now = ktime_get_real();
654 s64 delta = 0; 599 s64 delta = 0;
655 600
656 if (unlikely(hc->rx_state == TFRC_RSTATE_TERM))
657 return;
658
659 now = ktime_get_real();
660
661 switch (fbtype) { 601 switch (fbtype) {
662 case CCID3_FBACK_INITIAL: 602 case CCID3_FBACK_INITIAL:
663 hc->rx_x_recv = 0; 603 hc->rx_x_recv = 0;
@@ -701,14 +641,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
701 641
702static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) 642static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
703{ 643{
704 const struct ccid3_hc_rx_sock *hc; 644 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
705 __be32 x_recv, pinv; 645 __be32 x_recv, pinv;
706 646
707 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) 647 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
708 return 0; 648 return 0;
709 649
710 hc = ccid3_hc_rx_sk(sk);
711
712 if (dccp_packet_without_ack(skb)) 650 if (dccp_packet_without_ack(skb))
713 return 0; 651 return 0;
714 652
@@ -749,10 +687,11 @@ static u32 ccid3_first_li(struct sock *sk)
749 x_recv = scaled_div32(hc->rx_bytes_recv, delta); 687 x_recv = scaled_div32(hc->rx_bytes_recv, delta);
750 if (x_recv == 0) { /* would also trigger divide-by-zero */ 688 if (x_recv == 0) { /* would also trigger divide-by-zero */
751 DCCP_WARN("X_recv==0\n"); 689 DCCP_WARN("X_recv==0\n");
752 if ((x_recv = hc->rx_x_recv) == 0) { 690 if (hc->rx_x_recv == 0) {
753 DCCP_BUG("stored value of X_recv is zero"); 691 DCCP_BUG("stored value of X_recv is zero");
754 return ~0U; 692 return ~0U;
755 } 693 }
694 x_recv = hc->rx_x_recv;
756 } 695 }
757 696
758 fval = scaled_div(hc->rx_s, hc->rx_rtt); 697 fval = scaled_div(hc->rx_s, hc->rx_rtt);
@@ -862,46 +801,31 @@ static void ccid3_hc_rx_exit(struct sock *sk)
862{ 801{
863 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 802 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
864 803
865 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
866
867 tfrc_rx_hist_purge(&hc->rx_hist); 804 tfrc_rx_hist_purge(&hc->rx_hist);
868 tfrc_lh_cleanup(&hc->rx_li_hist); 805 tfrc_lh_cleanup(&hc->rx_li_hist);
869} 806}
870 807
871static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) 808static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
872{ 809{
873 const struct ccid3_hc_rx_sock *hc; 810 info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state;
874
875 /* Listen socks doesn't have a private CCID block */
876 if (sk->sk_state == DCCP_LISTEN)
877 return;
878
879 hc = ccid3_hc_rx_sk(sk);
880 info->tcpi_ca_state = hc->rx_state;
881 info->tcpi_options |= TCPI_OPT_TIMESTAMPS; 811 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
882 info->tcpi_rcv_rtt = hc->rx_rtt; 812 info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt;
883} 813}
884 814
885static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, 815static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
886 u32 __user *optval, int __user *optlen) 816 u32 __user *optval, int __user *optlen)
887{ 817{
888 const struct ccid3_hc_rx_sock *hc; 818 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
889 struct tfrc_rx_info rx_info; 819 struct tfrc_rx_info rx_info;
890 const void *val; 820 const void *val;
891 821
892 /* Listen socks doesn't have a private CCID block */
893 if (sk->sk_state == DCCP_LISTEN)
894 return -EINVAL;
895
896 hc = ccid3_hc_rx_sk(sk);
897 switch (optname) { 822 switch (optname) {
898 case DCCP_SOCKOPT_CCID_RX_INFO: 823 case DCCP_SOCKOPT_CCID_RX_INFO:
899 if (len < sizeof(rx_info)) 824 if (len < sizeof(rx_info))
900 return -EINVAL; 825 return -EINVAL;
901 rx_info.tfrcrx_x_recv = hc->rx_x_recv; 826 rx_info.tfrcrx_x_recv = hc->rx_x_recv;
902 rx_info.tfrcrx_rtt = hc->rx_rtt; 827 rx_info.tfrcrx_rtt = hc->rx_rtt;
903 rx_info.tfrcrx_p = hc->rx_pinv == 0 ? ~0U : 828 rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv);
904 scaled_div(1, hc->rx_pinv);
905 len = sizeof(rx_info); 829 len = sizeof(rx_info);
906 val = &rx_info; 830 val = &rx_info;
907 break; 831 break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 032635776653..1a9933c29672 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,35 +42,36 @@
42#include "lib/tfrc.h" 42#include "lib/tfrc.h"
43#include "../ccid.h" 43#include "../ccid.h"
44 44
45/* Two seconds as per RFC 3448 4.2 */ 45/* Two seconds as per RFC 5348, 4.2 */
46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) 46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
47 47
48/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
49#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
50
51/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ 48/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
52#define TFRC_T_MBI 64 49#define TFRC_T_MBI 64
53 50
51/*
52 * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are
53 * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
54 * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
55 * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
56 * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
57 */
58#if (HZ >= 500)
59# define TFRC_T_DELTA USEC_PER_MSEC
60#else
61# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
62#endif
63
54enum ccid3_options { 64enum ccid3_options {
55 TFRC_OPT_LOSS_EVENT_RATE = 192, 65 TFRC_OPT_LOSS_EVENT_RATE = 192,
56 TFRC_OPT_LOSS_INTERVALS = 193, 66 TFRC_OPT_LOSS_INTERVALS = 193,
57 TFRC_OPT_RECEIVE_RATE = 194, 67 TFRC_OPT_RECEIVE_RATE = 194,
58}; 68};
59 69
60struct ccid3_options_received {
61 u64 ccid3or_seqno:48,
62 ccid3or_loss_intervals_idx:16;
63 u16 ccid3or_loss_intervals_len;
64 u32 ccid3or_loss_event_rate;
65 u32 ccid3or_receive_rate;
66};
67
68/* TFRC sender states */ 70/* TFRC sender states */
69enum ccid3_hc_tx_states { 71enum ccid3_hc_tx_states {
70 TFRC_SSTATE_NO_SENT = 1, 72 TFRC_SSTATE_NO_SENT = 1,
71 TFRC_SSTATE_NO_FBACK, 73 TFRC_SSTATE_NO_FBACK,
72 TFRC_SSTATE_FBACK, 74 TFRC_SSTATE_FBACK,
73 TFRC_SSTATE_TERM,
74}; 75};
75 76
76/** 77/**
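
Worked values for the TFRC_T_DELTA conditional above (USEC_PER_SEC = 1000000, USEC_PER_MSEC = 1000):

#define HZ 100                  /* example config                          */
#define USEC_PER_SEC  1000000
#define USEC_PER_MSEC 1000

#if (HZ >= 500)
# define TFRC_T_DELTA USEC_PER_MSEC              /* HZ=1000 -> 1000 us     */
#else
# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))  /* HZ=100  -> 5000 us,    */
#endif                                           /* i.e. half a 10 ms tick */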
@@ -90,19 +91,16 @@ enum ccid3_hc_tx_states {
90 * @tx_no_feedback_timer: Handle to no feedback timer 91 * @tx_no_feedback_timer: Handle to no feedback timer
91 * @tx_t_ld: Time last doubled during slow start 92 * @tx_t_ld: Time last doubled during slow start
92 * @tx_t_nom: Nominal send time of next packet 93 * @tx_t_nom: Nominal send time of next packet
93 * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs
94 * @tx_hist: Packet history 94 * @tx_hist: Packet history
95 * @tx_options_received: Parsed set of retrieved options
96 */ 95 */
97struct ccid3_hc_tx_sock { 96struct ccid3_hc_tx_sock {
98 struct tfrc_tx_info tx_tfrc; 97 u64 tx_x;
99#define tx_x tx_tfrc.tfrctx_x 98 u64 tx_x_recv;
100#define tx_x_recv tx_tfrc.tfrctx_x_recv 99 u32 tx_x_calc;
101#define tx_x_calc tx_tfrc.tfrctx_x_calc 100 u32 tx_rtt;
102#define tx_rtt tx_tfrc.tfrctx_rtt 101 u32 tx_p;
103#define tx_p tx_tfrc.tfrctx_p 102 u32 tx_t_rto;
104#define tx_t_rto tx_tfrc.tfrctx_rto 103 u32 tx_t_ipi;
105#define tx_t_ipi tx_tfrc.tfrctx_ipi
106 u16 tx_s; 104 u16 tx_s;
107 enum ccid3_hc_tx_states tx_state:8; 105 enum ccid3_hc_tx_states tx_state:8;
108 u8 tx_last_win_count; 106 u8 tx_last_win_count;
@@ -110,9 +108,7 @@ struct ccid3_hc_tx_sock {
110 struct timer_list tx_no_feedback_timer; 108 struct timer_list tx_no_feedback_timer;
111 ktime_t tx_t_ld; 109 ktime_t tx_t_ld;
112 ktime_t tx_t_nom; 110 ktime_t tx_t_nom;
113 u32 tx_delta;
114 struct tfrc_tx_hist_entry *tx_hist; 111 struct tfrc_tx_hist_entry *tx_hist;
115 struct ccid3_options_received tx_options_received;
116}; 112};
117 113
118static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) 114static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
@@ -126,21 +122,16 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
126enum ccid3_hc_rx_states { 122enum ccid3_hc_rx_states {
127 TFRC_RSTATE_NO_DATA = 1, 123 TFRC_RSTATE_NO_DATA = 1,
128 TFRC_RSTATE_DATA, 124 TFRC_RSTATE_DATA,
129 TFRC_RSTATE_TERM = 127,
130}; 125};
131 126
132/** 127/**
133 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket 128 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
134 * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3)
135 * @rx_rtt: Receiver estimate of rtt (non-standard)
136 * @rx_p: Current loss event rate (RFC 3448 5.4)
137 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) 129 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1)
138 * @rx_state: Receiver state, one of %ccid3_hc_rx_states 130 * @rx_state: Receiver state, one of %ccid3_hc_rx_states
139 * @rx_bytes_recv: Total sum of DCCP payload bytes 131 * @rx_bytes_recv: Total sum of DCCP payload bytes
140 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) 132 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3)
141 * @rx_rtt: Receiver estimate of RTT 133 * @rx_rtt: Receiver estimate of RTT
142 * @rx_tstamp_last_feedback: Time at which last feedback was sent 134 * @rx_tstamp_last_feedback: Time at which last feedback was sent
143 * @rx_tstamp_last_ack: Time at which last feedback was sent
144 * @rx_hist: Packet history (loss detection + RTT sampling) 135 * @rx_hist: Packet history (loss detection + RTT sampling)
145 * @rx_li_hist: Loss Interval database 136 * @rx_li_hist: Loss Interval database
146 * @rx_s: Received packet size in bytes 137 * @rx_s: Received packet size in bytes
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 8fc3cbf79071..497723c4d4bb 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -116,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
116 cur->li_length = len; 116 cur->li_length = len;
117 tfrc_lh_calc_i_mean(lh); 117 tfrc_lh_calc_i_mean(lh);
118 118
119 return (lh->i_mean < old_i_mean); 119 return lh->i_mean < old_i_mean;
120} 120}
121 121
122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ 122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 3a4f414e94a0..de8fe294bf0b 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -38,18 +38,6 @@
38#include "packet_history.h" 38#include "packet_history.h"
39#include "../../dccp.h" 39#include "../../dccp.h"
40 40
41/**
42 * tfrc_tx_hist_entry - Simple singly-linked TX history list
43 * @next: next oldest entry (LIFO order)
44 * @seqno: sequence number of this entry
45 * @stamp: send time of packet with sequence number @seqno
46 */
47struct tfrc_tx_hist_entry {
48 struct tfrc_tx_hist_entry *next;
49 u64 seqno;
50 ktime_t stamp;
51};
52
53/* 41/*
54 * Transmitter History Routines 42 * Transmitter History Routines
55 */ 43 */
@@ -71,15 +59,6 @@ void tfrc_tx_packet_history_exit(void)
71 } 59 }
72} 60}
73 61
74static struct tfrc_tx_hist_entry *
75 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
76{
77 while (head != NULL && head->seqno != seqno)
78 head = head->next;
79
80 return head;
81}
82
83int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) 62int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
84{ 63{
85 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); 64 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -107,24 +86,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
107 *headp = NULL; 86 *headp = NULL;
108} 87}
109 88
110u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
111 const ktime_t now)
112{
113 u32 rtt = 0;
114 struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
115
116 if (packet != NULL) {
117 rtt = ktime_us_delta(now, packet->stamp);
118 /*
119 * Garbage-collect older (irrelevant) entries:
120 */
121 tfrc_tx_hist_purge(&packet->next);
122 }
123
124 return rtt;
125}
126
127
128/* 89/*
129 * Receiver History Routines 90 * Receiver History Routines
130 */ 91 */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 7df6c5299999..7ee4a9d9d335 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,12 +40,28 @@
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include "tfrc.h" 41#include "tfrc.h"
42 42
43struct tfrc_tx_hist_entry; 43/**
44 * tfrc_tx_hist_entry - Simple singly-linked TX history list
45 * @next: next oldest entry (LIFO order)
46 * @seqno: sequence number of this entry
47 * @stamp: send time of packet with sequence number @seqno
48 */
49struct tfrc_tx_hist_entry {
50 struct tfrc_tx_hist_entry *next;
51 u64 seqno;
52 ktime_t stamp;
53};
54
55static inline struct tfrc_tx_hist_entry *
56 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
57{
58 while (head != NULL && head->seqno != seqno)
59 head = head->next;
60 return head;
61}
44 62
45extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); 63extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
46extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); 64extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
47extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
48 const u64 seqno, const ktime_t now);
49 65
50/* Subtraction a-b modulo-16, respects circular wrap-around */ 66/* Subtraction a-b modulo-16, respects circular wrap-around */
51#define SUB16(a, b) (((a) + 16 - (b)) & 0xF) 67#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
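
Usage sketch of the TX-history API above (malloc stands in for the kernel slab cache; in ccid3.c the sender adds one entry per packet sent, looks up the Ack'd seqno, then purges everything older):

#include <stdint.h>
#include <stdlib.h>
typedef uint64_t u64;

struct tx_hist { struct tx_hist *next; u64 seqno; };

static int hist_add(struct tx_hist **headp, u64 seqno)
{
        struct tx_hist *e = malloc(sizeof(*e));

        if (e == NULL)
                return -1;
        e->seqno = seqno;
        e->next  = *headp;     /* newest entry first (LIFO) */
        *headp   = e;
        return 0;
}

static struct tx_hist *hist_find(struct tx_hist *h, u64 seqno)
{
        while (h != NULL && h->seqno != seqno)
                h = h->next;   /* walk towards older entries */
        return h;
}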
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 01bb48e96c2e..f8ee3f549770 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -57,6 +57,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
57 57
58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); 58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); 59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
60extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
60 61
61extern int tfrc_tx_packet_history_init(void); 62extern int tfrc_tx_packet_history_init(void);
62extern void tfrc_tx_packet_history_exit(void); 63extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 22ca1cf0eb55..a052a4377e26 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -687,3 +687,17 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
687 index = tfrc_binsearch(fvalue, 0); 687 index = tfrc_binsearch(fvalue, 0);
688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; 688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
689} 689}
690
691/**
692 * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
693 * When @loss_event_rate is large, there is a chance that p is truncated to 0.
694 * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
695 */
696u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
697{
698 if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
699 return 0;
700 if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
701 return 1000000;
702 return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
703}
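
Boundary behaviour of the new helper, assuming scaled_div(1, p) = 10^6 / p as elsewhere in the TFRC library; SMALLEST_P is an illustrative stand-in for TFRC_SMALLEST_P, whose real value comes from the lookup table:

#include <assert.h>
#include <limits.h>
#include <stdint.h>
typedef uint32_t u32;
#define SMALLEST_P 40   /* illustrative floor only */

static u32 invert_loss_event_rate(u32 pinv)
{
        if (pinv == UINT_MAX)
                return 0;                /* no loss reported (RFC 4342, 8.5) */
        if (pinv == 0)
                return 1000000;          /* 1/0 mapped to 100%               */
        return 1000000 / pinv > SMALLEST_P ? 1000000 / pinv : SMALLEST_P;
}

int main(void)
{
        assert(invert_loss_event_rate(4) == 250000);   /* p = 25% */
        return 0;
}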