path: root/net/dccp/ccids/ccid2.c
author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/dccp/ccids/ccid2.c
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'net/dccp/ccids/ccid2.c')
-rw-r--r--	net/dccp/ccids/ccid2.c	| 444
1 file changed, 166 insertions(+), 278 deletions(-)
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be1..fadecd20d75b 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
  */
 #include <linux/slab.h>
 #include "../feat.h"
-#include "../ccid.h"
-#include "../dccp.h"
 #include "ccid2.h"
 
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 static int ccid2_debug;
 #define ccid2_pr_debug(format, a...)	DCCP_PR_DEBUG(ccid2_debug, format, ##a)
-
-static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
-{
-	int len = 0;
-	int pipe = 0;
-	struct ccid2_seq *seqp = hc->tx_seqh;
-
-	/* there is data in the chain */
-	if (seqp != hc->tx_seqt) {
-		seqp = seqp->ccid2s_prev;
-		len++;
-		if (!seqp->ccid2s_acked)
-			pipe++;
-
-		while (seqp != hc->tx_seqt) {
-			struct ccid2_seq *prev = seqp->ccid2s_prev;
-
-			len++;
-			if (!prev->ccid2s_acked)
-				pipe++;
-
-			/* packets are sent sequentially */
-			BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
-						prev->ccid2s_seq ) >= 0);
-			BUG_ON(time_before(seqp->ccid2s_sent,
-					   prev->ccid2s_sent));
-
-			seqp = prev;
-		}
-	}
-
-	BUG_ON(pipe != hc->tx_pipe);
-	ccid2_pr_debug("len of chain=%d\n", len);
-
-	do {
-		seqp = seqp->ccid2s_prev;
-		len++;
-	} while (seqp != hc->tx_seqh);
-
-	ccid2_pr_debug("total len=%d\n", len);
-	BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
-}
 #else
 #define ccid2_pr_debug(format, a...)
-#define ccid2_hc_tx_check_sanity(hc)
 #endif
 
 static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -123,12 +78,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
 
 static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 {
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-	if (hc->tx_pipe < hc->tx_cwnd)
-		return 0;
-
-	return 1; /* XXX CCID should dequeue when ready instead of polling */
+	if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
+		return CCID_PACKET_WILL_DEQUEUE_LATER;
+	return CCID_PACKET_SEND_AT_ONCE;
 }
 
 static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
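The new return codes replace the old 0/1 polling convention: instead of being polled, the CCID now tells the DCCP core to dequeue later via the xmitlet once the window opens. The ccid2_cwnd_network_limited() helper lives in ccid2.h, outside this diff; it presumably reduces to the same pipe-vs-cwnd test the deleted lines used, roughly:

static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
{
	/* a full congestion window is in flight: hold further packets */
	return hc->tx_pipe >= hc->tx_cwnd;
}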
@@ -156,19 +108,11 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 	dp->dccps_l_ack_ratio = val;
 }
 
-static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
-{
-	ccid2_pr_debug("change SRTT to %ld\n", val);
-	hc->tx_srtt = val;
-}
-
-static void ccid2_start_rto_timer(struct sock *sk);
-
 static void ccid2_hc_tx_rto_expire(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-	long s;
+	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -178,23 +122,17 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 
 	ccid2_pr_debug("RTO_EXPIRE\n");
 
-	ccid2_hc_tx_check_sanity(hc);
-
 	/* back-off timer */
 	hc->tx_rto <<= 1;
-
-	s = hc->tx_rto / HZ;
-	if (s > 60)
-		hc->tx_rto = 60 * HZ;
-
-	ccid2_start_rto_timer(sk);
+	if (hc->tx_rto > DCCP_RTO_MAX)
+		hc->tx_rto = DCCP_RTO_MAX;
 
 	/* adjust pipe, cwnd etc */
 	hc->tx_ssthresh = hc->tx_cwnd / 2;
 	if (hc->tx_ssthresh < 2)
 		hc->tx_ssthresh = 2;
 	hc->tx_cwnd = 1;
 	hc->tx_pipe = 0;
 
 	/* clear state about stuff we sent */
 	hc->tx_seqt = hc->tx_seqh;
@@ -204,23 +142,18 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 	hc->tx_rpseq = 0;
 	hc->tx_rpdupack = -1;
 	ccid2_change_l_ack_ratio(sk, 1);
-	ccid2_hc_tx_check_sanity(hc);
+
+	/* if we were blocked before, we may now send cwnd=1 packet */
+	if (sender_was_blocked)
+		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+	/* restart backed-off timer */
+	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
 
-static void ccid2_start_rto_timer(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-	ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
-
-	BUG_ON(timer_pending(&hc->tx_rtotimer));
-	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-}
-
-static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
+static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
@@ -230,7 +163,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 
 	hc->tx_seqh->ccid2s_seq   = dp->dccps_gss;
 	hc->tx_seqh->ccid2s_acked = 0;
-	hc->tx_seqh->ccid2s_sent  = jiffies;
+	hc->tx_seqh->ccid2s_sent  = ccid2_time_stamp;
 
 	next = hc->tx_seqh->ccid2s_next;
 	/* check if we need to alloc more space */
@@ -296,99 +229,104 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 	}
 #endif
 
-	/* setup RTO timer */
-	if (!timer_pending(&hc->tx_rtotimer))
-		ccid2_start_rto_timer(sk);
+	sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 	do {
 		struct ccid2_seq *seqp = hc->tx_seqt;
 
 		while (seqp != hc->tx_seqh) {
-			ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
+			ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
 				       (unsigned long long)seqp->ccid2s_seq,
 				       seqp->ccid2s_acked, seqp->ccid2s_sent);
 			seqp = seqp->ccid2s_next;
 		}
 	} while (0);
 	ccid2_pr_debug("=========\n");
-	ccid2_hc_tx_check_sanity(hc);
 #endif
 }
 
-/* XXX Lame code duplication!
- * returns -1 if none was found.
- * else returns the next offset to use in the function call.
+/**
+ * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
+ * This code is almost identical with TCP's tcp_rtt_estimator(), since
+ * - it has a higher sampling frequency (recommended by RFC 1323),
+ * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
+ * - it is simple (cf. more complex proposals such as Eifel timer or research
+ *   which suggests that the gain should be set according to window size),
+ * - in tests it was found to work well with CCID2 [gerrit].
  */
-static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
-			   unsigned char **vec, unsigned char *veclen)
+static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
 {
-	const struct dccp_hdr *dh = dccp_hdr(skb);
-	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
-	unsigned char *opt_ptr;
-	const unsigned char *opt_end = (unsigned char *)dh +
-					(dh->dccph_doff * 4);
-	unsigned char opt, len;
-	unsigned char *value;
-
-	BUG_ON(offset < 0);
-	options += offset;
-	opt_ptr = options;
-	if (opt_ptr >= opt_end)
-		return -1;
-
-	while (opt_ptr != opt_end) {
-		opt   = *opt_ptr++;
-		len   = 0;
-		value = NULL;
-
-		/* Check if this isn't a single byte option */
-		if (opt > DCCPO_MAX_RESERVED) {
-			if (opt_ptr == opt_end)
-				goto out_invalid_option;
-
-			len = *opt_ptr++;
-			if (len < 3)
-				goto out_invalid_option;
-			/*
-			 * Remove the type and len fields, leaving
-			 * just the value size
-			 */
-			len	-= 2;
-			value	= opt_ptr;
-			opt_ptr += len;
-
-			if (opt_ptr > opt_end)
-				goto out_invalid_option;
-		}
-
-		switch (opt) {
-		case DCCPO_ACK_VECTOR_0:
-		case DCCPO_ACK_VECTOR_1:
-			*vec	= value;
-			*veclen = len;
-			return offset + (opt_ptr - options);
-		}
-	}
-
-	return -1;
-
-out_invalid_option:
-	DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
-	return -1;
-}
-
-static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-	sk_stop_timer(sk, &hc->tx_rtotimer);
-	ccid2_pr_debug("deleted RTO timer\n");
-}
+	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+	long m = mrtt ? : 1;
+
+	if (hc->tx_srtt == 0) {
+		/* First measurement m */
+		hc->tx_srtt = m << 3;
+		hc->tx_mdev = m << 1;
+
+		hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
+		hc->tx_rttvar   = hc->tx_mdev_max;
+
+		hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
+	} else {
+		/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
+		m -= (hc->tx_srtt >> 3);
+		hc->tx_srtt += m;
+
+		/* Similarly, update scaled mdev with regard to |m| */
+		if (m < 0) {
+			m = -m;
+			m -= (hc->tx_mdev >> 2);
+			/*
+			 * This neutralises RTO increase when RTT < SRTT - mdev
+			 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
+			 * in Linux TCP", USENIX 2002, pp. 49-62).
+			 */
+			if (m > 0)
+				m >>= 3;
+		} else {
+			m -= (hc->tx_mdev >> 2);
+		}
+		hc->tx_mdev += m;
+
+		if (hc->tx_mdev > hc->tx_mdev_max) {
+			hc->tx_mdev_max = hc->tx_mdev;
+			if (hc->tx_mdev_max > hc->tx_rttvar)
+				hc->tx_rttvar = hc->tx_mdev_max;
+		}
+
+		/*
+		 * Decay RTTVAR at most once per flight, exploiting that
+		 *  1) pipe <= cwnd <= Sequence_Window = W  (RFC 4340, 7.5.2)
+		 *  2) AWL = GSS-W+1 <= GAR <= GSS          (RFC 4340, 7.5.1)
+		 * GAR is a useful bound for FlightSize = pipe.
+		 * AWL is probably too low here, as it over-estimates pipe.
+		 */
+		if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
+			if (hc->tx_mdev_max < hc->tx_rttvar)
+				hc->tx_rttvar -= (hc->tx_rttvar -
+						  hc->tx_mdev_max) >> 2;
+			hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
+			hc->tx_mdev_max = tcp_rto_min(sk);
+		}
+	}
+
+	/*
+	 * Set RTO from SRTT and RTTVAR
+	 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
+	 * This agrees with RFC 4341, 5:
+	 *	"Because DCCP does not retransmit data, DCCP does not require
+	 *	 TCP's recommended minimum timeout of one second".
+	 */
+	hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
+
+	if (hc->tx_rto > DCCP_RTO_MAX)
+		hc->tx_rto = DCCP_RTO_MAX;
+}
 
-static inline void ccid2_new_ack(struct sock *sk,
-				 struct ccid2_seq *seqp,
-				 unsigned int *maxincr)
+static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
+			  unsigned int *maxincr)
 {
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 
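The new estimator keeps SRTT scaled by 8 and the mean deviation scaled by 4, so the RFC 2988 gains of 1/8 and 1/4 become shifts, and RTO = SRTT + 4*RTTVAR falls out as (tx_srtt >> 3) + tx_rttvar. A standalone sketch of this fixed-point core, with the mdev_max/rttvar per-flight decay omitted and illustrative names (not kernel code):

#include <stdio.h>

static long srtt;	/* scaled: 8 * smoothed RTT            */
static long mdev;	/* scaled: 4 * smoothed mean deviation */

static void rtt_sample(long m)		/* m = measured RTT, m > 0 */
{
	if (srtt == 0) {		/* first sample: SRTT = m, RTTVAR = m/2 */
		srtt = m << 3;
		mdev = m << 1;
	} else {
		m -= srtt >> 3;		/* error term m - SRTT */
		srtt += m;		/* SRTT += (m - SRTT) / 8, in scaled form */
		if (m < 0)
			m = -m;
		m -= mdev >> 2;
		mdev += m;		/* mdev += (|err| - mdev) / 4, in scaled form */
	}
}

int main(void)
{
	const long samples[] = { 100, 120, 80, 110 };	/* e.g. in jiffies */

	for (unsigned int i = 0; i < 4; i++) {
		rtt_sample(samples[i]);
		/* RTO = SRTT + 4 * RTTVAR = (srtt >> 3) + mdev in this scaling */
		printf("sample=%ld rto=%ld\n", samples[i], (srtt >> 3) + mdev);
	}
	return 0;
}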
@@ -402,93 +340,27 @@ static inline void ccid2_new_ack(struct sock *sk,
 		hc->tx_cwnd += 1;
 		hc->tx_packets_acked = 0;
 	}
-
-	/* update RTO */
-	if (hc->tx_srtt == -1 ||
-	    time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) {
-		unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
-		int s;
-
-		/* first measurement */
-		if (hc->tx_srtt == -1) {
-			ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
-				       r, jiffies,
-				       (unsigned long long)seqp->ccid2s_seq);
-			ccid2_change_srtt(hc, r);
-			hc->tx_rttvar = r >> 1;
-		} else {
-			/* RTTVAR */
-			long tmp = hc->tx_srtt - r;
-			long srtt;
-
-			if (tmp < 0)
-				tmp *= -1;
-
-			tmp >>= 2;
-			hc->tx_rttvar *= 3;
-			hc->tx_rttvar >>= 2;
-			hc->tx_rttvar += tmp;
-
-			/* SRTT */
-			srtt = hc->tx_srtt;
-			srtt *= 7;
-			srtt >>= 3;
-			tmp = r >> 3;
-			srtt += tmp;
-			ccid2_change_srtt(hc, srtt);
-		}
-		s = hc->tx_rttvar << 2;
-		/* clock granularity is 1 when based on jiffies */
-		if (!s)
-			s = 1;
-		hc->tx_rto = hc->tx_srtt + s;
-
-		/* must be at least a second */
-		s = hc->tx_rto / HZ;
-		/* DCCP doesn't require this [but I like it cuz my code sux] */
-#if 1
-		if (s < 1)
-			hc->tx_rto = HZ;
-#endif
-		/* max 60 seconds */
-		if (s > 60)
-			hc->tx_rto = HZ * 60;
-
-		hc->tx_lastrtt = jiffies;
-
-		ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
-			       hc->tx_srtt, hc->tx_rttvar,
-			       hc->tx_rto, HZ, r);
-	}
-
-	/* we got a new ack, so re-start RTO timer */
-	ccid2_hc_tx_kill_rto_timer(sk);
-	ccid2_start_rto_timer(sk);
-}
-
-static void ccid2_hc_tx_dec_pipe(struct sock *sk)
-{
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-	if (hc->tx_pipe == 0)
-		DCCP_BUG("pipe == 0");
-	else
-		hc->tx_pipe--;
-
-	if (hc->tx_pipe == 0)
-		ccid2_hc_tx_kill_rto_timer(sk);
+	/*
+	 * FIXME: RTT is sampled several times per acknowledgment (for each
+	 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
+	 * This causes the RTT to be over-estimated, since the older entries
+	 * in the Ack Vector have earlier sending times.
+	 * The cleanest solution is to not use the ccid2s_sent field at all
+	 * and instead use DCCP timestamps: requires changes in other places.
+	 */
+	ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
 }
 
 static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 {
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 
-	if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) {
+	if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
 		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
 		return;
 	}
 
-	hc->tx_last_cong = jiffies;
+	hc->tx_last_cong = ccid2_time_stamp;
 
 	hc->tx_cwnd      = hc->tx_cwnd / 2 ? : 1U;
 	hc->tx_ssthresh  = max(hc->tx_cwnd, 2U);
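The congestion test above changed form because timestamps are now free-running u32 values (ccid2_time_stamp, presumably aliased to tcp_time_stamp in ccid2.h, which is not part of this diff) rather than unsigned long jiffies, so time_before() gives way to the unsigned-subtract-then-signed-compare idiom. That stays correct across wraparound as long as the two stamps are within 2^31 ticks of each other; a small illustration:

#include <assert.h>
#include <stdint.h>

/* wraparound-safe "a is earlier than b" for free-running u32 timestamps */
static int before32(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	assert(before32(1, 2));
	assert(before32(0xfffffff0u, 0x10u));	/* still correct across the wrap */
	assert(!before32(0x10u, 0xfffffff0u));
	return 0;
}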
@@ -498,19 +370,31 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 	ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
 }
 
+static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
+				     u8 option, u8 *optval, u8 optlen)
+{
+	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+
+	switch (option) {
+	case DCCPO_ACK_VECTOR_0:
+	case DCCPO_ACK_VECTOR_1:
+		return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
+					      option - DCCPO_ACK_VECTOR_0);
+	}
+	return 0;
+}
+
 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
+	struct dccp_ackvec_parsed *avp;
 	u64 ackno, seqno;
 	struct ccid2_seq *seqp;
-	unsigned char *vector;
-	unsigned char veclen;
-	int offset = 0;
 	int done = 0;
 	unsigned int maxincr = 0;
 
-	ccid2_hc_tx_check_sanity(hc);
 	/* check reverse path congestion */
 	seqno = DCCP_SKB_CB(skb)->dccpd_seq;
 
@@ -541,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* check forward path congestion */
-	/* still didn't send out new data packets */
-	if (hc->tx_seqh == hc->tx_seqt)
+	if (dccp_packet_without_ack(skb))
 		return;
 
-	switch (DCCP_SKB_CB(skb)->dccpd_type) {
-	case DCCP_PKT_ACK:
-	case DCCP_PKT_DATAACK:
-		break;
-	default:
-		return;
-	}
+	/* still didn't send out new data packets */
+	if (hc->tx_seqh == hc->tx_seqt)
+		goto done;
 
 	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
 	if (after48(ackno, hc->tx_high_ack))
@@ -575,16 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
 
 	/* go through all ack vectors */
-	while ((offset = ccid2_ackvector(sk, skb, offset,
-					 &vector, &veclen)) != -1) {
+	list_for_each_entry(avp, &hc->tx_av_chunks, node) {
 		/* go through this ack vector */
-		while (veclen--) {
-			const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-			u64 ackno_end_rl = SUB48(ackno, rl);
+		for (; avp->len--; avp->vec++) {
+			u64 ackno_end_rl = SUB48(ackno,
+						 dccp_ackvec_runlen(avp->vec));
 
-			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
+			ccid2_pr_debug("ackvec %llu |%u,%u|\n",
 				       (unsigned long long)ackno,
-				       (unsigned long long)ackno_end_rl);
+				       dccp_ackvec_state(avp->vec) >> 6,
+				       dccp_ackvec_runlen(avp->vec));
 			/* if the seqno we are analyzing is larger than the
 			 * current ackno, then move towards the tail of our
 			 * seqnos.
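Each Ack Vector cell packs a 2-bit receive state into its top bits and a 6-bit run length into its low bits (RFC 4340, 11.4), which is what dccp_ackvec_state() and dccp_ackvec_runlen() extract and why the new debug format prints the state shifted down by 6. A hypothetical decode of a single cell; the mask values follow the RFC, the macro names are illustrative rather than the kernel's:

#include <stdio.h>

#define AV_RUNLEN_MASK	0x3f	/* low 6 bits: run length                  */
#define AV_STATE_MASK	0xc0	/* top 2 bits: 0 = received, 1 = ECN       */
				/* marked, 3 = not yet received            */
int main(void)
{
	unsigned char cell = 0x43;	/* state 1 (ECN marked), run length 3 */

	printf("state=%u runlen=%u\n",
	       (cell & AV_STATE_MASK) >> 6, cell & AV_RUNLEN_MASK);
	return 0;
}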
@@ -603,24 +482,22 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = *vector &
-						 DCCP_ACKVEC_STATE_MASK;
+				const u8 state = dccp_ackvec_state(avp->vec);
 
 				/* new packet received or marked */
-				if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+				if (state != DCCPAV_NOT_RECEIVED &&
 				    !seqp->ccid2s_acked) {
-					if (state ==
-					    DCCP_ACKVEC_STATE_ECN_MARKED) {
+					if (state == DCCPAV_ECN_MARKED)
 						ccid2_congestion_event(sk,
 								       seqp);
-					} else
+					else
 						ccid2_new_ack(sk, seqp,
 							      &maxincr);
 
 					seqp->ccid2s_acked = 1;
 					ccid2_pr_debug("Got ack for %llu\n",
 						       (unsigned long long)seqp->ccid2s_seq);
-					ccid2_hc_tx_dec_pipe(sk);
+					hc->tx_pipe--;
 				}
 				if (seqp == hc->tx_seqt) {
 					done = 1;
@@ -632,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				break;
 
 			ackno = SUB48(ackno_end_rl, 1);
-			vector++;
 		}
 		if (done)
 			break;
@@ -677,7 +553,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * one ack vector.
 			 */
 			ccid2_congestion_event(sk, seqp);
-			ccid2_hc_tx_dec_pipe(sk);
+			hc->tx_pipe--;
 		}
 		if (seqp == hc->tx_seqt)
 			break;
@@ -695,7 +571,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		hc->tx_seqt = hc->tx_seqt->ccid2s_next;
 	}
 
-	ccid2_hc_tx_check_sanity(hc);
+	/* restart RTO timer if not all outstanding data has been acked */
+	if (hc->tx_pipe == 0)
+		sk_stop_timer(sk, &hc->tx_rtotimer);
+	else
+		sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
+done:
+	/* check if incoming Acks allow pending packets to be sent */
+	if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
+		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
+}
+
+/*
+ * Convert RFC 3390 larger initial window into an equivalent number of packets.
+ * This is based on the numbers specified in RFC 5681, 3.1.
+ */
+static inline u32 rfc3390_bytes_to_packets(const u32 smss)
+{
+	return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
 }
 
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
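rfc3390_bytes_to_packets() collapses the RFC 3390 byte formula min(4*SMSS, max(2*SMSS, 4380)) into the segment table of RFC 5681, 3.1: 4 segments when SMSS <= 1095 bytes, 3 segments up to 2190 bytes, otherwise 2. A standalone check of that arithmetic (it mirrors the helper above; not kernel code):

#include <assert.h>

static unsigned int iw_packets(unsigned int smss)
{
	return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
}

int main(void)
{
	assert(iw_packets(536)  == 4);	/* 4 * 536  = 2144 <= 4380 bytes */
	assert(iw_packets(1460) == 3);	/* 3 * 1460 = 4380 bytes exactly */
	assert(iw_packets(4352) == 2);	/* 2 * 4352 > 4380: capped at 2  */
	return 0;
}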
@@ -707,12 +601,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
 	hc->tx_ssthresh = ~0U;
 
-	/*
-	 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
-	 * packets for new connections, following the rules from [RFC3390]".
-	 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
-	 */
-	hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
+	/* Use larger initial windows (RFC 4341, section 5). */
+	hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
 
 	/* Make sure that Ack Ratio is enabled and within bounds. */
 	max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
@@ -723,15 +613,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	if (ccid2_hc_tx_alloc_seq(hc))
 		return -ENOMEM;
 
-	hc->tx_rto	 = 3 * HZ;
-	ccid2_change_srtt(hc, -1);
-	hc->tx_rttvar    = -1;
+	hc->tx_rto	 = DCCP_TIMEOUT_INIT;
 	hc->tx_rpdupack  = -1;
-	hc->tx_last_cong = jiffies;
+	hc->tx_last_cong = ccid2_time_stamp;
 	setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
 			(unsigned long)sk);
-
-	ccid2_hc_tx_check_sanity(hc);
+	INIT_LIST_HEAD(&hc->tx_av_chunks);
 	return 0;
 }
 
@@ -740,7 +627,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 	int i;
 
-	ccid2_hc_tx_kill_rto_timer(sk);
+	sk_stop_timer(sk, &hc->tx_rtotimer);
 
 	for (i = 0; i < hc->tx_seqbufc; i++)
 		kfree(hc->tx_seqbuf[i]);
@@ -765,16 +652,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 }
 
 struct ccid_operations ccid2_ops = {
-	.ccid_id		= DCCPC_CCID2,
-	.ccid_name		= "TCP-like",
-	.ccid_hc_tx_obj_size	= sizeof(struct ccid2_hc_tx_sock),
-	.ccid_hc_tx_init	= ccid2_hc_tx_init,
-	.ccid_hc_tx_exit	= ccid2_hc_tx_exit,
-	.ccid_hc_tx_send_packet	= ccid2_hc_tx_send_packet,
-	.ccid_hc_tx_packet_sent	= ccid2_hc_tx_packet_sent,
-	.ccid_hc_tx_packet_recv	= ccid2_hc_tx_packet_recv,
-	.ccid_hc_rx_obj_size	= sizeof(struct ccid2_hc_rx_sock),
-	.ccid_hc_rx_packet_recv	= ccid2_hc_rx_packet_recv,
+	.ccid_id		  = DCCPC_CCID2,
+	.ccid_name		  = "TCP-like",
+	.ccid_hc_tx_obj_size	  = sizeof(struct ccid2_hc_tx_sock),
+	.ccid_hc_tx_init	  = ccid2_hc_tx_init,
+	.ccid_hc_tx_exit	  = ccid2_hc_tx_exit,
+	.ccid_hc_tx_send_packet	  = ccid2_hc_tx_send_packet,
+	.ccid_hc_tx_packet_sent	  = ccid2_hc_tx_packet_sent,
+	.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
+	.ccid_hc_tx_packet_recv	  = ccid2_hc_tx_packet_recv,
+	.ccid_hc_rx_obj_size	  = sizeof(struct ccid2_hc_rx_sock),
+	.ccid_hc_rx_packet_recv	  = ccid2_hc_rx_packet_recv,
 };
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG