diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/dccp/ccids/ccid2.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp [wip-k-fmlp]
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'net/dccp/ccids/ccid2.c')
-rw-r--r-- | net/dccp/ccids/ccid2.c | 444 |
1 files changed, 166 insertions, 278 deletions
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 9b3ae9922be1..fadecd20d75b 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c | |||
@@ -25,59 +25,14 @@ | |||
25 | */ | 25 | */ |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include "../feat.h" | 27 | #include "../feat.h" |
28 | #include "../ccid.h" | ||
29 | #include "../dccp.h" | ||
30 | #include "ccid2.h" | 28 | #include "ccid2.h" |
31 | 29 | ||
32 | 30 | ||
33 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 31 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
34 | static int ccid2_debug; | 32 | static int ccid2_debug; |
35 | #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) | 33 | #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) |
36 | |||
37 | static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc) | ||
38 | { | ||
39 | int len = 0; | ||
40 | int pipe = 0; | ||
41 | struct ccid2_seq *seqp = hc->tx_seqh; | ||
42 | |||
43 | /* there is data in the chain */ | ||
44 | if (seqp != hc->tx_seqt) { | ||
45 | seqp = seqp->ccid2s_prev; | ||
46 | len++; | ||
47 | if (!seqp->ccid2s_acked) | ||
48 | pipe++; | ||
49 | |||
50 | while (seqp != hc->tx_seqt) { | ||
51 | struct ccid2_seq *prev = seqp->ccid2s_prev; | ||
52 | |||
53 | len++; | ||
54 | if (!prev->ccid2s_acked) | ||
55 | pipe++; | ||
56 | |||
57 | /* packets are sent sequentially */ | ||
58 | BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq, | ||
59 | prev->ccid2s_seq ) >= 0); | ||
60 | BUG_ON(time_before(seqp->ccid2s_sent, | ||
61 | prev->ccid2s_sent)); | ||
62 | |||
63 | seqp = prev; | ||
64 | } | ||
65 | } | ||
66 | |||
67 | BUG_ON(pipe != hc->tx_pipe); | ||
68 | ccid2_pr_debug("len of chain=%d\n", len); | ||
69 | |||
70 | do { | ||
71 | seqp = seqp->ccid2s_prev; | ||
72 | len++; | ||
73 | } while (seqp != hc->tx_seqh); | ||
74 | |||
75 | ccid2_pr_debug("total len=%d\n", len); | ||
76 | BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN); | ||
77 | } | ||
78 | #else | 34 | #else |
79 | #define ccid2_pr_debug(format, a...) | 35 | #define ccid2_pr_debug(format, a...) |
80 | #define ccid2_hc_tx_check_sanity(hc) | ||
81 | #endif | 36 | #endif |
82 | 37 | ||
83 | static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) | 38 | static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) |
@@ -123,12 +78,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) | |||
123 | 78 | ||
124 | static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | 79 | static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) |
125 | { | 80 | { |
126 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 81 | if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk))) |
127 | 82 | return CCID_PACKET_WILL_DEQUEUE_LATER; | |
128 | if (hc->tx_pipe < hc->tx_cwnd) | 83 | return CCID_PACKET_SEND_AT_ONCE; |
129 | return 0; | ||
130 | |||
131 | return 1; /* XXX CCID should dequeue when ready instead of polling */ | ||
132 | } | 84 | } |
133 | 85 | ||
134 | static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) | 86 | static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) |
@@ -156,19 +108,11 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) | |||
156 | dp->dccps_l_ack_ratio = val; | 108 | dp->dccps_l_ack_ratio = val; |
157 | } | 109 | } |
158 | 110 | ||
159 | static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val) | ||
160 | { | ||
161 | ccid2_pr_debug("change SRTT to %ld\n", val); | ||
162 | hc->tx_srtt = val; | ||
163 | } | ||
164 | |||
165 | static void ccid2_start_rto_timer(struct sock *sk); | ||
166 | |||
167 | static void ccid2_hc_tx_rto_expire(unsigned long data) | 111 | static void ccid2_hc_tx_rto_expire(unsigned long data) |
168 | { | 112 | { |
169 | struct sock *sk = (struct sock *)data; | 113 | struct sock *sk = (struct sock *)data; |
170 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 114 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
171 | long s; | 115 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); |
172 | 116 | ||
173 | bh_lock_sock(sk); | 117 | bh_lock_sock(sk); |
174 | if (sock_owned_by_user(sk)) { | 118 | if (sock_owned_by_user(sk)) { |
@@ -178,23 +122,17 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) | |||
178 | 122 | ||
179 | ccid2_pr_debug("RTO_EXPIRE\n"); | 123 | ccid2_pr_debug("RTO_EXPIRE\n"); |
180 | 124 | ||
181 | ccid2_hc_tx_check_sanity(hc); | ||
182 | |||
183 | /* back-off timer */ | 125 | /* back-off timer */ |
184 | hc->tx_rto <<= 1; | 126 | hc->tx_rto <<= 1; |
185 | 127 | if (hc->tx_rto > DCCP_RTO_MAX) | |
186 | s = hc->tx_rto / HZ; | 128 | hc->tx_rto = DCCP_RTO_MAX; |
187 | if (s > 60) | ||
188 | hc->tx_rto = 60 * HZ; | ||
189 | |||
190 | ccid2_start_rto_timer(sk); | ||
191 | 129 | ||
192 | /* adjust pipe, cwnd etc */ | 130 | /* adjust pipe, cwnd etc */ |
193 | hc->tx_ssthresh = hc->tx_cwnd / 2; | 131 | hc->tx_ssthresh = hc->tx_cwnd / 2; |
194 | if (hc->tx_ssthresh < 2) | 132 | if (hc->tx_ssthresh < 2) |
195 | hc->tx_ssthresh = 2; | 133 | hc->tx_ssthresh = 2; |
196 | hc->tx_cwnd = 1; | 134 | hc->tx_cwnd = 1; |
197 | hc->tx_pipe = 0; | 135 | hc->tx_pipe = 0; |
198 | 136 | ||
199 | /* clear state about stuff we sent */ | 137 | /* clear state about stuff we sent */ |
200 | hc->tx_seqt = hc->tx_seqh; | 138 | hc->tx_seqt = hc->tx_seqh; |
@@ -204,23 +142,18 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) | |||
204 | hc->tx_rpseq = 0; | 142 | hc->tx_rpseq = 0; |
205 | hc->tx_rpdupack = -1; | 143 | hc->tx_rpdupack = -1; |
206 | ccid2_change_l_ack_ratio(sk, 1); | 144 | ccid2_change_l_ack_ratio(sk, 1); |
207 | ccid2_hc_tx_check_sanity(hc); | 145 | |
146 | /* if we were blocked before, we may now send cwnd=1 packet */ | ||
147 | if (sender_was_blocked) | ||
148 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); | ||
149 | /* restart backed-off timer */ | ||
150 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); | ||
208 | out: | 151 | out: |
209 | bh_unlock_sock(sk); | 152 | bh_unlock_sock(sk); |
210 | sock_put(sk); | 153 | sock_put(sk); |
211 | } | 154 | } |
212 | 155 | ||
213 | static void ccid2_start_rto_timer(struct sock *sk) | 156 | static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) |
214 | { | ||
215 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
216 | |||
217 | ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto); | ||
218 | |||
219 | BUG_ON(timer_pending(&hc->tx_rtotimer)); | ||
220 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); | ||
221 | } | ||
222 | |||
223 | static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | ||
224 | { | 157 | { |
225 | struct dccp_sock *dp = dccp_sk(sk); | 158 | struct dccp_sock *dp = dccp_sk(sk); |
226 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 159 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
@@ -230,7 +163,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | |||
230 | 163 | ||
231 | hc->tx_seqh->ccid2s_seq = dp->dccps_gss; | 164 | hc->tx_seqh->ccid2s_seq = dp->dccps_gss; |
232 | hc->tx_seqh->ccid2s_acked = 0; | 165 | hc->tx_seqh->ccid2s_acked = 0; |
233 | hc->tx_seqh->ccid2s_sent = jiffies; | 166 | hc->tx_seqh->ccid2s_sent = ccid2_time_stamp; |
234 | 167 | ||
235 | next = hc->tx_seqh->ccid2s_next; | 168 | next = hc->tx_seqh->ccid2s_next; |
236 | /* check if we need to alloc more space */ | 169 | /* check if we need to alloc more space */ |
@@ -296,99 +229,104 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | |||
296 | } | 229 | } |
297 | #endif | 230 | #endif |
298 | 231 | ||
299 | /* setup RTO timer */ | 232 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); |
300 | if (!timer_pending(&hc->tx_rtotimer)) | ||
301 | ccid2_start_rto_timer(sk); | ||
302 | 233 | ||
303 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 234 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
304 | do { | 235 | do { |
305 | struct ccid2_seq *seqp = hc->tx_seqt; | 236 | struct ccid2_seq *seqp = hc->tx_seqt; |
306 | 237 | ||
307 | while (seqp != hc->tx_seqh) { | 238 | while (seqp != hc->tx_seqh) { |
308 | ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", | 239 | ccid2_pr_debug("out seq=%llu acked=%d time=%u\n", |
309 | (unsigned long long)seqp->ccid2s_seq, | 240 | (unsigned long long)seqp->ccid2s_seq, |
310 | seqp->ccid2s_acked, seqp->ccid2s_sent); | 241 | seqp->ccid2s_acked, seqp->ccid2s_sent); |
311 | seqp = seqp->ccid2s_next; | 242 | seqp = seqp->ccid2s_next; |
312 | } | 243 | } |
313 | } while (0); | 244 | } while (0); |
314 | ccid2_pr_debug("=========\n"); | 245 | ccid2_pr_debug("=========\n"); |
315 | ccid2_hc_tx_check_sanity(hc); | ||
316 | #endif | 246 | #endif |
317 | } | 247 | } |
318 | 248 | ||
319 | /* XXX Lame code duplication! | 249 | /** |
320 | * returns -1 if none was found. | 250 | * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm |
321 | * else returns the next offset to use in the function call. | 251 | * This code is almost identical with TCP's tcp_rtt_estimator(), since |
252 | * - it has a higher sampling frequency (recommended by RFC 1323), | ||
253 | * - the RTO does not collapse into RTT due to RTTVAR going towards zero, | ||
254 | * - it is simple (cf. more complex proposals such as Eifel timer or research | ||
255 | * which suggests that the gain should be set according to window size), | ||
256 | * - in tests it was found to work well with CCID2 [gerrit]. | ||
322 | */ | 257 | */ |
323 | static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, | 258 | static void ccid2_rtt_estimator(struct sock *sk, const long mrtt) |
324 | unsigned char **vec, unsigned char *veclen) | ||
325 | { | 259 | { |
326 | const struct dccp_hdr *dh = dccp_hdr(skb); | 260 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
327 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); | 261 | long m = mrtt ? : 1; |
328 | unsigned char *opt_ptr; | 262 | |
329 | const unsigned char *opt_end = (unsigned char *)dh + | 263 | if (hc->tx_srtt == 0) { |
330 | (dh->dccph_doff * 4); | 264 | /* First measurement m */ |
331 | unsigned char opt, len; | 265 | hc->tx_srtt = m << 3; |
332 | unsigned char *value; | 266 | hc->tx_mdev = m << 1; |
333 | 267 | ||
334 | BUG_ON(offset < 0); | 268 | hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk)); |
335 | options += offset; | 269 | hc->tx_rttvar = hc->tx_mdev_max; |
336 | opt_ptr = options; | 270 | |
337 | if (opt_ptr >= opt_end) | 271 | hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss; |
338 | return -1; | 272 | } else { |
339 | 273 | /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */ | |
340 | while (opt_ptr != opt_end) { | 274 | m -= (hc->tx_srtt >> 3); |
341 | opt = *opt_ptr++; | 275 | hc->tx_srtt += m; |
342 | len = 0; | 276 | |
343 | value = NULL; | 277 | /* Similarly, update scaled mdev with regard to |m| */ |
344 | 278 | if (m < 0) { | |
345 | /* Check if this isn't a single byte option */ | 279 | m = -m; |
346 | if (opt > DCCPO_MAX_RESERVED) { | 280 | m -= (hc->tx_mdev >> 2); |
347 | if (opt_ptr == opt_end) | ||
348 | goto out_invalid_option; | ||
349 | |||
350 | len = *opt_ptr++; | ||
351 | if (len < 3) | ||
352 | goto out_invalid_option; | ||
353 | /* | 281 | /* |
354 | * Remove the type and len fields, leaving | 282 | * This neutralises RTO increase when RTT < SRTT - mdev |
355 | * just the value size | 283 | * (see P. Sarolahti, A. Kuznetsov,"Congestion Control |
284 | * in Linux TCP", USENIX 2002, pp. 49-62). | ||
356 | */ | 285 | */ |
357 | len -= 2; | 286 | if (m > 0) |
358 | value = opt_ptr; | 287 | m >>= 3; |
359 | opt_ptr += len; | 288 | } else { |
289 | m -= (hc->tx_mdev >> 2); | ||
290 | } | ||
291 | hc->tx_mdev += m; | ||
360 | 292 | ||
361 | if (opt_ptr > opt_end) | 293 | if (hc->tx_mdev > hc->tx_mdev_max) { |
362 | goto out_invalid_option; | 294 | hc->tx_mdev_max = hc->tx_mdev; |
295 | if (hc->tx_mdev_max > hc->tx_rttvar) | ||
296 | hc->tx_rttvar = hc->tx_mdev_max; | ||
363 | } | 297 | } |
364 | 298 | ||
365 | switch (opt) { | 299 | /* |
366 | case DCCPO_ACK_VECTOR_0: | 300 | * Decay RTTVAR at most once per flight, exploiting that |
367 | case DCCPO_ACK_VECTOR_1: | 301 | * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2) |
368 | *vec = value; | 302 | * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1) |
369 | *veclen = len; | 303 | * GAR is a useful bound for FlightSize = pipe. |
370 | return offset + (opt_ptr - options); | 304 | * AWL is probably too low here, as it over-estimates pipe. |
305 | */ | ||
306 | if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) { | ||
307 | if (hc->tx_mdev_max < hc->tx_rttvar) | ||
308 | hc->tx_rttvar -= (hc->tx_rttvar - | ||
309 | hc->tx_mdev_max) >> 2; | ||
310 | hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss; | ||
311 | hc->tx_mdev_max = tcp_rto_min(sk); | ||
371 | } | 312 | } |
372 | } | 313 | } |
373 | 314 | ||
374 | return -1; | 315 | /* |
375 | 316 | * Set RTO from SRTT and RTTVAR | |
376 | out_invalid_option: | 317 | * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms. |
377 | DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); | 318 | * This agrees with RFC 4341, 5: |
378 | return -1; | 319 | * "Because DCCP does not retransmit data, DCCP does not require |
379 | } | 320 | * TCP's recommended minimum timeout of one second". |
380 | 321 | */ | |
381 | static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) | 322 | hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar; |
382 | { | ||
383 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
384 | 323 | ||
385 | sk_stop_timer(sk, &hc->tx_rtotimer); | 324 | if (hc->tx_rto > DCCP_RTO_MAX) |
386 | ccid2_pr_debug("deleted RTO timer\n"); | 325 | hc->tx_rto = DCCP_RTO_MAX; |
387 | } | 326 | } |
388 | 327 | ||
389 | static inline void ccid2_new_ack(struct sock *sk, | 328 | static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, |
390 | struct ccid2_seq *seqp, | 329 | unsigned int *maxincr) |
391 | unsigned int *maxincr) | ||
392 | { | 330 | { |
393 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 331 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
394 | 332 | ||
@@ -402,93 +340,27 @@ static inline void ccid2_new_ack(struct sock *sk, | |||
402 | hc->tx_cwnd += 1; | 340 | hc->tx_cwnd += 1; |
403 | hc->tx_packets_acked = 0; | 341 | hc->tx_packets_acked = 0; |
404 | } | 342 | } |
405 | 343 | /* | |
406 | /* update RTO */ | 344 | * FIXME: RTT is sampled several times per acknowledgment (for each |
407 | if (hc->tx_srtt == -1 || | 345 | * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). |
408 | time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { | 346 | * This causes the RTT to be over-estimated, since the older entries |
409 | unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; | 347 | * in the Ack Vector have earlier sending times. |
410 | int s; | 348 | * The cleanest solution is to not use the ccid2s_sent field at all |
411 | 349 | * and instead use DCCP timestamps: requires changes in other places. | |
412 | /* first measurement */ | 350 | */ |
413 | if (hc->tx_srtt == -1) { | 351 | ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent); |
414 | ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", | ||
415 | r, jiffies, | ||
416 | (unsigned long long)seqp->ccid2s_seq); | ||
417 | ccid2_change_srtt(hc, r); | ||
418 | hc->tx_rttvar = r >> 1; | ||
419 | } else { | ||
420 | /* RTTVAR */ | ||
421 | long tmp = hc->tx_srtt - r; | ||
422 | long srtt; | ||
423 | |||
424 | if (tmp < 0) | ||
425 | tmp *= -1; | ||
426 | |||
427 | tmp >>= 2; | ||
428 | hc->tx_rttvar *= 3; | ||
429 | hc->tx_rttvar >>= 2; | ||
430 | hc->tx_rttvar += tmp; | ||
431 | |||
432 | /* SRTT */ | ||
433 | srtt = hc->tx_srtt; | ||
434 | srtt *= 7; | ||
435 | srtt >>= 3; | ||
436 | tmp = r >> 3; | ||
437 | srtt += tmp; | ||
438 | ccid2_change_srtt(hc, srtt); | ||
439 | } | ||
440 | s = hc->tx_rttvar << 2; | ||
441 | /* clock granularity is 1 when based on jiffies */ | ||
442 | if (!s) | ||
443 | s = 1; | ||
444 | hc->tx_rto = hc->tx_srtt + s; | ||
445 | |||
446 | /* must be at least a second */ | ||
447 | s = hc->tx_rto / HZ; | ||
448 | /* DCCP doesn't require this [but I like it cuz my code sux] */ | ||
449 | #if 1 | ||
450 | if (s < 1) | ||
451 | hc->tx_rto = HZ; | ||
452 | #endif | ||
453 | /* max 60 seconds */ | ||
454 | if (s > 60) | ||
455 | hc->tx_rto = HZ * 60; | ||
456 | |||
457 | hc->tx_lastrtt = jiffies; | ||
458 | |||
459 | ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", | ||
460 | hc->tx_srtt, hc->tx_rttvar, | ||
461 | hc->tx_rto, HZ, r); | ||
462 | } | ||
463 | |||
464 | /* we got a new ack, so re-start RTO timer */ | ||
465 | ccid2_hc_tx_kill_rto_timer(sk); | ||
466 | ccid2_start_rto_timer(sk); | ||
467 | } | ||
468 | |||
469 | static void ccid2_hc_tx_dec_pipe(struct sock *sk) | ||
470 | { | ||
471 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
472 | |||
473 | if (hc->tx_pipe == 0) | ||
474 | DCCP_BUG("pipe == 0"); | ||
475 | else | ||
476 | hc->tx_pipe--; | ||
477 | |||
478 | if (hc->tx_pipe == 0) | ||
479 | ccid2_hc_tx_kill_rto_timer(sk); | ||
480 | } | 352 | } |
481 | 353 | ||
482 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) | 354 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) |
483 | { | 355 | { |
484 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 356 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
485 | 357 | ||
486 | if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { | 358 | if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) { |
487 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); | 359 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); |
488 | return; | 360 | return; |
489 | } | 361 | } |
490 | 362 | ||
491 | hc->tx_last_cong = jiffies; | 363 | hc->tx_last_cong = ccid2_time_stamp; |
492 | 364 | ||
493 | hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; | 365 | hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; |
494 | hc->tx_ssthresh = max(hc->tx_cwnd, 2U); | 366 | hc->tx_ssthresh = max(hc->tx_cwnd, 2U); |
@@ -498,19 +370,31 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) | |||
498 | ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); | 370 | ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); |
499 | } | 371 | } |
500 | 372 | ||
373 | static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, | ||
374 | u8 option, u8 *optval, u8 optlen) | ||
375 | { | ||
376 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
377 | |||
378 | switch (option) { | ||
379 | case DCCPO_ACK_VECTOR_0: | ||
380 | case DCCPO_ACK_VECTOR_1: | ||
381 | return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen, | ||
382 | option - DCCPO_ACK_VECTOR_0); | ||
383 | } | ||
384 | return 0; | ||
385 | } | ||
386 | |||
501 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 387 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
502 | { | 388 | { |
503 | struct dccp_sock *dp = dccp_sk(sk); | 389 | struct dccp_sock *dp = dccp_sk(sk); |
504 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 390 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
391 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); | ||
392 | struct dccp_ackvec_parsed *avp; | ||
505 | u64 ackno, seqno; | 393 | u64 ackno, seqno; |
506 | struct ccid2_seq *seqp; | 394 | struct ccid2_seq *seqp; |
507 | unsigned char *vector; | ||
508 | unsigned char veclen; | ||
509 | int offset = 0; | ||
510 | int done = 0; | 395 | int done = 0; |
511 | unsigned int maxincr = 0; | 396 | unsigned int maxincr = 0; |
512 | 397 | ||
513 | ccid2_hc_tx_check_sanity(hc); | ||
514 | /* check reverse path congestion */ | 398 | /* check reverse path congestion */ |
515 | seqno = DCCP_SKB_CB(skb)->dccpd_seq; | 399 | seqno = DCCP_SKB_CB(skb)->dccpd_seq; |
516 | 400 | ||
@@ -541,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
541 | } | 425 | } |
542 | 426 | ||
543 | /* check forward path congestion */ | 427 | /* check forward path congestion */ |
544 | /* still didn't send out new data packets */ | 428 | if (dccp_packet_without_ack(skb)) |
545 | if (hc->tx_seqh == hc->tx_seqt) | ||
546 | return; | 429 | return; |
547 | 430 | ||
548 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | 431 | /* still didn't send out new data packets */ |
549 | case DCCP_PKT_ACK: | 432 | if (hc->tx_seqh == hc->tx_seqt) |
550 | case DCCP_PKT_DATAACK: | 433 | goto done; |
551 | break; | ||
552 | default: | ||
553 | return; | ||
554 | } | ||
555 | 434 | ||
556 | ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; | 435 | ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; |
557 | if (after48(ackno, hc->tx_high_ack)) | 436 | if (after48(ackno, hc->tx_high_ack)) |
@@ -575,16 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
575 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); | 454 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); |
576 | 455 | ||
577 | /* go through all ack vectors */ | 456 | /* go through all ack vectors */ |
578 | while ((offset = ccid2_ackvector(sk, skb, offset, | 457 | list_for_each_entry(avp, &hc->tx_av_chunks, node) { |
579 | &vector, &veclen)) != -1) { | ||
580 | /* go through this ack vector */ | 458 | /* go through this ack vector */ |
581 | while (veclen--) { | 459 | for (; avp->len--; avp->vec++) { |
582 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; | 460 | u64 ackno_end_rl = SUB48(ackno, |
583 | u64 ackno_end_rl = SUB48(ackno, rl); | 461 | dccp_ackvec_runlen(avp->vec)); |
584 | 462 | ||
585 | ccid2_pr_debug("ackvec start:%llu end:%llu\n", | 463 | ccid2_pr_debug("ackvec %llu |%u,%u|\n", |
586 | (unsigned long long)ackno, | 464 | (unsigned long long)ackno, |
587 | (unsigned long long)ackno_end_rl); | 465 | dccp_ackvec_state(avp->vec) >> 6, |
466 | dccp_ackvec_runlen(avp->vec)); | ||
588 | /* if the seqno we are analyzing is larger than the | 467 | /* if the seqno we are analyzing is larger than the |
589 | * current ackno, then move towards the tail of our | 468 | * current ackno, then move towards the tail of our |
590 | * seqnos. | 469 | * seqnos. |
@@ -603,24 +482,22 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
603 | * run length | 482 | * run length |
604 | */ | 483 | */ |
605 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { | 484 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { |
606 | const u8 state = *vector & | 485 | const u8 state = dccp_ackvec_state(avp->vec); |
607 | DCCP_ACKVEC_STATE_MASK; | ||
608 | 486 | ||
609 | /* new packet received or marked */ | 487 | /* new packet received or marked */ |
610 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && | 488 | if (state != DCCPAV_NOT_RECEIVED && |
611 | !seqp->ccid2s_acked) { | 489 | !seqp->ccid2s_acked) { |
612 | if (state == | 490 | if (state == DCCPAV_ECN_MARKED) |
613 | DCCP_ACKVEC_STATE_ECN_MARKED) { | ||
614 | ccid2_congestion_event(sk, | 491 | ccid2_congestion_event(sk, |
615 | seqp); | 492 | seqp); |
616 | } else | 493 | else |
617 | ccid2_new_ack(sk, seqp, | 494 | ccid2_new_ack(sk, seqp, |
618 | &maxincr); | 495 | &maxincr); |
619 | 496 | ||
620 | seqp->ccid2s_acked = 1; | 497 | seqp->ccid2s_acked = 1; |
621 | ccid2_pr_debug("Got ack for %llu\n", | 498 | ccid2_pr_debug("Got ack for %llu\n", |
622 | (unsigned long long)seqp->ccid2s_seq); | 499 | (unsigned long long)seqp->ccid2s_seq); |
623 | ccid2_hc_tx_dec_pipe(sk); | 500 | hc->tx_pipe--; |
624 | } | 501 | } |
625 | if (seqp == hc->tx_seqt) { | 502 | if (seqp == hc->tx_seqt) { |
626 | done = 1; | 503 | done = 1; |
@@ -632,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
632 | break; | 509 | break; |
633 | 510 | ||
634 | ackno = SUB48(ackno_end_rl, 1); | 511 | ackno = SUB48(ackno_end_rl, 1); |
635 | vector++; | ||
636 | } | 512 | } |
637 | if (done) | 513 | if (done) |
638 | break; | 514 | break; |
@@ -677,7 +553,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
677 | * one ack vector. | 553 | * one ack vector. |
678 | */ | 554 | */ |
679 | ccid2_congestion_event(sk, seqp); | 555 | ccid2_congestion_event(sk, seqp); |
680 | ccid2_hc_tx_dec_pipe(sk); | 556 | hc->tx_pipe--; |
681 | } | 557 | } |
682 | if (seqp == hc->tx_seqt) | 558 | if (seqp == hc->tx_seqt) |
683 | break; | 559 | break; |
@@ -695,7 +571,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
695 | hc->tx_seqt = hc->tx_seqt->ccid2s_next; | 571 | hc->tx_seqt = hc->tx_seqt->ccid2s_next; |
696 | } | 572 | } |
697 | 573 | ||
698 | ccid2_hc_tx_check_sanity(hc); | 574 | /* restart RTO timer if not all outstanding data has been acked */ |
575 | if (hc->tx_pipe == 0) | ||
576 | sk_stop_timer(sk, &hc->tx_rtotimer); | ||
577 | else | ||
578 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); | ||
579 | done: | ||
580 | /* check if incoming Acks allow pending packets to be sent */ | ||
581 | if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) | ||
582 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); | ||
583 | dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); | ||
584 | } | ||
585 | |||
586 | /* | ||
587 | * Convert RFC 3390 larger initial window into an equivalent number of packets. | ||
588 | * This is based on the numbers specified in RFC 5681, 3.1. | ||
589 | */ | ||
590 | static inline u32 rfc3390_bytes_to_packets(const u32 smss) | ||
591 | { | ||
592 | return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); | ||
699 | } | 593 | } |
700 | 594 | ||
701 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | 595 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) |
@@ -707,12 +601,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
707 | /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ | 601 | /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ |
708 | hc->tx_ssthresh = ~0U; | 602 | hc->tx_ssthresh = ~0U; |
709 | 603 | ||
710 | /* | 604 | /* Use larger initial windows (RFC 4341, section 5). */ |
711 | * RFC 4341, 5: "The cwnd parameter is initialized to at most four | 605 | hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); |
712 | * packets for new connections, following the rules from [RFC3390]". | ||
713 | * We need to convert the bytes of RFC3390 into the packets of RFC 4341. | ||
714 | */ | ||
715 | hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); | ||
716 | 606 | ||
717 | /* Make sure that Ack Ratio is enabled and within bounds. */ | 607 | /* Make sure that Ack Ratio is enabled and within bounds. */ |
718 | max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); | 608 | max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); |
@@ -723,15 +613,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
723 | if (ccid2_hc_tx_alloc_seq(hc)) | 613 | if (ccid2_hc_tx_alloc_seq(hc)) |
724 | return -ENOMEM; | 614 | return -ENOMEM; |
725 | 615 | ||
726 | hc->tx_rto = 3 * HZ; | 616 | hc->tx_rto = DCCP_TIMEOUT_INIT; |
727 | ccid2_change_srtt(hc, -1); | ||
728 | hc->tx_rttvar = -1; | ||
729 | hc->tx_rpdupack = -1; | 617 | hc->tx_rpdupack = -1; |
730 | hc->tx_last_cong = jiffies; | 618 | hc->tx_last_cong = ccid2_time_stamp; |
731 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, | 619 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, |
732 | (unsigned long)sk); | 620 | (unsigned long)sk); |
733 | 621 | INIT_LIST_HEAD(&hc->tx_av_chunks); | |
734 | ccid2_hc_tx_check_sanity(hc); | ||
735 | return 0; | 622 | return 0; |
736 | } | 623 | } |
737 | 624 | ||
@@ -740,7 +627,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) | |||
740 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 627 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
741 | int i; | 628 | int i; |
742 | 629 | ||
743 | ccid2_hc_tx_kill_rto_timer(sk); | 630 | sk_stop_timer(sk, &hc->tx_rtotimer); |
744 | 631 | ||
745 | for (i = 0; i < hc->tx_seqbufc; i++) | 632 | for (i = 0; i < hc->tx_seqbufc; i++) |
746 | kfree(hc->tx_seqbuf[i]); | 633 | kfree(hc->tx_seqbuf[i]); |
@@ -765,16 +652,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
765 | } | 652 | } |
766 | 653 | ||
767 | struct ccid_operations ccid2_ops = { | 654 | struct ccid_operations ccid2_ops = { |
768 | .ccid_id = DCCPC_CCID2, | 655 | .ccid_id = DCCPC_CCID2, |
769 | .ccid_name = "TCP-like", | 656 | .ccid_name = "TCP-like", |
770 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), | 657 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), |
771 | .ccid_hc_tx_init = ccid2_hc_tx_init, | 658 | .ccid_hc_tx_init = ccid2_hc_tx_init, |
772 | .ccid_hc_tx_exit = ccid2_hc_tx_exit, | 659 | .ccid_hc_tx_exit = ccid2_hc_tx_exit, |
773 | .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, | 660 | .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, |
774 | .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, | 661 | .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, |
775 | .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, | 662 | .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, |
776 | .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), | 663 | .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, |
777 | .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, | 664 | .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), |
665 | .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, | ||
778 | }; | 666 | }; |
779 | 667 | ||
780 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 668 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |