aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Bittau <a.bittau@cs.ucl.ac.uk>2006-03-20 20:19:55 -0500
committerDavid S. Miller <davem@davemloft.net>2006-03-20 20:19:55 -0500
commit02bcf28c82c8e4b72c4b89bddbbb6fea1a646d07 (patch)
treed70435d97b3454dcafb6decd6defd603fdf10f82
parente229c2fb3370a0c4ebac06cad67ce1cb35abcfe6 (diff)
[DCCP] ackvec: Introduce ack vector records
Based on a patch by Andrea Bittau. Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk> Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/dccp/ackvec.c240
-rw-r--r--net/dccp/ackvec.h31
2 files changed, 173 insertions, 98 deletions
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 64408253b14e..b4ff14f3d4f8 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -22,6 +22,47 @@
22#include <net/sock.h> 22#include <net/sock.h>
23 23
24static kmem_cache_t *dccp_ackvec_slab; 24static kmem_cache_t *dccp_ackvec_slab;
25static kmem_cache_t *dccp_ackvec_record_slab;
26
27static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
28{
29 struct dccp_ackvec_record *avr =
30 kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
31
32 if (avr != NULL)
33 INIT_LIST_HEAD(&avr->dccpavr_node);
34
35 return avr;
36}
37
38static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
39{
40 if (unlikely(avr == NULL))
41 return;
42 /* Check if deleting a linked record */
43 WARN_ON(!list_empty(&avr->dccpavr_node));
44 kmem_cache_free(dccp_ackvec_record_slab, avr);
45}
46
47static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
48 struct dccp_ackvec_record *avr)
49{
50 /*
51 * AVRs are sorted by seqno. Since we are sending them in order, we
52 * just add the AVR at the head of the list.
53 * -sorbo.
54 */
55 if (!list_empty(&av->dccpav_records)) {
56 const struct dccp_ackvec_record *head =
57 list_entry(av->dccpav_records.next,
58 struct dccp_ackvec_record,
59 dccpavr_node);
60 BUG_ON(before48(avr->dccpavr_ack_seqno,
61 head->dccpavr_ack_seqno));
62 }
63
64 list_add(&avr->dccpavr_node, &av->dccpav_records);
65}
25 66
26int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) 67int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
27{ 68{
@@ -35,6 +76,14 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
35 struct timeval now; 76 struct timeval now;
36 u32 elapsed_time; 77 u32 elapsed_time;
37 unsigned char *to, *from; 78 unsigned char *to, *from;
79 struct dccp_ackvec_record *avr;
80
81 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
82 return -1;
83
84 avr = dccp_ackvec_record_new();
85 if (avr == NULL)
86 return -1;
38 87
39 dccp_timestamp(sk, &now); 88 dccp_timestamp(sk, &now);
40 elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; 89 elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10;
@@ -42,19 +91,6 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
42 if (elapsed_time != 0) 91 if (elapsed_time != 0)
43 dccp_insert_option_elapsed_time(sk, skb, elapsed_time); 92 dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
44 93
45 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
46 return -1;
47
48 /*
49 * XXX: now we have just one ack vector sent record, so
50 * we have to wait for it to be cleared.
51 *
52 * Of course this is not acceptable, but this is just for
53 * basic testing now.
54 */
55 if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1)
56 return -1;
57
58 DCCP_SKB_CB(skb)->dccpd_opt_len += len; 94 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
59 95
60 to = skb_push(skb, len); 96 to = skb_push(skb, len);
@@ -65,8 +101,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
65 from = av->dccpav_buf + av->dccpav_buf_head; 101 from = av->dccpav_buf + av->dccpav_buf_head;
66 102
67 /* Check if buf_head wraps */ 103 /* Check if buf_head wraps */
68 if ((int)av->dccpav_buf_head + len > av->dccpav_vec_len) { 104 if ((int)av->dccpav_buf_head + len > DCCP_MAX_ACKVEC_LEN) {
69 const u32 tailsize = av->dccpav_vec_len - av->dccpav_buf_head; 105 const u32 tailsize = DCCP_MAX_ACKVEC_LEN - av->dccpav_buf_head;
70 106
71 memcpy(to, from, tailsize); 107 memcpy(to, from, tailsize);
72 to += tailsize; 108 to += tailsize;
@@ -83,21 +119,21 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
83 * sequence number it used for the ack packet; ack_ptr will equal 119 * sequence number it used for the ack packet; ack_ptr will equal
84 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will 120 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
85 * equal buf_nonce. 121 * equal buf_nonce.
86 *
87 * This implemention uses just one ack record for now.
88 */ 122 */
89 av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; 123 avr->dccpavr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
90 av->dccpav_ack_ptr = av->dccpav_buf_head; 124 avr->dccpavr_ack_ptr = av->dccpav_buf_head;
91 av->dccpav_ack_ackno = av->dccpav_buf_ackno; 125 avr->dccpavr_ack_ackno = av->dccpav_buf_ackno;
92 av->dccpav_ack_nonce = av->dccpav_buf_nonce; 126 avr->dccpavr_ack_nonce = av->dccpav_buf_nonce;
93 av->dccpav_sent_len = av->dccpav_vec_len; 127 avr->dccpavr_sent_len = av->dccpav_vec_len;
128
129 dccp_ackvec_insert_avr(av, avr);
94 130
95 dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " 131 dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
96 "ack_ackno=%llu\n", 132 "ack_ackno=%llu\n",
97 debug_prefix, av->dccpav_sent_len, 133 debug_prefix, avr->dccpavr_sent_len,
98 (unsigned long long)av->dccpav_ack_seqno, 134 (unsigned long long)avr->dccpavr_ack_seqno,
99 (unsigned long long)av->dccpav_ack_ackno); 135 (unsigned long long)avr->dccpavr_ack_ackno);
100 return -1; 136 return 0;
101} 137}
102 138
103struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) 139struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
@@ -107,13 +143,13 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
107 if (av != NULL) { 143 if (av != NULL) {
108 av->dccpav_buf_head = 144 av->dccpav_buf_head =
109 av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; 145 av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
110 av->dccpav_buf_ackno = 146 av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1;
111 av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU;
112 av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; 147 av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
113 av->dccpav_ack_ptr = 0; 148 av->dccpav_ack_ptr = 0;
114 av->dccpav_time.tv_sec = 0; 149 av->dccpav_time.tv_sec = 0;
115 av->dccpav_time.tv_usec = 0; 150 av->dccpav_time.tv_usec = 0;
116 av->dccpav_sent_len = av->dccpav_vec_len = 0; 151 av->dccpav_sent_len = av->dccpav_vec_len = 0;
152 INIT_LIST_HEAD(&av->dccpav_records);
117 } 153 }
118 154
119 return av; 155 return av;
@@ -121,6 +157,9 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
121 157
122void dccp_ackvec_free(struct dccp_ackvec *av) 158void dccp_ackvec_free(struct dccp_ackvec *av)
123{ 159{
160 if (unlikely(av == NULL))
161 return;
162 WARN_ON(!list_empty(&av->dccpav_records));
124 kmem_cache_free(dccp_ackvec_slab, av); 163 kmem_cache_free(dccp_ackvec_slab, av);
125} 164}
126 165
@@ -299,44 +338,50 @@ void dccp_ackvec_print(const struct dccp_ackvec *av)
299} 338}
300#endif 339#endif
301 340
302static void dccp_ackvec_throw_away_ack_record(struct dccp_ackvec *av) 341static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
342 struct dccp_ackvec_record *avr)
303{ 343{
304 /* 344 struct dccp_ackvec_record *next;
305 * As we're keeping track of the ack vector size (dccpav_vec_len) and 345
306 * the sent ack vector size (dccpav_sent_len) we don't need 346 av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1;
307 * dccpav_buf_tail at all, but keep this code here as in the future 347 if (av->dccpav_buf_tail == 0)
308 * we'll implement a vector of ack records, as suggested in 348 av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
309 * draft-ietf-dccp-spec-11.txt Appendix A. -acme 349
310 */ 350 av->dccpav_vec_len -= avr->dccpavr_sent_len;
311#if 0 351
312 u32 new_buf_tail = av->dccpav_ack_ptr + 1; 352 /* free records */
313 if (new_buf_tail >= av->dccpav_vec_len) 353 list_for_each_entry_safe_from(avr, next, &av->dccpav_records,
314 new_buf_tail -= av->dccpav_vec_len; 354 dccpavr_node) {
315 av->dccpav_buf_tail = new_buf_tail; 355 list_del_init(&avr->dccpavr_node);
316#endif 356 dccp_ackvec_record_delete(avr);
317 av->dccpav_vec_len -= av->dccpav_sent_len; 357 }
318} 358}
319 359
320void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, 360void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
321 const u64 ackno) 361 const u64 ackno)
322{ 362{
323 /* Check if we actually sent an ACK vector */ 363 struct dccp_ackvec_record *avr;
324 if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1)
325 return;
326 364
327 if (ackno == av->dccpav_ack_seqno) { 365 /*
366 * If we traverse backwards, it should be faster when we have large
367 * windows. We will be receiving ACKs for stuff we sent a while back
368 * -sorbo.
369 */
370 list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) {
371 if (ackno == avr->dccpavr_ack_seqno) {
328#ifdef CONFIG_IP_DCCP_DEBUG 372#ifdef CONFIG_IP_DCCP_DEBUG
329 struct dccp_sock *dp = dccp_sk(sk); 373 struct dccp_sock *dp = dccp_sk(sk);
330 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 374 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
331 "CLIENT rx ack: " : "server rx ack: "; 375 "CLIENT rx ack: " : "server rx ack: ";
332#endif 376#endif
333 dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " 377 dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
334 "ack_ackno=%llu, ACKED!\n", 378 "ack_ackno=%llu, ACKED!\n",
335 debug_prefix, 1, 379 debug_prefix, 1,
336 (unsigned long long)av->dccpav_ack_seqno, 380 (unsigned long long)avr->dccpavr_ack_seqno,
337 (unsigned long long)av->dccpav_ack_ackno); 381 (unsigned long long)avr->dccpavr_ack_ackno);
338 dccp_ackvec_throw_away_ack_record(av); 382 dccp_ackvec_throw_record(av, avr);
339 av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; 383 break;
384 }
340 } 385 }
341} 386}
342 387
@@ -346,28 +391,20 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
346 const unsigned char *vector) 391 const unsigned char *vector)
347{ 392{
348 unsigned char i; 393 unsigned char i;
394 struct dccp_ackvec_record *avr;
349 395
350 /* Check if we actually sent an ACK vector */ 396 /* Check if we actually sent an ACK vector */
351 if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) 397 if (list_empty(&av->dccpav_records))
352 return; 398 return;
353 /*
354 * We're in the receiver half connection, so if the received an ACK
355 * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're
356 * not interested.
357 *
358 * Extra explanation with example:
359 *
360 * if we received an ACK vector with ackno 50, it can only be acking
361 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
362 */
363 /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */
364 if (before48(ackno, av->dccpav_ack_seqno)) {
365 /* dccp_pr_debug_cat("yes\n"); */
366 return;
367 }
368 /* dccp_pr_debug_cat("no\n"); */
369 399
370 i = len; 400 i = len;
401 /*
402 * XXX
403 * I think it might be more efficient to work backwards. See comment on
404 * rcv_ackno. -sorbo.
405 */
406 avr = list_entry(av->dccpav_records.next, struct dccp_ackvec_record,
407 dccpavr_node);
371 while (i--) { 408 while (i--) {
372 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; 409 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
373 u64 ackno_end_rl; 410 u64 ackno_end_rl;
@@ -375,14 +412,20 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
375 dccp_set_seqno(&ackno_end_rl, ackno - rl); 412 dccp_set_seqno(&ackno_end_rl, ackno - rl);
376 413
377 /* 414 /*
378 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, 415 * If our AVR sequence number is greater than the ack, go
379 * av->dccpav_ack_seqno, ackno); 416 * forward in the AVR list until it is not so.
380 */ 417 */
381 if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) { 418 list_for_each_entry_from(avr, &av->dccpav_records,
419 dccpavr_node) {
420 if (!after48(avr->dccpavr_ack_seqno, ackno))
421 goto found;
422 }
423 /* End of the dccpav_records list, not found, exit */
424 break;
425found:
426 if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) {
382 const u8 state = (*vector & 427 const u8 state = (*vector &
383 DCCP_ACKVEC_STATE_MASK) >> 6; 428 DCCP_ACKVEC_STATE_MASK) >> 6;
384 /* dccp_pr_debug_cat("yes\n"); */
385
386 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { 429 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
387#ifdef CONFIG_IP_DCCP_DEBUG 430#ifdef CONFIG_IP_DCCP_DEBUG
388 struct dccp_sock *dp = dccp_sk(sk); 431 struct dccp_sock *dp = dccp_sk(sk);
@@ -395,19 +438,16 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
395 "ACKED!\n", 438 "ACKED!\n",
396 debug_prefix, len, 439 debug_prefix, len,
397 (unsigned long long) 440 (unsigned long long)
398 av->dccpav_ack_seqno, 441 avr->dccpavr_ack_seqno,
399 (unsigned long long) 442 (unsigned long long)
400 av->dccpav_ack_ackno); 443 avr->dccpavr_ack_ackno);
401 dccp_ackvec_throw_away_ack_record(av); 444 dccp_ackvec_throw_record(av, avr);
402 } 445 }
403 /* 446 /*
404 * If dccpav_ack_seqno was not received, no problem 447 * If it wasn't received, continue scanning... we might
405 * we'll send another ACK vector. 448 * find another one.
406 */ 449 */
407 av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
408 break;
409 } 450 }
410 /* dccp_pr_debug_cat("no\n"); */
411 451
412 dccp_set_seqno(&ackno, ackno_end_rl - 1); 452 dccp_set_seqno(&ackno, ackno_end_rl - 1);
413 ++vector; 453 ++vector;
@@ -428,19 +468,31 @@ int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
428} 468}
429 469
430static char dccp_ackvec_slab_msg[] __initdata = 470static char dccp_ackvec_slab_msg[] __initdata =
431 KERN_CRIT "DCCP: Unable to create ack vectors slab cache\n"; 471 KERN_CRIT "DCCP: Unable to create ack vectors slab caches\n";
432 472
433int __init dccp_ackvec_init(void) 473int __init dccp_ackvec_init(void)
434{ 474{
435 dccp_ackvec_slab = kmem_cache_create("dccp_ackvec", 475 dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
436 sizeof(struct dccp_ackvec), 0, 476 sizeof(struct dccp_ackvec), 0,
437 SLAB_HWCACHE_ALIGN, NULL, NULL); 477 SLAB_HWCACHE_ALIGN, NULL, NULL);
438 if (dccp_ackvec_slab == NULL) { 478 if (dccp_ackvec_slab == NULL)
439 printk(dccp_ackvec_slab_msg); 479 goto out_err;
440 return -ENOBUFS; 480
441 } 481 dccp_ackvec_record_slab =
482 kmem_cache_create("dccp_ackvec_record",
483 sizeof(struct dccp_ackvec_record),
484 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
485 if (dccp_ackvec_record_slab == NULL)
486 goto out_destroy_slab;
442 487
443 return 0; 488 return 0;
489
490out_destroy_slab:
491 kmem_cache_destroy(dccp_ackvec_slab);
492 dccp_ackvec_slab = NULL;
493out_err:
494 printk(dccp_ackvec_slab_msg);
495 return -ENOBUFS;
444} 496}
445 497
446void dccp_ackvec_exit(void) 498void dccp_ackvec_exit(void)
@@ -449,4 +501,8 @@ void dccp_ackvec_exit(void)
449 kmem_cache_destroy(dccp_ackvec_slab); 501 kmem_cache_destroy(dccp_ackvec_slab);
450 dccp_ackvec_slab = NULL; 502 dccp_ackvec_slab = NULL;
451 } 503 }
504 if (dccp_ackvec_record_slab != NULL) {
505 kmem_cache_destroy(dccp_ackvec_record_slab);
506 dccp_ackvec_record_slab = NULL;
507 }
452} 508}
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 470bae8a9d07..ec7a89bb7b39 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -13,6 +13,7 @@
13 13
14#include <linux/config.h> 14#include <linux/config.h>
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/list.h>
16#include <linux/time.h> 17#include <linux/time.h>
17#include <linux/types.h> 18#include <linux/types.h>
18 19
@@ -42,11 +43,8 @@
42 * Ack Vectors it has recently sent. For each packet sent carrying an 43 * Ack Vectors it has recently sent. For each packet sent carrying an
43 * Ack Vector, it remembers four variables: 44 * Ack Vector, it remembers four variables:
44 * 45 *
45 * @dccpav_ack_seqno - the Sequence Number used for the packet
46 * (HC-Receiver seqno)
47 * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement. 46 * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
48 * @dccpav_ack_ackno - the Acknowledgement Number used for the packet 47 * @dccpav_records - list of dccp_ackvec_record
49 * (HC-Sender seqno)
50 * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. 48 * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
51 * 49 *
52 * @dccpav_time - the time in usecs 50 * @dccpav_time - the time in usecs
@@ -54,8 +52,7 @@
54 */ 52 */
55struct dccp_ackvec { 53struct dccp_ackvec {
56 u64 dccpav_buf_ackno; 54 u64 dccpav_buf_ackno;
57 u64 dccpav_ack_seqno; 55 struct list_head dccpav_records;
58 u64 dccpav_ack_ackno;
59 struct timeval dccpav_time; 56 struct timeval dccpav_time;
60 u8 dccpav_buf_head; 57 u8 dccpav_buf_head;
61 u8 dccpav_buf_tail; 58 u8 dccpav_buf_tail;
@@ -67,6 +64,28 @@ struct dccp_ackvec {
67 u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN]; 64 u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN];
68}; 65};
69 66
67/** struct dccp_ackvec_record - ack vector record
68 *
69 * ACK vector record as defined in Appendix A of spec.
70 *
71 * The list is sorted by dccpavr_ack_seqno
72 *
73 * @dccpavr_node - node in dccpav_records
74 * @dccpavr_ack_seqno - sequence number of the packet this record was sent on
75 * @dccpavr_ack_ackno - sequence number being acknowledged
76 * @dccpavr_ack_ptr - pointer into dccpav_buf where this record starts
77 * @dccpavr_ack_nonce - dccpav_buf_nonce at the time this record was sent
78 * @dccpavr_sent_len - length of the record in dccpav_buf
79 */
80struct dccp_ackvec_record {
81 struct list_head dccpavr_node;
82 u64 dccpavr_ack_seqno;
83 u64 dccpavr_ack_ackno;
84 u8 dccpavr_ack_ptr;
85 u8 dccpavr_ack_nonce;
86 u8 dccpavr_sent_len;
87};
88
70struct sock; 89struct sock;
71struct sk_buff; 90struct sk_buff;
72 91