aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2012-01-04 09:18:38 -0500
committerDavid S. Miller <davem@davemloft.net>2012-01-05 14:01:21 -0500
commit18cb809850fb499ad9bf288696a95f4071f73931 (patch)
tree6f6c7c836176385baf0f185b6f5b4092e9ac1fdf /net
parent23021c21055f88a428b6deb6f803fa0d659e023f (diff)
net_sched: sfq: extend limits
SFQ as implemented in Linux is very limited, with at most 127 flows and limit of 127 packets. [ So if 127 flows are active, we have one packet per flow ] This patch brings to SFQ following features to cope with modern needs. - Ability to specify a smaller per flow limit of inflight packets. (default value being at 127 packets) - Ability to have up to 65408 active flows (instead of 127) - Ability to have head drops instead of tail drops (to drop old packets from a flow) Example of use : No more than 20 packets per flow, max 8000 flows, max 20000 packets in SFQ qdisc, hash table of 65536 slots. tc qdisc add ... sfq \ flows 8000 \ depth 20 \ headdrop \ limit 20000 \ divisor 65536 Ram usage : 2 bytes per hash table entry (instead of previous 1 byte/entry) 32 bytes per flow on 64bit arches, instead of 384 for QFQ, so much better cache hit ratio. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Dave Taht <dave.taht@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/sched/sch_sfq.c175
1 files changed, 117 insertions, 58 deletions
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 843018154a5..0a7964009e8 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -66,16 +66,18 @@
66 SFQ is superior for this purpose. 66 SFQ is superior for this purpose.
67 67
68 IMPLEMENTATION: 68 IMPLEMENTATION:
69 This implementation limits maximal queue length to 128; 69 This implementation limits :
70 max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024. 70 - maximal queue length per flow to 127 packets.
71 The only goal of this restrictions was that all data 71 - max mtu to 2^18-1;
72 fit into one 4K page on 32bit arches. 72 - max 65408 flows,
73 - number of hash buckets to 65536.
73 74
74 It is easy to increase these values, but not in flight. */ 75 It is easy to increase these values, but not in flight. */
75 76
76#define SFQ_DEPTH 128 /* max number of packets per flow */ 77#define SFQ_MAX_DEPTH 127 /* max number of packets per flow */
77#define SFQ_SLOTS 128 /* max number of flows */ 78#define SFQ_DEFAULT_FLOWS 128
78#define SFQ_EMPTY_SLOT 255 79#define SFQ_MAX_FLOWS (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
80#define SFQ_EMPTY_SLOT 0xffff
79#define SFQ_DEFAULT_HASH_DIVISOR 1024 81#define SFQ_DEFAULT_HASH_DIVISOR 1024
80 82
81/* We use 16 bits to store allot, and want to handle packets up to 64K 83/* We use 16 bits to store allot, and want to handle packets up to 64K
@@ -84,13 +86,13 @@
84#define SFQ_ALLOT_SHIFT 3 86#define SFQ_ALLOT_SHIFT 3
85#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) 87#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
86 88
87/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */ 89/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
88typedef unsigned char sfq_index; 90typedef u16 sfq_index;
89 91
90/* 92/*
91 * We dont use pointers to save space. 93 * We dont use pointers to save space.
92 * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array 94 * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
93 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] 95 * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
94 * are 'pointers' to dep[] array 96 * are 'pointers' to dep[] array
95 */ 97 */
96struct sfq_head { 98struct sfq_head {
@@ -102,28 +104,38 @@ struct sfq_slot {
102 struct sk_buff *skblist_next; 104 struct sk_buff *skblist_next;
103 struct sk_buff *skblist_prev; 105 struct sk_buff *skblist_prev;
104 sfq_index qlen; /* number of skbs in skblist */ 106 sfq_index qlen; /* number of skbs in skblist */
105 sfq_index next; /* next slot in sfq chain */ 107 sfq_index next; /* next slot in sfq RR chain */
106 struct sfq_head dep; /* anchor in dep[] chains */ 108 struct sfq_head dep; /* anchor in dep[] chains */
107 unsigned short hash; /* hash value (index in ht[]) */ 109 unsigned short hash; /* hash value (index in ht[]) */
108 short allot; /* credit for this slot */ 110 short allot; /* credit for this slot */
109}; 111};
110 112
111struct sfq_sched_data { 113struct sfq_sched_data {
112/* Parameters */ 114/* frequently used fields */
113 int perturb_period; 115 int limit; /* limit of total number of packets in this qdisc */
114 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
115 int limit;
116 unsigned int divisor; /* number of slots in hash table */ 116 unsigned int divisor; /* number of slots in hash table */
117/* Variables */ 117 unsigned int maxflows; /* number of flows in flows array */
118 struct tcf_proto *filter_list; 118 int headdrop;
119 struct timer_list perturb_timer; 119 int maxdepth; /* limit of packets per flow */
120
120 u32 perturbation; 121 u32 perturbation;
122 struct tcf_proto *filter_list;
121 sfq_index cur_depth; /* depth of longest slot */ 123 sfq_index cur_depth; /* depth of longest slot */
122 unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ 124 unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
123 struct sfq_slot *tail; /* current slot in round */ 125 struct sfq_slot *tail; /* current slot in round */
124 sfq_index *ht; /* Hash table (divisor slots) */ 126 sfq_index *ht; /* Hash table ('divisor' slots) */
125 struct sfq_slot slots[SFQ_SLOTS]; 127 struct sfq_slot *slots; /* Flows table ('maxflows' entries) */
126 struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ 128
129 struct sfq_head dep[SFQ_MAX_DEPTH + 1];
130 /* Linked lists of slots, indexed by depth
131 * dep[0] : list of unused flows
132 * dep[1] : list of flows with 1 packet
133 * dep[X] : list of flows with X packets
134 */
135
136 int perturb_period;
137 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
138 struct timer_list perturb_timer;
127}; 139};
128 140
129/* 141/*
@@ -131,9 +143,9 @@ struct sfq_sched_data {
131 */ 143 */
132static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) 144static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
133{ 145{
134 if (val < SFQ_SLOTS) 146 if (val < SFQ_MAX_FLOWS)
135 return &q->slots[val].dep; 147 return &q->slots[val].dep;
136 return &q->dep[val - SFQ_SLOTS]; 148 return &q->dep[val - SFQ_MAX_FLOWS];
137} 149}
138 150
139/* 151/*
@@ -199,18 +211,19 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
199} 211}
200 212
201/* 213/*
202 * x : slot number [0 .. SFQ_SLOTS - 1] 214 * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
203 */ 215 */
204static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) 216static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
205{ 217{
206 sfq_index p, n; 218 sfq_index p, n;
207 int qlen = q->slots[x].qlen; 219 struct sfq_slot *slot = &q->slots[x];
220 int qlen = slot->qlen;
208 221
209 p = qlen + SFQ_SLOTS; 222 p = qlen + SFQ_MAX_FLOWS;
210 n = q->dep[qlen].next; 223 n = q->dep[qlen].next;
211 224
212 q->slots[x].dep.next = n; 225 slot->dep.next = n;
213 q->slots[x].dep.prev = p; 226 slot->dep.prev = p;
214 227
215 q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ 228 q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
216 sfq_dep_head(q, n)->prev = x; 229 sfq_dep_head(q, n)->prev = x;
@@ -275,6 +288,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
275 288
276static inline void slot_queue_init(struct sfq_slot *slot) 289static inline void slot_queue_init(struct sfq_slot *slot)
277{ 290{
291 memset(slot, 0, sizeof(*slot));
278 slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; 292 slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
279} 293}
280 294
@@ -305,7 +319,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
305 x = q->dep[d].next; 319 x = q->dep[d].next;
306 slot = &q->slots[x]; 320 slot = &q->slots[x];
307drop: 321drop:
308 skb = slot_dequeue_tail(slot); 322 skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
309 len = qdisc_pkt_len(skb); 323 len = qdisc_pkt_len(skb);
310 sfq_dec(q, x); 324 sfq_dec(q, x);
311 kfree_skb(skb); 325 kfree_skb(skb);
@@ -349,16 +363,27 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
349 slot = &q->slots[x]; 363 slot = &q->slots[x];
350 if (x == SFQ_EMPTY_SLOT) { 364 if (x == SFQ_EMPTY_SLOT) {
351 x = q->dep[0].next; /* get a free slot */ 365 x = q->dep[0].next; /* get a free slot */
366 if (x >= SFQ_MAX_FLOWS)
367 return qdisc_drop(skb, sch);
352 q->ht[hash] = x; 368 q->ht[hash] = x;
353 slot = &q->slots[x]; 369 slot = &q->slots[x];
354 slot->hash = hash; 370 slot->hash = hash;
355 } 371 }
356 372
357 /* If selected queue has length q->limit, do simple tail drop, 373 if (slot->qlen >= q->maxdepth) {
358 * i.e. drop _this_ packet. 374 struct sk_buff *head;
359 */ 375
360 if (slot->qlen >= q->limit) 376 if (!q->headdrop)
361 return qdisc_drop(skb, sch); 377 return qdisc_drop(skb, sch);
378
379 head = slot_dequeue_head(slot);
380 sch->qstats.backlog -= qdisc_pkt_len(head);
381 qdisc_drop(head, sch);
382
383 sch->qstats.backlog += qdisc_pkt_len(skb);
384 slot_queue_add(slot, skb);
385 return NET_XMIT_CN;
386 }
362 387
363 sch->qstats.backlog += qdisc_pkt_len(skb); 388 sch->qstats.backlog += qdisc_pkt_len(skb);
364 slot_queue_add(slot, skb); 389 slot_queue_add(slot, skb);
@@ -445,16 +470,18 @@ sfq_reset(struct Qdisc *sch)
445 * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change 470 * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
446 * counters. 471 * counters.
447 */ 472 */
448static void sfq_rehash(struct sfq_sched_data *q) 473static void sfq_rehash(struct Qdisc *sch)
449{ 474{
475 struct sfq_sched_data *q = qdisc_priv(sch);
450 struct sk_buff *skb; 476 struct sk_buff *skb;
451 int i; 477 int i;
452 struct sfq_slot *slot; 478 struct sfq_slot *slot;
453 struct sk_buff_head list; 479 struct sk_buff_head list;
480 int dropped = 0;
454 481
455 __skb_queue_head_init(&list); 482 __skb_queue_head_init(&list);
456 483
457 for (i = 0; i < SFQ_SLOTS; i++) { 484 for (i = 0; i < q->maxflows; i++) {
458 slot = &q->slots[i]; 485 slot = &q->slots[i];
459 if (!slot->qlen) 486 if (!slot->qlen)
460 continue; 487 continue;
@@ -474,10 +501,18 @@ static void sfq_rehash(struct sfq_sched_data *q)
474 slot = &q->slots[x]; 501 slot = &q->slots[x];
475 if (x == SFQ_EMPTY_SLOT) { 502 if (x == SFQ_EMPTY_SLOT) {
476 x = q->dep[0].next; /* get a free slot */ 503 x = q->dep[0].next; /* get a free slot */
504 if (x >= SFQ_MAX_FLOWS) {
505drop: sch->qstats.backlog -= qdisc_pkt_len(skb);
506 kfree_skb(skb);
507 dropped++;
508 continue;
509 }
477 q->ht[hash] = x; 510 q->ht[hash] = x;
478 slot = &q->slots[x]; 511 slot = &q->slots[x];
479 slot->hash = hash; 512 slot->hash = hash;
480 } 513 }
514 if (slot->qlen >= q->maxdepth)
515 goto drop;
481 slot_queue_add(slot, skb); 516 slot_queue_add(slot, skb);
482 sfq_inc(q, x); 517 sfq_inc(q, x);
483 if (slot->qlen == 1) { /* The flow is new */ 518 if (slot->qlen == 1) { /* The flow is new */
@@ -491,6 +526,8 @@ static void sfq_rehash(struct sfq_sched_data *q)
491 slot->allot = q->scaled_quantum; 526 slot->allot = q->scaled_quantum;
492 } 527 }
493 } 528 }
529 sch->q.qlen -= dropped;
530 qdisc_tree_decrease_qlen(sch, dropped);
494} 531}
495 532
496static void sfq_perturbation(unsigned long arg) 533static void sfq_perturbation(unsigned long arg)
@@ -502,7 +539,7 @@ static void sfq_perturbation(unsigned long arg)
502 spin_lock(root_lock); 539 spin_lock(root_lock);
503 q->perturbation = net_random(); 540 q->perturbation = net_random();
504 if (!q->filter_list && q->tail) 541 if (!q->filter_list && q->tail)
505 sfq_rehash(q); 542 sfq_rehash(sch);
506 spin_unlock(root_lock); 543 spin_unlock(root_lock);
507 544
508 if (q->perturb_period) 545 if (q->perturb_period)
@@ -513,23 +550,39 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
513{ 550{
514 struct sfq_sched_data *q = qdisc_priv(sch); 551 struct sfq_sched_data *q = qdisc_priv(sch);
515 struct tc_sfq_qopt *ctl = nla_data(opt); 552 struct tc_sfq_qopt *ctl = nla_data(opt);
553 struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
516 unsigned int qlen; 554 unsigned int qlen;
517 555
518 if (opt->nla_len < nla_attr_size(sizeof(*ctl))) 556 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
519 return -EINVAL; 557 return -EINVAL;
520 558 if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
559 ctl_v1 = nla_data(opt);
521 if (ctl->divisor && 560 if (ctl->divisor &&
522 (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) 561 (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
523 return -EINVAL; 562 return -EINVAL;
524 563
525 sch_tree_lock(sch); 564 sch_tree_lock(sch);
526 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); 565 if (ctl->quantum) {
527 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 566 q->quantum = ctl->quantum;
567 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
568 }
528 q->perturb_period = ctl->perturb_period * HZ; 569 q->perturb_period = ctl->perturb_period * HZ;
529 if (ctl->limit) 570 if (ctl->flows)
530 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); 571 q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
531 if (ctl->divisor) 572 if (ctl->divisor) {
532 q->divisor = ctl->divisor; 573 q->divisor = ctl->divisor;
574 q->maxflows = min_t(u32, q->maxflows, q->divisor);
575 }
576 if (ctl_v1) {
577 if (ctl_v1->depth)
578 q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
579 q->headdrop = ctl_v1->headdrop;
580 }
581 if (ctl->limit) {
582 q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
583 q->maxflows = min_t(u32, q->maxflows, q->limit);
584 }
585
533 qlen = sch->q.qlen; 586 qlen = sch->q.qlen;
534 while (sch->q.qlen > q->limit) 587 while (sch->q.qlen > q->limit)
535 sfq_drop(sch); 588 sfq_drop(sch);
@@ -571,6 +624,7 @@ static void sfq_destroy(struct Qdisc *sch)
571 q->perturb_period = 0; 624 q->perturb_period = 0;
572 del_timer_sync(&q->perturb_timer); 625 del_timer_sync(&q->perturb_timer);
573 sfq_free(q->ht); 626 sfq_free(q->ht);
627 sfq_free(q->slots);
574} 628}
575 629
576static int sfq_init(struct Qdisc *sch, struct nlattr *opt) 630static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
@@ -582,15 +636,17 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
582 q->perturb_timer.data = (unsigned long)sch; 636 q->perturb_timer.data = (unsigned long)sch;
583 init_timer_deferrable(&q->perturb_timer); 637 init_timer_deferrable(&q->perturb_timer);
584 638
585 for (i = 0; i < SFQ_DEPTH; i++) { 639 for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
586 q->dep[i].next = i + SFQ_SLOTS; 640 q->dep[i].next = i + SFQ_MAX_FLOWS;
587 q->dep[i].prev = i + SFQ_SLOTS; 641 q->dep[i].prev = i + SFQ_MAX_FLOWS;
588 } 642 }
589 643
590 q->limit = SFQ_DEPTH - 1; 644 q->limit = SFQ_MAX_DEPTH;
645 q->maxdepth = SFQ_MAX_DEPTH;
591 q->cur_depth = 0; 646 q->cur_depth = 0;
592 q->tail = NULL; 647 q->tail = NULL;
593 q->divisor = SFQ_DEFAULT_HASH_DIVISOR; 648 q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
649 q->maxflows = SFQ_DEFAULT_FLOWS;
594 q->quantum = psched_mtu(qdisc_dev(sch)); 650 q->quantum = psched_mtu(qdisc_dev(sch));
595 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 651 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
596 q->perturb_period = 0; 652 q->perturb_period = 0;
@@ -603,14 +659,15 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
603 } 659 }
604 660
605 q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); 661 q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
606 if (!q->ht) { 662 q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
663 if (!q->ht || !q->slots) {
607 sfq_destroy(sch); 664 sfq_destroy(sch);
608 return -ENOMEM; 665 return -ENOMEM;
609 } 666 }
610 for (i = 0; i < q->divisor; i++) 667 for (i = 0; i < q->divisor; i++)
611 q->ht[i] = SFQ_EMPTY_SLOT; 668 q->ht[i] = SFQ_EMPTY_SLOT;
612 669
613 for (i = 0; i < SFQ_SLOTS; i++) { 670 for (i = 0; i < q->maxflows; i++) {
614 slot_queue_init(&q->slots[i]); 671 slot_queue_init(&q->slots[i]);
615 sfq_link(q, i); 672 sfq_link(q, i);
616 } 673 }
@@ -625,14 +682,16 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
625{ 682{
626 struct sfq_sched_data *q = qdisc_priv(sch); 683 struct sfq_sched_data *q = qdisc_priv(sch);
627 unsigned char *b = skb_tail_pointer(skb); 684 unsigned char *b = skb_tail_pointer(skb);
628 struct tc_sfq_qopt opt; 685 struct tc_sfq_qopt_v1 opt;
629 686
630 opt.quantum = q->quantum; 687 memset(&opt, 0, sizeof(opt));
631 opt.perturb_period = q->perturb_period / HZ; 688 opt.v0.quantum = q->quantum;
632 689 opt.v0.perturb_period = q->perturb_period / HZ;
633 opt.limit = q->limit; 690 opt.v0.limit = q->limit;
634 opt.divisor = q->divisor; 691 opt.v0.divisor = q->divisor;
635 opt.flows = q->limit; 692 opt.v0.flows = q->maxflows;
693 opt.depth = q->maxdepth;
694 opt.headdrop = q->headdrop;
636 695
637 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 696 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
638 697