diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-01-19 19:14:58 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-01-20 19:59:16 -0500 |
commit | 817fb15dfd988d8dda916ee04fa506f0c466b9d6 (patch) | |
tree | 9c857324cff66804949994cccb6050ef6b6c2aea /net/sched | |
parent | 3fbd8758b027995b677046dae46f9b41ea88c88f (diff) |
net_sched: sfq: allow divisor to be a parameter
SFQ currently uses a 1024-slot hash table, and allocating its internal
structure (sfq_sched_data) needs an order-1 page on x86_64.
Allow the tc command to specify a divisor value (hash table size), which
must be a power of two between 1 and 65536.
If no value is provided, assume the 1024 default size.
This allows admins to set up a smaller (or bigger) SFQ for specific needs.
This also brings back sfq_sched_data allocations to order-0 ones, saving
3KB per SFQ qdisc.
Jesper uses ~55,000 SFQ qdiscs on one machine; this patch should free about
165 MB of memory.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
CC: Jesper Dangaard Brouer <hawk@diku.dk>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Jamal Hadi Salim <hadi@cyberus.ca>
CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/sch_sfq.c | 42 |
1 files changed, 30 insertions, 12 deletions
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 54a36f43a1f1..156ad30980b5 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/skbuff.h> | 21 | #include <linux/skbuff.h> |
22 | #include <linux/jhash.h> | 22 | #include <linux/jhash.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/vmalloc.h> | ||
24 | #include <net/ip.h> | 25 | #include <net/ip.h> |
25 | #include <net/netlink.h> | 26 | #include <net/netlink.h> |
26 | #include <net/pkt_sched.h> | 27 | #include <net/pkt_sched.h> |
@@ -76,7 +77,8 @@ | |||
76 | #define SFQ_DEPTH 128 /* max number of packets per flow */ | 77 | #define SFQ_DEPTH 128 /* max number of packets per flow */ |
77 | #define SFQ_SLOTS 128 /* max number of flows */ | 78 | #define SFQ_SLOTS 128 /* max number of flows */ |
78 | #define SFQ_EMPTY_SLOT 255 | 79 | #define SFQ_EMPTY_SLOT 255 |
79 | #define SFQ_HASH_DIVISOR 1024 | 80 | #define SFQ_DEFAULT_HASH_DIVISOR 1024 |
81 | |||
80 | /* We use 16 bits to store allot, and want to handle packets up to 64K | 82 | /* We use 16 bits to store allot, and want to handle packets up to 64K |
81 | * Scale allot by 8 (1<<3) so that no overflow occurs. | 83 | * Scale allot by 8 (1<<3) so that no overflow occurs. |
82 | */ | 84 | */ |
@@ -112,7 +114,7 @@ struct sfq_sched_data { | |||
112 | int perturb_period; | 114 | int perturb_period; |
113 | unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ | 115 | unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ |
114 | int limit; | 116 | int limit; |
115 | 117 | unsigned int divisor; /* number of slots in hash table */ | |
116 | /* Variables */ | 118 | /* Variables */ |
117 | struct tcf_proto *filter_list; | 119 | struct tcf_proto *filter_list; |
118 | struct timer_list perturb_timer; | 120 | struct timer_list perturb_timer; |
@@ -120,7 +122,7 @@ struct sfq_sched_data { | |||
120 | sfq_index cur_depth; /* depth of longest slot */ | 122 | sfq_index cur_depth; /* depth of longest slot */ |
121 | unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ | 123 | unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ |
122 | struct sfq_slot *tail; /* current slot in round */ | 124 | struct sfq_slot *tail; /* current slot in round */ |
123 | sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ | 125 | sfq_index *ht; /* Hash table (divisor slots) */ |
124 | struct sfq_slot slots[SFQ_SLOTS]; | 126 | struct sfq_slot slots[SFQ_SLOTS]; |
125 | struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ | 127 | struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ |
126 | }; | 128 | }; |
@@ -137,7 +139,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index | |||
137 | 139 | ||
138 | static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) | 140 | static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) |
139 | { | 141 | { |
140 | return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); | 142 | return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1); |
141 | } | 143 | } |
142 | 144 | ||
143 | static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) | 145 | static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) |
@@ -201,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
201 | 203 | ||
202 | if (TC_H_MAJ(skb->priority) == sch->handle && | 204 | if (TC_H_MAJ(skb->priority) == sch->handle && |
203 | TC_H_MIN(skb->priority) > 0 && | 205 | TC_H_MIN(skb->priority) > 0 && |
204 | TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) | 206 | TC_H_MIN(skb->priority) <= q->divisor) |
205 | return TC_H_MIN(skb->priority); | 207 | return TC_H_MIN(skb->priority); |
206 | 208 | ||
207 | if (!q->filter_list) | 209 | if (!q->filter_list) |
@@ -219,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
219 | return 0; | 221 | return 0; |
220 | } | 222 | } |
221 | #endif | 223 | #endif |
222 | if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) | 224 | if (TC_H_MIN(res.classid) <= q->divisor) |
223 | return TC_H_MIN(res.classid); | 225 | return TC_H_MIN(res.classid); |
224 | } | 226 | } |
225 | return 0; | 227 | return 0; |
@@ -496,7 +498,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) | |||
496 | q->perturb_period = ctl->perturb_period * HZ; | 498 | q->perturb_period = ctl->perturb_period * HZ; |
497 | if (ctl->limit) | 499 | if (ctl->limit) |
498 | q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); | 500 | q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); |
499 | 501 | if (ctl->divisor) { | |
502 | if (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536) | ||
503 | return -EINVAL; | ||
504 | q->divisor = ctl->divisor; | ||
505 | } | ||
500 | qlen = sch->q.qlen; | 506 | qlen = sch->q.qlen; |
501 | while (sch->q.qlen > q->limit) | 507 | while (sch->q.qlen > q->limit) |
502 | sfq_drop(sch); | 508 | sfq_drop(sch); |
@@ -514,15 +520,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) | |||
514 | static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | 520 | static int sfq_init(struct Qdisc *sch, struct nlattr *opt) |
515 | { | 521 | { |
516 | struct sfq_sched_data *q = qdisc_priv(sch); | 522 | struct sfq_sched_data *q = qdisc_priv(sch); |
523 | size_t sz; | ||
517 | int i; | 524 | int i; |
518 | 525 | ||
519 | q->perturb_timer.function = sfq_perturbation; | 526 | q->perturb_timer.function = sfq_perturbation; |
520 | q->perturb_timer.data = (unsigned long)sch; | 527 | q->perturb_timer.data = (unsigned long)sch; |
521 | init_timer_deferrable(&q->perturb_timer); | 528 | init_timer_deferrable(&q->perturb_timer); |
522 | 529 | ||
523 | for (i = 0; i < SFQ_HASH_DIVISOR; i++) | ||
524 | q->ht[i] = SFQ_EMPTY_SLOT; | ||
525 | |||
526 | for (i = 0; i < SFQ_DEPTH; i++) { | 530 | for (i = 0; i < SFQ_DEPTH; i++) { |
527 | q->dep[i].next = i + SFQ_SLOTS; | 531 | q->dep[i].next = i + SFQ_SLOTS; |
528 | q->dep[i].prev = i + SFQ_SLOTS; | 532 | q->dep[i].prev = i + SFQ_SLOTS; |
@@ -531,6 +535,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | |||
531 | q->limit = SFQ_DEPTH - 1; | 535 | q->limit = SFQ_DEPTH - 1; |
532 | q->cur_depth = 0; | 536 | q->cur_depth = 0; |
533 | q->tail = NULL; | 537 | q->tail = NULL; |
538 | q->divisor = SFQ_DEFAULT_HASH_DIVISOR; | ||
534 | if (opt == NULL) { | 539 | if (opt == NULL) { |
535 | q->quantum = psched_mtu(qdisc_dev(sch)); | 540 | q->quantum = psched_mtu(qdisc_dev(sch)); |
536 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); | 541 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); |
@@ -542,6 +547,15 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | |||
542 | return err; | 547 | return err; |
543 | } | 548 | } |
544 | 549 | ||
550 | sz = sizeof(q->ht[0]) * q->divisor; | ||
551 | q->ht = kmalloc(sz, GFP_KERNEL); | ||
552 | if (!q->ht && sz > PAGE_SIZE) | ||
553 | q->ht = vmalloc(sz); | ||
554 | if (!q->ht) | ||
555 | return -ENOMEM; | ||
556 | for (i = 0; i < q->divisor; i++) | ||
557 | q->ht[i] = SFQ_EMPTY_SLOT; | ||
558 | |||
545 | for (i = 0; i < SFQ_SLOTS; i++) { | 559 | for (i = 0; i < SFQ_SLOTS; i++) { |
546 | slot_queue_init(&q->slots[i]); | 560 | slot_queue_init(&q->slots[i]); |
547 | sfq_link(q, i); | 561 | sfq_link(q, i); |
@@ -556,6 +570,10 @@ static void sfq_destroy(struct Qdisc *sch) | |||
556 | tcf_destroy_chain(&q->filter_list); | 570 | tcf_destroy_chain(&q->filter_list); |
557 | q->perturb_period = 0; | 571 | q->perturb_period = 0; |
558 | del_timer_sync(&q->perturb_timer); | 572 | del_timer_sync(&q->perturb_timer); |
573 | if (is_vmalloc_addr(q->ht)) | ||
574 | vfree(q->ht); | ||
575 | else | ||
576 | kfree(q->ht); | ||
559 | } | 577 | } |
560 | 578 | ||
561 | static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) | 579 | static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) |
@@ -568,7 +586,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
568 | opt.perturb_period = q->perturb_period / HZ; | 586 | opt.perturb_period = q->perturb_period / HZ; |
569 | 587 | ||
570 | opt.limit = q->limit; | 588 | opt.limit = q->limit; |
571 | opt.divisor = SFQ_HASH_DIVISOR; | 589 | opt.divisor = q->divisor; |
572 | opt.flows = q->limit; | 590 | opt.flows = q->limit; |
573 | 591 | ||
574 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 592 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
@@ -646,7 +664,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
646 | if (arg->stop) | 664 | if (arg->stop) |
647 | return; | 665 | return; |
648 | 666 | ||
649 | for (i = 0; i < SFQ_HASH_DIVISOR; i++) { | 667 | for (i = 0; i < q->divisor; i++) { |
650 | if (q->ht[i] == SFQ_EMPTY_SLOT || | 668 | if (q->ht[i] == SFQ_EMPTY_SLOT || |
651 | arg->count < arg->skip) { | 669 | arg->count < arg->skip) { |
652 | arg->count++; | 670 | arg->count++; |