diff options
Diffstat (limited to 'net/sched/sch_sfq.c')
-rw-r--r-- | net/sched/sch_sfq.c | 369 |
1 files changed, 236 insertions, 133 deletions
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 4f5510e2bd6f..0a7964009e8c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c | |||
@@ -17,14 +17,13 @@ | |||
17 | #include <linux/in.h> | 17 | #include <linux/in.h> |
18 | #include <linux/errno.h> | 18 | #include <linux/errno.h> |
19 | #include <linux/init.h> | 19 | #include <linux/init.h> |
20 | #include <linux/ipv6.h> | ||
21 | #include <linux/skbuff.h> | 20 | #include <linux/skbuff.h> |
22 | #include <linux/jhash.h> | 21 | #include <linux/jhash.h> |
23 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
24 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
25 | #include <net/ip.h> | ||
26 | #include <net/netlink.h> | 24 | #include <net/netlink.h> |
27 | #include <net/pkt_sched.h> | 25 | #include <net/pkt_sched.h> |
26 | #include <net/flow_keys.h> | ||
28 | 27 | ||
29 | 28 | ||
30 | /* Stochastic Fairness Queuing algorithm. | 29 | /* Stochastic Fairness Queuing algorithm. |
@@ -67,16 +66,18 @@ | |||
67 | SFQ is superior for this purpose. | 66 | SFQ is superior for this purpose. |
68 | 67 | ||
69 | IMPLEMENTATION: | 68 | IMPLEMENTATION: |
70 | This implementation limits maximal queue length to 128; | 69 | This implementation limits : |
71 | max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024. | 70 | - maximal queue length per flow to 127 packets. |
72 | The only goal of this restrictions was that all data | 71 | - max mtu to 2^18-1; |
73 | fit into one 4K page on 32bit arches. | 72 | - max 65408 flows, |
73 | - number of hash buckets to 65536. | ||
74 | 74 | ||
75 | It is easy to increase these values, but not in flight. */ | 75 | It is easy to increase these values, but not in flight. */ |
76 | 76 | ||
77 | #define SFQ_DEPTH 128 /* max number of packets per flow */ | 77 | #define SFQ_MAX_DEPTH 127 /* max number of packets per flow */ |
78 | #define SFQ_SLOTS 128 /* max number of flows */ | 78 | #define SFQ_DEFAULT_FLOWS 128 |
79 | #define SFQ_EMPTY_SLOT 255 | 79 | #define SFQ_MAX_FLOWS (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */ |
80 | #define SFQ_EMPTY_SLOT 0xffff | ||
80 | #define SFQ_DEFAULT_HASH_DIVISOR 1024 | 81 | #define SFQ_DEFAULT_HASH_DIVISOR 1024 |
81 | 82 | ||
82 | /* We use 16 bits to store allot, and want to handle packets up to 64K | 83 | /* We use 16 bits to store allot, and want to handle packets up to 64K |
@@ -85,13 +86,13 @@ | |||
85 | #define SFQ_ALLOT_SHIFT 3 | 86 | #define SFQ_ALLOT_SHIFT 3 |
86 | #define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) | 87 | #define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) |
87 | 88 | ||
88 | /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */ | 89 | /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ |
89 | typedef unsigned char sfq_index; | 90 | typedef u16 sfq_index; |
90 | 91 | ||
91 | /* | 92 | /* |
92 | * We dont use pointers to save space. | 93 | * We dont use pointers to save space. |
93 | * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array | 94 | * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array |
94 | * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] | 95 | * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH] |
95 | * are 'pointers' to dep[] array | 96 | * are 'pointers' to dep[] array |
96 | */ | 97 | */ |
97 | struct sfq_head { | 98 | struct sfq_head { |
@@ -103,28 +104,38 @@ struct sfq_slot { | |||
103 | struct sk_buff *skblist_next; | 104 | struct sk_buff *skblist_next; |
104 | struct sk_buff *skblist_prev; | 105 | struct sk_buff *skblist_prev; |
105 | sfq_index qlen; /* number of skbs in skblist */ | 106 | sfq_index qlen; /* number of skbs in skblist */ |
106 | sfq_index next; /* next slot in sfq chain */ | 107 | sfq_index next; /* next slot in sfq RR chain */ |
107 | struct sfq_head dep; /* anchor in dep[] chains */ | 108 | struct sfq_head dep; /* anchor in dep[] chains */ |
108 | unsigned short hash; /* hash value (index in ht[]) */ | 109 | unsigned short hash; /* hash value (index in ht[]) */ |
109 | short allot; /* credit for this slot */ | 110 | short allot; /* credit for this slot */ |
110 | }; | 111 | }; |
111 | 112 | ||
112 | struct sfq_sched_data { | 113 | struct sfq_sched_data { |
113 | /* Parameters */ | 114 | /* frequently used fields */ |
114 | int perturb_period; | 115 | int limit; /* limit of total number of packets in this qdisc */ |
115 | unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ | ||
116 | int limit; | ||
117 | unsigned int divisor; /* number of slots in hash table */ | 116 | unsigned int divisor; /* number of slots in hash table */ |
118 | /* Variables */ | 117 | unsigned int maxflows; /* number of flows in flows array */ |
119 | struct tcf_proto *filter_list; | 118 | int headdrop; |
120 | struct timer_list perturb_timer; | 119 | int maxdepth; /* limit of packets per flow */ |
120 | |||
121 | u32 perturbation; | 121 | u32 perturbation; |
122 | struct tcf_proto *filter_list; | ||
122 | sfq_index cur_depth; /* depth of longest slot */ | 123 | sfq_index cur_depth; /* depth of longest slot */ |
123 | unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ | 124 | unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ |
124 | struct sfq_slot *tail; /* current slot in round */ | 125 | struct sfq_slot *tail; /* current slot in round */ |
125 | sfq_index *ht; /* Hash table (divisor slots) */ | 126 | sfq_index *ht; /* Hash table ('divisor' slots) */ |
126 | struct sfq_slot slots[SFQ_SLOTS]; | 127 | struct sfq_slot *slots; /* Flows table ('maxflows' entries) */ |
127 | struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ | 128 | |
129 | struct sfq_head dep[SFQ_MAX_DEPTH + 1]; | ||
130 | /* Linked lists of slots, indexed by depth | ||
131 | * dep[0] : list of unused flows | ||
132 | * dep[1] : list of flows with 1 packet | ||
133 | * dep[X] : list of flows with X packets | ||
134 | */ | ||
135 | |||
136 | int perturb_period; | ||
137 | unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ | ||
138 | struct timer_list perturb_timer; | ||
128 | }; | 139 | }; |
129 | 140 | ||
130 | /* | 141 | /* |
@@ -132,66 +143,36 @@ struct sfq_sched_data { | |||
132 | */ | 143 | */ |
133 | static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) | 144 | static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) |
134 | { | 145 | { |
135 | if (val < SFQ_SLOTS) | 146 | if (val < SFQ_MAX_FLOWS) |
136 | return &q->slots[val].dep; | 147 | return &q->slots[val].dep; |
137 | return &q->dep[val - SFQ_SLOTS]; | 148 | return &q->dep[val - SFQ_MAX_FLOWS]; |
138 | } | 149 | } |
139 | 150 | ||
140 | static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) | 151 | /* |
152 | * In order to be able to quickly rehash our queue when timer changes | ||
153 | * q->perturbation, we store flow_keys in skb->cb[] | ||
154 | */ | ||
155 | struct sfq_skb_cb { | ||
156 | struct flow_keys keys; | ||
157 | }; | ||
158 | |||
159 | static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb) | ||
141 | { | 160 | { |
142 | return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1); | 161 | BUILD_BUG_ON(sizeof(skb->cb) < |
162 | sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb)); | ||
163 | return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data; | ||
143 | } | 164 | } |
144 | 165 | ||
145 | static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) | 166 | static unsigned int sfq_hash(const struct sfq_sched_data *q, |
167 | const struct sk_buff *skb) | ||
146 | { | 168 | { |
147 | u32 h, h2; | 169 | const struct flow_keys *keys = &sfq_skb_cb(skb)->keys; |
148 | 170 | unsigned int hash; | |
149 | switch (skb->protocol) { | ||
150 | case htons(ETH_P_IP): | ||
151 | { | ||
152 | const struct iphdr *iph; | ||
153 | int poff; | ||
154 | |||
155 | if (!pskb_network_may_pull(skb, sizeof(*iph))) | ||
156 | goto err; | ||
157 | iph = ip_hdr(skb); | ||
158 | h = (__force u32)iph->daddr; | ||
159 | h2 = (__force u32)iph->saddr ^ iph->protocol; | ||
160 | if (ip_is_fragment(iph)) | ||
161 | break; | ||
162 | poff = proto_ports_offset(iph->protocol); | ||
163 | if (poff >= 0 && | ||
164 | pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { | ||
165 | iph = ip_hdr(skb); | ||
166 | h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff); | ||
167 | } | ||
168 | break; | ||
169 | } | ||
170 | case htons(ETH_P_IPV6): | ||
171 | { | ||
172 | const struct ipv6hdr *iph; | ||
173 | int poff; | ||
174 | |||
175 | if (!pskb_network_may_pull(skb, sizeof(*iph))) | ||
176 | goto err; | ||
177 | iph = ipv6_hdr(skb); | ||
178 | h = (__force u32)iph->daddr.s6_addr32[3]; | ||
179 | h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; | ||
180 | poff = proto_ports_offset(iph->nexthdr); | ||
181 | if (poff >= 0 && | ||
182 | pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) { | ||
183 | iph = ipv6_hdr(skb); | ||
184 | h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff); | ||
185 | } | ||
186 | break; | ||
187 | } | ||
188 | default: | ||
189 | err: | ||
190 | h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol; | ||
191 | h2 = (unsigned long)skb->sk; | ||
192 | } | ||
193 | 171 | ||
194 | return sfq_fold_hash(q, h, h2); | 172 | hash = jhash_3words((__force u32)keys->dst, |
173 | (__force u32)keys->src ^ keys->ip_proto, | ||
174 | (__force u32)keys->ports, q->perturbation); | ||
175 | return hash & (q->divisor - 1); | ||
195 | } | 176 | } |
196 | 177 | ||
197 | static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | 178 | static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, |
@@ -206,8 +187,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
206 | TC_H_MIN(skb->priority) <= q->divisor) | 187 | TC_H_MIN(skb->priority) <= q->divisor) |
207 | return TC_H_MIN(skb->priority); | 188 | return TC_H_MIN(skb->priority); |
208 | 189 | ||
209 | if (!q->filter_list) | 190 | if (!q->filter_list) { |
191 | skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys); | ||
210 | return sfq_hash(q, skb) + 1; | 192 | return sfq_hash(q, skb) + 1; |
193 | } | ||
211 | 194 | ||
212 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | 195 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; |
213 | result = tc_classify(skb, q->filter_list, &res); | 196 | result = tc_classify(skb, q->filter_list, &res); |
@@ -228,18 +211,19 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
228 | } | 211 | } |
229 | 212 | ||
230 | /* | 213 | /* |
231 | * x : slot number [0 .. SFQ_SLOTS - 1] | 214 | * x : slot number [0 .. SFQ_MAX_FLOWS - 1] |
232 | */ | 215 | */ |
233 | static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) | 216 | static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) |
234 | { | 217 | { |
235 | sfq_index p, n; | 218 | sfq_index p, n; |
236 | int qlen = q->slots[x].qlen; | 219 | struct sfq_slot *slot = &q->slots[x]; |
220 | int qlen = slot->qlen; | ||
237 | 221 | ||
238 | p = qlen + SFQ_SLOTS; | 222 | p = qlen + SFQ_MAX_FLOWS; |
239 | n = q->dep[qlen].next; | 223 | n = q->dep[qlen].next; |
240 | 224 | ||
241 | q->slots[x].dep.next = n; | 225 | slot->dep.next = n; |
242 | q->slots[x].dep.prev = p; | 226 | slot->dep.prev = p; |
243 | 227 | ||
244 | q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ | 228 | q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ |
245 | sfq_dep_head(q, n)->prev = x; | 229 | sfq_dep_head(q, n)->prev = x; |
@@ -304,6 +288,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot) | |||
304 | 288 | ||
305 | static inline void slot_queue_init(struct sfq_slot *slot) | 289 | static inline void slot_queue_init(struct sfq_slot *slot) |
306 | { | 290 | { |
291 | memset(slot, 0, sizeof(*slot)); | ||
307 | slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; | 292 | slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; |
308 | } | 293 | } |
309 | 294 | ||
@@ -334,7 +319,7 @@ static unsigned int sfq_drop(struct Qdisc *sch) | |||
334 | x = q->dep[d].next; | 319 | x = q->dep[d].next; |
335 | slot = &q->slots[x]; | 320 | slot = &q->slots[x]; |
336 | drop: | 321 | drop: |
337 | skb = slot_dequeue_tail(slot); | 322 | skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); |
338 | len = qdisc_pkt_len(skb); | 323 | len = qdisc_pkt_len(skb); |
339 | sfq_dec(q, x); | 324 | sfq_dec(q, x); |
340 | kfree_skb(skb); | 325 | kfree_skb(skb); |
@@ -378,16 +363,27 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
378 | slot = &q->slots[x]; | 363 | slot = &q->slots[x]; |
379 | if (x == SFQ_EMPTY_SLOT) { | 364 | if (x == SFQ_EMPTY_SLOT) { |
380 | x = q->dep[0].next; /* get a free slot */ | 365 | x = q->dep[0].next; /* get a free slot */ |
366 | if (x >= SFQ_MAX_FLOWS) | ||
367 | return qdisc_drop(skb, sch); | ||
381 | q->ht[hash] = x; | 368 | q->ht[hash] = x; |
382 | slot = &q->slots[x]; | 369 | slot = &q->slots[x]; |
383 | slot->hash = hash; | 370 | slot->hash = hash; |
384 | } | 371 | } |
385 | 372 | ||
386 | /* If selected queue has length q->limit, do simple tail drop, | 373 | if (slot->qlen >= q->maxdepth) { |
387 | * i.e. drop _this_ packet. | 374 | struct sk_buff *head; |
388 | */ | 375 | |
389 | if (slot->qlen >= q->limit) | 376 | if (!q->headdrop) |
390 | return qdisc_drop(skb, sch); | 377 | return qdisc_drop(skb, sch); |
378 | |||
379 | head = slot_dequeue_head(slot); | ||
380 | sch->qstats.backlog -= qdisc_pkt_len(head); | ||
381 | qdisc_drop(head, sch); | ||
382 | |||
383 | sch->qstats.backlog += qdisc_pkt_len(skb); | ||
384 | slot_queue_add(slot, skb); | ||
385 | return NET_XMIT_CN; | ||
386 | } | ||
391 | 387 | ||
392 | sch->qstats.backlog += qdisc_pkt_len(skb); | 388 | sch->qstats.backlog += qdisc_pkt_len(skb); |
393 | slot_queue_add(slot, skb); | 389 | slot_queue_add(slot, skb); |
@@ -395,11 +391,11 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
395 | if (slot->qlen == 1) { /* The flow is new */ | 391 | if (slot->qlen == 1) { /* The flow is new */ |
396 | if (q->tail == NULL) { /* It is the first flow */ | 392 | if (q->tail == NULL) { /* It is the first flow */ |
397 | slot->next = x; | 393 | slot->next = x; |
394 | q->tail = slot; | ||
398 | } else { | 395 | } else { |
399 | slot->next = q->tail->next; | 396 | slot->next = q->tail->next; |
400 | q->tail->next = x; | 397 | q->tail->next = x; |
401 | } | 398 | } |
402 | q->tail = slot; | ||
403 | slot->allot = q->scaled_quantum; | 399 | slot->allot = q->scaled_quantum; |
404 | } | 400 | } |
405 | if (++sch->q.qlen <= q->limit) | 401 | if (++sch->q.qlen <= q->limit) |
@@ -468,12 +464,83 @@ sfq_reset(struct Qdisc *sch) | |||
468 | kfree_skb(skb); | 464 | kfree_skb(skb); |
469 | } | 465 | } |
470 | 466 | ||
467 | /* | ||
468 | * When q->perturbation is changed, we rehash all queued skbs | ||
469 | * to avoid OOO (Out Of Order) effects. | ||
470 | * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change | ||
471 | * counters. | ||
472 | */ | ||
473 | static void sfq_rehash(struct Qdisc *sch) | ||
474 | { | ||
475 | struct sfq_sched_data *q = qdisc_priv(sch); | ||
476 | struct sk_buff *skb; | ||
477 | int i; | ||
478 | struct sfq_slot *slot; | ||
479 | struct sk_buff_head list; | ||
480 | int dropped = 0; | ||
481 | |||
482 | __skb_queue_head_init(&list); | ||
483 | |||
484 | for (i = 0; i < q->maxflows; i++) { | ||
485 | slot = &q->slots[i]; | ||
486 | if (!slot->qlen) | ||
487 | continue; | ||
488 | while (slot->qlen) { | ||
489 | skb = slot_dequeue_head(slot); | ||
490 | sfq_dec(q, i); | ||
491 | __skb_queue_tail(&list, skb); | ||
492 | } | ||
493 | q->ht[slot->hash] = SFQ_EMPTY_SLOT; | ||
494 | } | ||
495 | q->tail = NULL; | ||
496 | |||
497 | while ((skb = __skb_dequeue(&list)) != NULL) { | ||
498 | unsigned int hash = sfq_hash(q, skb); | ||
499 | sfq_index x = q->ht[hash]; | ||
500 | |||
501 | slot = &q->slots[x]; | ||
502 | if (x == SFQ_EMPTY_SLOT) { | ||
503 | x = q->dep[0].next; /* get a free slot */ | ||
504 | if (x >= SFQ_MAX_FLOWS) { | ||
505 | drop: sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
506 | kfree_skb(skb); | ||
507 | dropped++; | ||
508 | continue; | ||
509 | } | ||
510 | q->ht[hash] = x; | ||
511 | slot = &q->slots[x]; | ||
512 | slot->hash = hash; | ||
513 | } | ||
514 | if (slot->qlen >= q->maxdepth) | ||
515 | goto drop; | ||
516 | slot_queue_add(slot, skb); | ||
517 | sfq_inc(q, x); | ||
518 | if (slot->qlen == 1) { /* The flow is new */ | ||
519 | if (q->tail == NULL) { /* It is the first flow */ | ||
520 | slot->next = x; | ||
521 | } else { | ||
522 | slot->next = q->tail->next; | ||
523 | q->tail->next = x; | ||
524 | } | ||
525 | q->tail = slot; | ||
526 | slot->allot = q->scaled_quantum; | ||
527 | } | ||
528 | } | ||
529 | sch->q.qlen -= dropped; | ||
530 | qdisc_tree_decrease_qlen(sch, dropped); | ||
531 | } | ||
532 | |||
471 | static void sfq_perturbation(unsigned long arg) | 533 | static void sfq_perturbation(unsigned long arg) |
472 | { | 534 | { |
473 | struct Qdisc *sch = (struct Qdisc *)arg; | 535 | struct Qdisc *sch = (struct Qdisc *)arg; |
474 | struct sfq_sched_data *q = qdisc_priv(sch); | 536 | struct sfq_sched_data *q = qdisc_priv(sch); |
537 | spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); | ||
475 | 538 | ||
539 | spin_lock(root_lock); | ||
476 | q->perturbation = net_random(); | 540 | q->perturbation = net_random(); |
541 | if (!q->filter_list && q->tail) | ||
542 | sfq_rehash(sch); | ||
543 | spin_unlock(root_lock); | ||
477 | 544 | ||
478 | if (q->perturb_period) | 545 | if (q->perturb_period) |
479 | mod_timer(&q->perturb_timer, jiffies + q->perturb_period); | 546 | mod_timer(&q->perturb_timer, jiffies + q->perturb_period); |
@@ -483,23 +550,39 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) | |||
483 | { | 550 | { |
484 | struct sfq_sched_data *q = qdisc_priv(sch); | 551 | struct sfq_sched_data *q = qdisc_priv(sch); |
485 | struct tc_sfq_qopt *ctl = nla_data(opt); | 552 | struct tc_sfq_qopt *ctl = nla_data(opt); |
553 | struct tc_sfq_qopt_v1 *ctl_v1 = NULL; | ||
486 | unsigned int qlen; | 554 | unsigned int qlen; |
487 | 555 | ||
488 | if (opt->nla_len < nla_attr_size(sizeof(*ctl))) | 556 | if (opt->nla_len < nla_attr_size(sizeof(*ctl))) |
489 | return -EINVAL; | 557 | return -EINVAL; |
490 | 558 | if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1))) | |
559 | ctl_v1 = nla_data(opt); | ||
491 | if (ctl->divisor && | 560 | if (ctl->divisor && |
492 | (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) | 561 | (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) |
493 | return -EINVAL; | 562 | return -EINVAL; |
494 | 563 | ||
495 | sch_tree_lock(sch); | 564 | sch_tree_lock(sch); |
496 | q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); | 565 | if (ctl->quantum) { |
497 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); | 566 | q->quantum = ctl->quantum; |
567 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); | ||
568 | } | ||
498 | q->perturb_period = ctl->perturb_period * HZ; | 569 | q->perturb_period = ctl->perturb_period * HZ; |
499 | if (ctl->limit) | 570 | if (ctl->flows) |
500 | q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); | 571 | q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); |
501 | if (ctl->divisor) | 572 | if (ctl->divisor) { |
502 | q->divisor = ctl->divisor; | 573 | q->divisor = ctl->divisor; |
574 | q->maxflows = min_t(u32, q->maxflows, q->divisor); | ||
575 | } | ||
576 | if (ctl_v1) { | ||
577 | if (ctl_v1->depth) | ||
578 | q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); | ||
579 | q->headdrop = ctl_v1->headdrop; | ||
580 | } | ||
581 | if (ctl->limit) { | ||
582 | q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); | ||
583 | q->maxflows = min_t(u32, q->maxflows, q->limit); | ||
584 | } | ||
585 | |||
503 | qlen = sch->q.qlen; | 586 | qlen = sch->q.qlen; |
504 | while (sch->q.qlen > q->limit) | 587 | while (sch->q.qlen > q->limit) |
505 | sfq_drop(sch); | 588 | sfq_drop(sch); |
@@ -514,46 +597,77 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) | |||
514 | return 0; | 597 | return 0; |
515 | } | 598 | } |
516 | 599 | ||
600 | static void *sfq_alloc(size_t sz) | ||
601 | { | ||
602 | void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN); | ||
603 | |||
604 | if (!ptr) | ||
605 | ptr = vmalloc(sz); | ||
606 | return ptr; | ||
607 | } | ||
608 | |||
609 | static void sfq_free(void *addr) | ||
610 | { | ||
611 | if (addr) { | ||
612 | if (is_vmalloc_addr(addr)) | ||
613 | vfree(addr); | ||
614 | else | ||
615 | kfree(addr); | ||
616 | } | ||
617 | } | ||
618 | |||
619 | static void sfq_destroy(struct Qdisc *sch) | ||
620 | { | ||
621 | struct sfq_sched_data *q = qdisc_priv(sch); | ||
622 | |||
623 | tcf_destroy_chain(&q->filter_list); | ||
624 | q->perturb_period = 0; | ||
625 | del_timer_sync(&q->perturb_timer); | ||
626 | sfq_free(q->ht); | ||
627 | sfq_free(q->slots); | ||
628 | } | ||
629 | |||
517 | static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | 630 | static int sfq_init(struct Qdisc *sch, struct nlattr *opt) |
518 | { | 631 | { |
519 | struct sfq_sched_data *q = qdisc_priv(sch); | 632 | struct sfq_sched_data *q = qdisc_priv(sch); |
520 | size_t sz; | ||
521 | int i; | 633 | int i; |
522 | 634 | ||
523 | q->perturb_timer.function = sfq_perturbation; | 635 | q->perturb_timer.function = sfq_perturbation; |
524 | q->perturb_timer.data = (unsigned long)sch; | 636 | q->perturb_timer.data = (unsigned long)sch; |
525 | init_timer_deferrable(&q->perturb_timer); | 637 | init_timer_deferrable(&q->perturb_timer); |
526 | 638 | ||
527 | for (i = 0; i < SFQ_DEPTH; i++) { | 639 | for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { |
528 | q->dep[i].next = i + SFQ_SLOTS; | 640 | q->dep[i].next = i + SFQ_MAX_FLOWS; |
529 | q->dep[i].prev = i + SFQ_SLOTS; | 641 | q->dep[i].prev = i + SFQ_MAX_FLOWS; |
530 | } | 642 | } |
531 | 643 | ||
532 | q->limit = SFQ_DEPTH - 1; | 644 | q->limit = SFQ_MAX_DEPTH; |
645 | q->maxdepth = SFQ_MAX_DEPTH; | ||
533 | q->cur_depth = 0; | 646 | q->cur_depth = 0; |
534 | q->tail = NULL; | 647 | q->tail = NULL; |
535 | q->divisor = SFQ_DEFAULT_HASH_DIVISOR; | 648 | q->divisor = SFQ_DEFAULT_HASH_DIVISOR; |
536 | if (opt == NULL) { | 649 | q->maxflows = SFQ_DEFAULT_FLOWS; |
537 | q->quantum = psched_mtu(qdisc_dev(sch)); | 650 | q->quantum = psched_mtu(qdisc_dev(sch)); |
538 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); | 651 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); |
539 | q->perturb_period = 0; | 652 | q->perturb_period = 0; |
540 | q->perturbation = net_random(); | 653 | q->perturbation = net_random(); |
541 | } else { | 654 | |
655 | if (opt) { | ||
542 | int err = sfq_change(sch, opt); | 656 | int err = sfq_change(sch, opt); |
543 | if (err) | 657 | if (err) |
544 | return err; | 658 | return err; |
545 | } | 659 | } |
546 | 660 | ||
547 | sz = sizeof(q->ht[0]) * q->divisor; | 661 | q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); |
548 | q->ht = kmalloc(sz, GFP_KERNEL); | 662 | q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows); |
549 | if (!q->ht && sz > PAGE_SIZE) | 663 | if (!q->ht || !q->slots) { |
550 | q->ht = vmalloc(sz); | 664 | sfq_destroy(sch); |
551 | if (!q->ht) | ||
552 | return -ENOMEM; | 665 | return -ENOMEM; |
666 | } | ||
553 | for (i = 0; i < q->divisor; i++) | 667 | for (i = 0; i < q->divisor; i++) |
554 | q->ht[i] = SFQ_EMPTY_SLOT; | 668 | q->ht[i] = SFQ_EMPTY_SLOT; |
555 | 669 | ||
556 | for (i = 0; i < SFQ_SLOTS; i++) { | 670 | for (i = 0; i < q->maxflows; i++) { |
557 | slot_queue_init(&q->slots[i]); | 671 | slot_queue_init(&q->slots[i]); |
558 | sfq_link(q, i); | 672 | sfq_link(q, i); |
559 | } | 673 | } |
@@ -564,31 +678,20 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | |||
564 | return 0; | 678 | return 0; |
565 | } | 679 | } |
566 | 680 | ||
567 | static void sfq_destroy(struct Qdisc *sch) | ||
568 | { | ||
569 | struct sfq_sched_data *q = qdisc_priv(sch); | ||
570 | |||
571 | tcf_destroy_chain(&q->filter_list); | ||
572 | q->perturb_period = 0; | ||
573 | del_timer_sync(&q->perturb_timer); | ||
574 | if (is_vmalloc_addr(q->ht)) | ||
575 | vfree(q->ht); | ||
576 | else | ||
577 | kfree(q->ht); | ||
578 | } | ||
579 | |||
580 | static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) | 681 | static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) |
581 | { | 682 | { |
582 | struct sfq_sched_data *q = qdisc_priv(sch); | 683 | struct sfq_sched_data *q = qdisc_priv(sch); |
583 | unsigned char *b = skb_tail_pointer(skb); | 684 | unsigned char *b = skb_tail_pointer(skb); |
584 | struct tc_sfq_qopt opt; | 685 | struct tc_sfq_qopt_v1 opt; |
585 | 686 | ||
586 | opt.quantum = q->quantum; | 687 | memset(&opt, 0, sizeof(opt)); |
587 | opt.perturb_period = q->perturb_period / HZ; | 688 | opt.v0.quantum = q->quantum; |
588 | 689 | opt.v0.perturb_period = q->perturb_period / HZ; | |
589 | opt.limit = q->limit; | 690 | opt.v0.limit = q->limit; |
590 | opt.divisor = q->divisor; | 691 | opt.v0.divisor = q->divisor; |
591 | opt.flows = q->limit; | 692 | opt.v0.flows = q->maxflows; |
693 | opt.depth = q->maxdepth; | ||
694 | opt.headdrop = q->headdrop; | ||
592 | 695 | ||
593 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 696 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
594 | 697 | ||