author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/sched/sch_sfq.c
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'net/sched/sch_sfq.c')

 -rw-r--r--	net/sched/sch_sfq.c	409
 1 file changed, 253 insertions(+), 156 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 201cbac2b32c..b6ea6afa55b0 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -21,6 +21,7 @@
 #include <linux/skbuff.h>
 #include <linux/jhash.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <net/ip.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -67,55 +68,81 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; number of hash buckets to 1024.
+	max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
-	fit into one 4K page :-). Struct sfq_sched_data is
-	organized in anti-cache manner: all the data for a bucket
-	are scattered over different locations. This is not good,
-	but it allowed me to put it into 4K.
+	fit into one 4K page on 32bit arches.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128
-#define SFQ_HASH_DIVISOR	1024
+#define SFQ_DEPTH		128 /* max number of packets per flow */
+#define SFQ_SLOTS		128 /* max number of flows */
+#define SFQ_EMPTY_SLOT		255
+#define SFQ_DEFAULT_HASH_DIVISOR 1024
 
-/* This type should contain at least SFQ_DEPTH*2 values */
+/* We use 16 bits to store allot, and want to handle packets up to 64K
+ * Scale allot by 8 (1<<3) so that no overflow occurs.
+ */
+#define SFQ_ALLOT_SHIFT		3
+#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
+
+/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
 
-struct sfq_head
-{
+/*
+ * We dont use pointers to save space.
+ * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * are 'pointers' to dep[] array
+ */
+struct sfq_head {
 	sfq_index	next;
 	sfq_index	prev;
 };
 
-struct sfq_sched_data
-{
+struct sfq_slot {
+	struct sk_buff	*skblist_next;
+	struct sk_buff	*skblist_prev;
+	sfq_index	qlen; /* number of skbs in skblist */
+	sfq_index	next; /* next slot in sfq chain */
+	struct sfq_head	dep; /* anchor in dep[] chains */
+	unsigned short	hash; /* hash value (index in ht[]) */
+	short		allot; /* credit for this slot */
+};
+
+struct sfq_sched_data {
 /* Parameters */
 	int		perturb_period;
-	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
+	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	int		limit;
-
+	unsigned int	divisor;	/* number of slots in hash table */
 /* Variables */
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
 	u32		perturbation;
-	sfq_index	tail;		/* Index of current slot in round */
-	sfq_index	max_depth;	/* Maximal depth */
-
-	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
-	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
-	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
-	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
-	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
-	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+	sfq_index	cur_depth;	/* depth of longest slot */
+	unsigned short	scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
+	struct sfq_slot *tail;		/* current slot in round */
+	sfq_index	*ht;		/* Hash table (divisor slots) */
+	struct sfq_slot	slots[SFQ_SLOTS];
+	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
 };
 
-static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+/*
+ * sfq_head are either in a sfq_slot or in dep[] array
+ */
+static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
 {
-	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
+	if (val < SFQ_SLOTS)
+		return &q->slots[val].dep;
+	return &q->dep[val - SFQ_SLOTS];
 }
 
-static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+{
+	return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
+}
+
+static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 {
 	u32 h, h2;
 
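The SFQ_ALLOT_SHIFT scaling introduced above is what lets allot remain a 16-bit short: a worst-case 64K packet costs DIV_ROUND_UP(65536, 8) = 8192 credit units, comfortably inside the range of a signed short. A minimal standalone userspace sketch of the same arithmetic (DIV_ROUND_UP is inlined here; in the kernel it comes from kernel.h):

#include <assert.h>
#include <stdio.h>

#define SFQ_ALLOT_SHIFT 3
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)

int main(void)
{
	/* A 64K packet scaled by 1<<3 becomes 8192 allot units,
	 * well within the signed 16-bit 'short allot' field. */
	short allot = SFQ_ALLOT_SIZE(65536);

	printf("scaled allot for a 64K packet: %d\n", allot); /* 8192 */
	assert(allot == 8192);
	return 0;
}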
@@ -123,40 +150,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	case htons(ETH_P_IP):
 	{
 		const struct iphdr *iph;
+		int poff;
 
 		if (!pskb_network_may_pull(skb, sizeof(*iph)))
 			goto err;
 		iph = ip_hdr(skb);
 		h = (__force u32)iph->daddr;
 		h2 = (__force u32)iph->saddr ^ iph->protocol;
-		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
-		    (iph->protocol == IPPROTO_TCP ||
-		     iph->protocol == IPPROTO_UDP ||
-		     iph->protocol == IPPROTO_UDPLITE ||
-		     iph->protocol == IPPROTO_SCTP ||
-		     iph->protocol == IPPROTO_DCCP ||
-		     iph->protocol == IPPROTO_ESP) &&
-		     pskb_network_may_pull(skb, iph->ihl * 4 + 4))
-			h2 ^= *(((u32*)iph) + iph->ihl);
+		if (iph->frag_off & htons(IP_MF | IP_OFFSET))
+			break;
+		poff = proto_ports_offset(iph->protocol);
+		if (poff >= 0 &&
+		    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
+			iph = ip_hdr(skb);
+			h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
+		}
 		break;
 	}
 	case htons(ETH_P_IPV6):
 	{
-		struct ipv6hdr *iph;
+		const struct ipv6hdr *iph;
+		int poff;
 
 		if (!pskb_network_may_pull(skb, sizeof(*iph)))
 			goto err;
 		iph = ipv6_hdr(skb);
 		h = (__force u32)iph->daddr.s6_addr32[3];
 		h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
-		if ((iph->nexthdr == IPPROTO_TCP ||
-		     iph->nexthdr == IPPROTO_UDP ||
-		     iph->nexthdr == IPPROTO_UDPLITE ||
-		     iph->nexthdr == IPPROTO_SCTP ||
-		     iph->nexthdr == IPPROTO_DCCP ||
-		     iph->nexthdr == IPPROTO_ESP) &&
-		    pskb_network_may_pull(skb, sizeof(*iph) + 4))
-			h2 ^= *(u32*)&iph[1];
+		poff = proto_ports_offset(iph->nexthdr);
+		if (poff >= 0 &&
+		    pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
+			iph = ipv6_hdr(skb);
+			h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
+		}
 		break;
 	}
 	default:
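For both address families the flow key has the same shape: h is the destination address, h2 mixes the source address, the protocol and, when the header is pullable, the first four bytes at the transport-port offset found via proto_ports_offset(); sfq_fold_hash() then mixes in the perturbation seed and masks with divisor - 1. A userspace sketch of the folding step, with a stand-in mixer since jhash_2words() is kernel-internal (the mixer and the sample values below are illustrative assumptions, not the real jhash):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's jhash_2words(); any decent 32-bit mixer
 * illustrates the idea (this is NOT the real jhash). */
static uint32_t mix_2words(uint32_t a, uint32_t b, uint32_t seed)
{
	uint32_t h = a ^ 0x9e3779b9u;

	h = (h ^ b) * 0x85ebca6bu;
	h = (h ^ seed) * 0xc2b2ae35u;
	return h ^ (h >> 16);
}

int main(void)
{
	uint32_t divisor = 1024;		/* must be a power of two */
	uint32_t perturbation = 0x1234u;	/* reseeded by the perturb timer */

	/* Flow keys as sfq_hash() builds them for IPv4: h = daddr,
	 * h2 = saddr ^ protocol, then h2 ^= the 4 bytes of L4 ports. */
	uint32_t h = 0x0a000001u;		/* 10.0.0.1 */
	uint32_t h2 = 0x0a000002u ^ 6;		/* 10.0.0.2, TCP */

	h2 ^= (12345u << 16) | 80u;		/* sport/dport word */

	printf("bucket = %u\n", mix_2words(h, h2, perturbation) & (divisor - 1));
	return 0;
}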
@@ -177,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 
 	if (TC_H_MAJ(skb->priority) == sch->handle &&
 	    TC_H_MIN(skb->priority) > 0 &&
-	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
+	    TC_H_MIN(skb->priority) <= q->divisor)
 		return TC_H_MIN(skb->priority);
 
 	if (!q->filter_list)
@@ -195,36 +221,47 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 			return 0;
 		}
 #endif
-		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
+		if (TC_H_MIN(res.classid) <= q->divisor)
 			return TC_H_MIN(res.classid);
 	}
 	return 0;
 }
 
+/*
+ * x : slot number [0 .. SFQ_SLOTS - 1]
+ */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int d = q->qs[x].qlen + SFQ_DEPTH;
+	int qlen = q->slots[x].qlen;
 
-	p = d;
-	n = q->dep[d].next;
-	q->dep[x].next = n;
-	q->dep[x].prev = p;
-	q->dep[p].next = q->dep[n].prev = x;
+	p = qlen + SFQ_SLOTS;
+	n = q->dep[qlen].next;
+
+	q->slots[x].dep.next = n;
+	q->slots[x].dep.prev = p;
+
+	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
+	sfq_dep_head(q, n)->prev = x;
 }
 
+#define sfq_unlink(q, x, n, p)			\
+	n = q->slots[x].dep.next;		\
+	p = q->slots[x].dep.prev;		\
+	sfq_dep_head(q, p)->next = n;		\
+	sfq_dep_head(q, n)->prev = p
+
+
 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
+	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-
-	if (n == p && q->max_depth == q->qs[x].qlen + 1)
-		q->max_depth--;
+	sfq_unlink(q, x, n, p);
 
+	d = q->slots[x].qlen--;
+	if (n == p && q->cur_depth == d)
+		q->cur_depth--;
 	sfq_link(q, x);
 }
@@ -233,34 +270,74 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
 	sfq_index p, n;
 	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-	d = q->qs[x].qlen;
-	if (q->max_depth < d)
-		q->max_depth = d;
+	sfq_unlink(q, x, n, p);
 
+	d = ++q->slots[x].qlen;
+	if (q->cur_depth < d)
+		q->cur_depth = d;
 	sfq_link(q, x);
 }
 
+/* helper functions : might be changed when/if skb use a standard list_head */
+
+/* remove one skb from tail of slot queue */
+static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_prev;
+
+	slot->skblist_prev = skb->prev;
+	skb->prev->next = (struct sk_buff *)slot;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+/* remove one skb from head of slot queue */
+static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_next;
+
+	slot->skblist_next = skb->next;
+	skb->next->prev = (struct sk_buff *)slot;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+static inline void slot_queue_init(struct sfq_slot *slot)
+{
+	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
+}
+
+/* add skb to slot queue (tail add) */
+static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
+{
+	skb->prev = slot->skblist_prev;
+	skb->next = (struct sk_buff *)slot;
+	slot->skblist_prev->next = skb;
+	slot->skblist_prev = skb;
+}
+
+#define slot_queue_walk(slot, skb)		\
+	for (skb = slot->skblist_next;		\
+	     skb != (struct sk_buff *)slot;	\
+	     skb = skb->next)
+
 static unsigned int sfq_drop(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index d = q->max_depth;
+	sfq_index x, d = q->cur_depth;
 	struct sk_buff *skb;
 	unsigned int len;
+	struct sfq_slot *slot;
 
-	/* Queue is full! Find the longest slot and
-	   drop a packet from it */
-
+	/* Queue is full! Find the longest slot and drop tail packet from it */
 	if (d > 1) {
-		sfq_index x = q->dep[d + SFQ_DEPTH].next;
-		skb = q->qs[x].prev;
+		x = q->dep[d].next;
+		slot = &q->slots[x];
+drop:
+		skb = slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[x]);
-		kfree_skb(skb);
 		sfq_dec(q, x);
+		kfree_skb(skb);
 		sch->q.qlen--;
 		sch->qstats.drops++;
 		sch->qstats.backlog -= len;
@@ -269,19 +346,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 
 	if (d == 1) {
 		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
-		d = q->next[q->tail];
-		q->next[q->tail] = q->next[d];
-		q->allot[q->next[d]] += q->quantum;
-		skb = q->qs[d].prev;
-		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[d]);
-		kfree_skb(skb);
-		sfq_dec(q, d);
-		sch->q.qlen--;
-		q->ht[q->hash[d]] = SFQ_DEPTH;
-		sch->qstats.drops++;
-		sch->qstats.backlog -= len;
-		return len;
+		x = q->tail->next;
+		slot = &q->slots[x];
+		q->tail->next = slot->next;
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		goto drop;
 	}
 
 	return 0;
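The slot_* helpers rely on a layout trick: skblist_next/skblist_prev are the first two fields of struct sfq_slot and line up with sk_buff's next/prev, so the slot itself can be cast to a sk_buff and used as the sentinel of its own circular list, with no separate list head. A standalone sketch of the same trick with stand-in types (pkt/slot are illustrative, not kernel structures):

#include <stdio.h>

/* What matters is that 'slot' begins with the same two pointer fields
 * as 'pkt', so (struct pkt *)slot is a usable sentinel node, just as
 * sfq_slot's skblist_next/skblist_prev alias sk_buff's next/prev. */
struct pkt { struct pkt *next, *prev; int id; };
struct slot { struct pkt *skblist_next, *skblist_prev; };

static void queue_init(struct slot *s)
{
	s->skblist_next = s->skblist_prev = (struct pkt *)s;
}

static void queue_add_tail(struct slot *s, struct pkt *p)
{
	p->prev = s->skblist_prev;
	p->next = (struct pkt *)s;
	s->skblist_prev->next = p;	/* writes s->skblist_next if empty */
	s->skblist_prev = p;
}

static struct pkt *dequeue_head(struct slot *s)
{
	struct pkt *p = s->skblist_next;

	s->skblist_next = p->next;
	p->next->prev = (struct pkt *)s;
	p->next = p->prev = NULL;
	return p;
}

int main(void)
{
	struct slot s;
	struct pkt a = { .id = 1 }, b = { .id = 2 };

	queue_init(&s);
	queue_add_tail(&s, &a);
	queue_add_tail(&s, &b);
	printf("head id = %d\n", dequeue_head(&s)->id);	/* 1 */
	printf("next id = %d\n", dequeue_head(&s)->id);	/* 2 */
	return 0;
}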
@@ -292,7 +361,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned int hash;
-	sfq_index x;
+	sfq_index x, qlen;
+	struct sfq_slot *slot;
 	int uninitialized_var(ret);
 
 	hash = sfq_classify(skb, sch, &ret);
@@ -305,54 +375,42 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	hash--;
 
 	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
+	slot = &q->slots[x];
+	if (x == SFQ_EMPTY_SLOT) {
+		x = q->dep[0].next; /* get a free slot */
+		q->ht[hash] = x;
+		slot = &q->slots[x];
+		slot->hash = hash;
 	}
 
-	/* If selected queue has length q->limit, this means that
-	 * all another queues are empty and that we do simple tail drop,
+	/* If selected queue has length q->limit, do simple tail drop,
 	 * i.e. drop _this_ packet.
 	 */
-	if (q->qs[x].qlen >= q->limit)
+	if (slot->qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_tail(&q->qs[x], skb);
+	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->tail = x;
-			q->next[x] = x;
-			q->allot[x] = q->quantum;
+	if (slot->qlen == 1) {		/* The flow is new */
+		if (q->tail == NULL) {	/* It is the first flow */
+			slot->next = x;
 		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
-			q->tail = x;
+			slot->next = q->tail->next;
+			q->tail->next = x;
 		}
+		q->tail = slot;
+		slot->allot = q->scaled_quantum;
 	}
-	if (++sch->q.qlen <= q->limit) {
-		sch->bstats.bytes += qdisc_pkt_len(skb);
-		sch->bstats.packets++;
+	if (++sch->q.qlen <= q->limit)
 		return NET_XMIT_SUCCESS;
-	}
 
+	qlen = slot->qlen;
 	sfq_drop(sch);
-	return NET_XMIT_CN;
-}
-
-static struct sk_buff *
-sfq_peek(struct Qdisc *sch)
-{
-	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index a;
-
-	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
-		return NULL;
-
-	a = q->next[q->tail];
-	return skb_peek(&q->qs[a]);
+	/* Return Congestion Notification only if we dropped a packet
+	 * from this flow.
+	 */
+	return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS;
 }
 
 static struct sk_buff *
@@ -360,34 +418,38 @@ sfq_dequeue(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	sfq_index a, old_a;
+	sfq_index a, next_a;
+	struct sfq_slot *slot;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = old_a = q->next[q->tail];
-
-	/* Grab packet */
-	skb = __skb_dequeue(&q->qs[a]);
+next_slot:
+	a = q->tail->next;
+	slot = &q->slots[a];
+	if (slot->allot <= 0) {
+		q->tail = slot;
+		slot->allot += q->scaled_quantum;
+		goto next_slot;
+	}
+	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	/* Is the slot empty? */
-	if (q->qs[a].qlen == 0) {
-		q->ht[q->hash[a]] = SFQ_DEPTH;
-		a = q->next[a];
-		if (a == old_a) {
-			q->tail = SFQ_DEPTH;
+	if (slot->qlen == 0) {
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		next_a = slot->next;
+		if (a == next_a) {
+			q->tail = NULL; /* no more active slots */
 			return skb;
 		}
-		q->next[q->tail] = a;
-		q->allot[a] += q->quantum;
-	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-		q->tail = a;
-		a = q->next[a];
-		q->allot[a] += q->quantum;
+		q->tail->next = next_a;
+	} else {
+		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
 	}
 	return skb;
 }
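Dequeue is a deficit-round-robin walk: a slot whose scaled allot is exhausted is refilled with scaled_quantum and skipped (the tail pointer advances past it), otherwise one packet is sent and its scaled length is charged. A simplified standalone model of that credit loop (it assumes permanently backlogged flows and collapses the tail bookkeeping into a single index, so it approximates sfq_dequeue() rather than copying it):

#include <stdio.h>

#define SCALED_QUANTUM 189	/* e.g. SFQ_ALLOT_SIZE(1514) = ceil(1514 / 8) */

struct flow {
	int allot;	/* scaled credit, like sfq_slot->allot */
	int pkt_len;	/* every packet of this flow has this size */
};

/* Serve one packet: skip and refill flows whose credit is gone, then
 * charge the chosen flow the scaled cost of its packet. */
static int dequeue_one(struct flow *flows, int nflows, int *cur)
{
	for (;;) {
		struct flow *f = &flows[*cur];

		if (f->allot <= 0) {
			f->allot += SCALED_QUANTUM;	/* refill */
			*cur = (*cur + 1) % nflows;	/* advance round */
			continue;
		}
		f->allot -= (f->pkt_len + 7) / 8;	/* SFQ_ALLOT_SIZE */
		return *cur;
	}
}

int main(void)
{
	struct flow flows[2] = {
		{ SCALED_QUANTUM, 1500 },	/* bulk flow, big packets */
		{ SCALED_QUANTUM, 64 },		/* interactive, small packets */
	};
	int cur = 0;

	for (int i = 0; i < 30; i++)
		printf("%d", dequeue_one(flows, 2, &cur));
	/* Few big packets vs many small ones per round: roughly equal
	 * bytes per flow per round, which is the point of the credit. */
	printf("\n");
	return 0;
}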
@@ -421,12 +483,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
 
+	if (ctl->divisor &&
+	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
+		return -EINVAL;
+
 	sch_tree_lock(sch);
 	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
+	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-
+	if (ctl->divisor)
+		q->divisor = ctl->divisor;
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit)
 		sfq_drop(sch);
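The new divisor check accepts only powers of two (up to 65536) because sfq_fold_hash() reduces the hash with a mask, & (q->divisor - 1), which is only a uniform reduction when the divisor is a power of two. The bit trick behind the kernel's is_power_of_2(), in a standalone sketch:

#include <assert.h>
#include <stdio.h>

/* Userspace equivalent of is_power_of_2(): a nonzero value with a
 * single set bit satisfies (n & (n - 1)) == 0. */
static int is_pow2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	assert(is_pow2(1024));		/* a valid divisor */
	assert(!is_pow2(1000));		/* rejected with -EINVAL */
	printf("1024 & 1023 = %d\n", 1024 & 1023);	/* 0 */
	return 0;
}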
@@ -444,26 +512,25 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
+	size_t sz;
 	int i;
 
 	q->perturb_timer.function = sfq_perturbation;
 	q->perturb_timer.data = (unsigned long)sch;
 	init_timer_deferrable(&q->perturb_timer);
 
-	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_DEPTH;
-
 	for (i = 0; i < SFQ_DEPTH; i++) {
-		skb_queue_head_init(&q->qs[i]);
-		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
-		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
+		q->dep[i].next = i + SFQ_SLOTS;
+		q->dep[i].prev = i + SFQ_SLOTS;
 	}
 
 	q->limit = SFQ_DEPTH - 1;
-	q->max_depth = 0;
-	q->tail = SFQ_DEPTH;
+	q->cur_depth = 0;
+	q->tail = NULL;
+	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
+		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 		q->perturb_period = 0;
 		q->perturbation = net_random();
 	} else {
@@ -472,8 +539,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	for (i = 0; i < SFQ_DEPTH; i++)
+	sz = sizeof(q->ht[0]) * q->divisor;
+	q->ht = kmalloc(sz, GFP_KERNEL);
+	if (!q->ht && sz > PAGE_SIZE)
+		q->ht = vmalloc(sz);
+	if (!q->ht)
+		return -ENOMEM;
+	for (i = 0; i < q->divisor; i++)
+		q->ht[i] = SFQ_EMPTY_SLOT;
+
+	for (i = 0; i < SFQ_SLOTS; i++) {
+		slot_queue_init(&q->slots[i]);
 		sfq_link(q, i);
+	}
+	if (q->limit >= 1)
+		sch->flags |= TCQ_F_CAN_BYPASS;
+	else
+		sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
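The hash table allocation above uses the classic kmalloc-then-vmalloc fallback: small tables come from the slab allocator, large ones (a divisor of up to 65536 entries) fall back to vmalloc(), and sfq_destroy() below picks the matching free routine via is_vmalloc_addr(). The pattern in isolation, as a sketch with hypothetical helper names (later kernels wrap the same idea as kvmalloc()/kvfree()):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

/* Try the slab first; fall back to vmalloc for large tables. */
static void *table_alloc(size_t sz)
{
	void *p = kmalloc(sz, GFP_KERNEL);

	if (!p && sz > PAGE_SIZE)
		p = vmalloc(sz);
	return p;
}

/* Free with the routine matching how the memory was obtained. */
static void table_free(void *p)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}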
@@ -484,6 +566,10 @@ static void sfq_destroy(struct Qdisc *sch)
 	tcf_destroy_chain(&q->filter_list);
 	q->perturb_period = 0;
 	del_timer_sync(&q->perturb_timer);
+	if (is_vmalloc_addr(q->ht))
+		vfree(q->ht);
+	else
+		kfree(q->ht);
 }
 
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -496,7 +582,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.perturb_period = q->perturb_period / HZ;
 
 	opt.limit = q->limit;
-	opt.divisor = SFQ_HASH_DIVISOR;
+	opt.divisor = q->divisor;
 	opt.flows = q->limit;
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -521,6 +607,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
 static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
 			      u32 classid)
 {
+	/* we cannot bypass queue discipline anymore */
+	sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
@@ -548,10 +636,19 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index idx = q->ht[cl-1];
-	struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
-	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	sfq_index idx = q->ht[cl - 1];
+	struct gnet_stats_queue qs = { 0 };
+	struct tc_sfq_xstats xstats = { 0 };
+	struct sk_buff *skb;
 
+	if (idx != SFQ_EMPTY_SLOT) {
+		const struct sfq_slot *slot = &q->slots[idx];
+
+		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
+		qs.qlen = slot->qlen;
+		slot_queue_walk(slot, skb)
+			qs.backlog += qdisc_pkt_len(skb);
+	}
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
@@ -565,8 +662,8 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	if (arg->stop)
 		return;
 
-	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
-		if (q->ht[i] == SFQ_DEPTH ||
+	for (i = 0; i < q->divisor; i++) {
+		if (q->ht[i] == SFQ_EMPTY_SLOT ||
 		    arg->count < arg->skip) {
 			arg->count++;
 			continue;
@@ -597,7 +694,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct sfq_sched_data),
 	.enqueue	=	sfq_enqueue,
 	.dequeue	=	sfq_dequeue,
-	.peek		=	sfq_peek,
+	.peek		=	qdisc_peek_dequeued,
 	.drop		=	sfq_drop,
 	.init		=	sfq_init,
 	.reset		=	sfq_reset,