aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-12-20 07:54:58 -0500
committerDavid S. Miller <davem@davemloft.net>2010-12-21 00:32:59 -0500
commiteda83e3b63e88351310c13c99178eb4634f137b2 (patch)
tree55b9c1f75337a8ca4032e607405e370b437c398e
parentd9993be65a77f500ae926176baa264816bfe3816 (diff)
net_sched: sch_sfq: better struct layouts
Here is a respin of patch. I'll send a short patch to make SFQ more fair in presence of large packets as well. Thanks [PATCH v3 net-next-2.6] net_sched: sch_sfq: better struct layouts This patch shrinks sizeof(struct sfq_sched_data) from 0x14f8 (or more if spinlocks are bigger) to 0x1180 bytes, and reduce text size as well. text data bss dec hex filename 4821 152 0 4973 136d old/net/sched/sch_sfq.o 4627 136 0 4763 129b new/net/sched/sch_sfq.o All data for a slot/flow is now grouped in a compact and cache friendly structure, instead of being spreaded in many different points. struct sfq_slot { struct sk_buff *skblist_next; struct sk_buff *skblist_prev; sfq_index qlen; /* number of skbs in skblist */ sfq_index next; /* next slot in sfq chain */ struct sfq_head dep; /* anchor in dep[] chains */ unsigned short hash; /* hash value (index in ht[]) */ short allot; /* credit for this slot */ }; Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Jarek Poplawski <jarkao2@gmail.com> Cc: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/sched/sch_sfq.c260
1 files changed, 162 insertions, 98 deletions
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 42396c965dd6..13322e8a0456 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -67,27 +67,42 @@
67 67
68 IMPLEMENTATION: 68 IMPLEMENTATION:
69 This implementation limits maximal queue length to 128; 69 This implementation limits maximal queue length to 128;
70 maximal mtu to 2^15-1; number of hash buckets to 1024. 70 maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
71 The only goal of this restrictions was that all data 71 The only goal of this restrictions was that all data
72 fit into one 4K page :-). Struct sfq_sched_data is 72 fit into one 4K page on 32bit arches.
73 organized in anti-cache manner: all the data for a bucket
74 are scattered over different locations. This is not good,
75 but it allowed me to put it into 4K.
76 73
77 It is easy to increase these values, but not in flight. */ 74 It is easy to increase these values, but not in flight. */
78 75
79#define SFQ_DEPTH 128 76#define SFQ_DEPTH 128 /* max number of packets per flow */
77#define SFQ_SLOTS 128 /* max number of flows */
78#define SFQ_EMPTY_SLOT 255
80#define SFQ_HASH_DIVISOR 1024 79#define SFQ_HASH_DIVISOR 1024
81 80
82/* This type should contain at least SFQ_DEPTH*2 values */ 81/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
83typedef unsigned char sfq_index; 82typedef unsigned char sfq_index;
84 83
84/*
85 * We dont use pointers to save space.
86 * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
87 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
88 * are 'pointers' to dep[] array
89 */
85struct sfq_head 90struct sfq_head
86{ 91{
87 sfq_index next; 92 sfq_index next;
88 sfq_index prev; 93 sfq_index prev;
89}; 94};
90 95
96struct sfq_slot {
97 struct sk_buff *skblist_next;
98 struct sk_buff *skblist_prev;
99 sfq_index qlen; /* number of skbs in skblist */
100 sfq_index next; /* next slot in sfq chain */
101 struct sfq_head dep; /* anchor in dep[] chains */
102 unsigned short hash; /* hash value (index in ht[]) */
103 short allot; /* credit for this slot */
104};
105
91struct sfq_sched_data 106struct sfq_sched_data
92{ 107{
93/* Parameters */ 108/* Parameters */
@@ -99,17 +114,24 @@ struct sfq_sched_data
99 struct tcf_proto *filter_list; 114 struct tcf_proto *filter_list;
100 struct timer_list perturb_timer; 115 struct timer_list perturb_timer;
101 u32 perturbation; 116 u32 perturbation;
102 sfq_index tail; /* Index of current slot in round */ 117 sfq_index cur_depth; /* depth of longest slot */
103 sfq_index max_depth; /* Maximal depth */
104 118
119 struct sfq_slot *tail; /* current slot in round */
105 sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ 120 sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */
106 sfq_index next[SFQ_DEPTH]; /* Active slots link */ 121 struct sfq_slot slots[SFQ_SLOTS];
107 short allot[SFQ_DEPTH]; /* Current allotment per slot */ 122 struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
108 unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */
109 struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */
110 struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */
111}; 123};
112 124
125/*
126 * sfq_head are either in a sfq_slot or in dep[] array
127 */
128static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
129{
130 if (val < SFQ_SLOTS)
131 return &q->slots[val].dep;
132 return &q->dep[val - SFQ_SLOTS];
133}
134
113static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) 135static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
114{ 136{
115 return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); 137 return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
@@ -200,30 +222,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
200 return 0; 222 return 0;
201} 223}
202 224
225/*
226 * x : slot number [0 .. SFQ_SLOTS - 1]
227 */
203static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) 228static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
204{ 229{
205 sfq_index p, n; 230 sfq_index p, n;
206 int d = q->qs[x].qlen + SFQ_DEPTH; 231 int qlen = q->slots[x].qlen;
232
233 p = qlen + SFQ_SLOTS;
234 n = q->dep[qlen].next;
207 235
208 p = d; 236 q->slots[x].dep.next = n;
209 n = q->dep[d].next; 237 q->slots[x].dep.prev = p;
210 q->dep[x].next = n; 238
211 q->dep[x].prev = p; 239 q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
212 q->dep[p].next = q->dep[n].prev = x; 240 sfq_dep_head(q, n)->prev = x;
213} 241}
214 242
243#define sfq_unlink(q, x, n, p) \
244 n = q->slots[x].dep.next; \
245 p = q->slots[x].dep.prev; \
246 sfq_dep_head(q, p)->next = n; \
247 sfq_dep_head(q, n)->prev = p
248
249
215static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) 250static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
216{ 251{
217 sfq_index p, n; 252 sfq_index p, n;
253 int d;
218 254
219 n = q->dep[x].next; 255 sfq_unlink(q, x, n, p);
220 p = q->dep[x].prev;
221 q->dep[p].next = n;
222 q->dep[n].prev = p;
223
224 if (n == p && q->max_depth == q->qs[x].qlen + 1)
225 q->max_depth--;
226 256
257 d = q->slots[x].qlen--;
258 if (n == p && q->cur_depth == d)
259 q->cur_depth--;
227 sfq_link(q, x); 260 sfq_link(q, x);
228} 261}
229 262
@@ -232,34 +265,72 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
232 sfq_index p, n; 265 sfq_index p, n;
233 int d; 266 int d;
234 267
235 n = q->dep[x].next; 268 sfq_unlink(q, x, n, p);
236 p = q->dep[x].prev;
237 q->dep[p].next = n;
238 q->dep[n].prev = p;
239 d = q->qs[x].qlen;
240 if (q->max_depth < d)
241 q->max_depth = d;
242 269
270 d = ++q->slots[x].qlen;
271 if (q->cur_depth < d)
272 q->cur_depth = d;
243 sfq_link(q, x); 273 sfq_link(q, x);
244} 274}
245 275
276/* helper functions : might be changed when/if skb use a standard list_head */
277
278/* remove one skb from tail of slot queue */
279static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
280{
281 struct sk_buff *skb = slot->skblist_prev;
282
283 slot->skblist_prev = skb->prev;
284 skb->next = skb->prev = NULL;
285 return skb;
286}
287
288/* remove one skb from head of slot queue */
289static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
290{
291 struct sk_buff *skb = slot->skblist_next;
292
293 slot->skblist_next = skb->next;
294 skb->next = skb->prev = NULL;
295 return skb;
296}
297
298static inline void slot_queue_init(struct sfq_slot *slot)
299{
300 slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
301}
302
303/* add skb to slot queue (tail add) */
304static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
305{
306 skb->prev = slot->skblist_prev;
307 skb->next = (struct sk_buff *)slot;
308 slot->skblist_prev->next = skb;
309 slot->skblist_prev = skb;
310}
311
312#define slot_queue_walk(slot, skb) \
313 for (skb = slot->skblist_next; \
314 skb != (struct sk_buff *)slot; \
315 skb = skb->next)
316
246static unsigned int sfq_drop(struct Qdisc *sch) 317static unsigned int sfq_drop(struct Qdisc *sch)
247{ 318{
248 struct sfq_sched_data *q = qdisc_priv(sch); 319 struct sfq_sched_data *q = qdisc_priv(sch);
249 sfq_index d = q->max_depth; 320 sfq_index x, d = q->cur_depth;
250 struct sk_buff *skb; 321 struct sk_buff *skb;
251 unsigned int len; 322 unsigned int len;
323 struct sfq_slot *slot;
252 324
253 /* Queue is full! Find the longest slot and 325 /* Queue is full! Find the longest slot and drop tail packet from it */
254 drop a packet from it */
255
256 if (d > 1) { 326 if (d > 1) {
257 sfq_index x = q->dep[d + SFQ_DEPTH].next; 327 x = q->dep[d].next;
258 skb = q->qs[x].prev; 328 slot = &q->slots[x];
329drop:
330 skb = slot_dequeue_tail(slot);
259 len = qdisc_pkt_len(skb); 331 len = qdisc_pkt_len(skb);
260 __skb_unlink(skb, &q->qs[x]);
261 kfree_skb(skb);
262 sfq_dec(q, x); 332 sfq_dec(q, x);
333 kfree_skb(skb);
263 sch->q.qlen--; 334 sch->q.qlen--;
264 sch->qstats.drops++; 335 sch->qstats.drops++;
265 sch->qstats.backlog -= len; 336 sch->qstats.backlog -= len;
@@ -268,18 +339,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
268 339
269 if (d == 1) { 340 if (d == 1) {
270 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ 341 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
271 d = q->next[q->tail]; 342 x = q->tail->next;
272 q->next[q->tail] = q->next[d]; 343 slot = &q->slots[x];
273 skb = q->qs[d].prev; 344 q->tail->next = slot->next;
274 len = qdisc_pkt_len(skb); 345 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
275 __skb_unlink(skb, &q->qs[d]); 346 goto drop;
276 kfree_skb(skb);
277 sfq_dec(q, d);
278 sch->q.qlen--;
279 q->ht[q->hash[d]] = SFQ_DEPTH;
280 sch->qstats.drops++;
281 sch->qstats.backlog -= len;
282 return len;
283 } 347 }
284 348
285 return 0; 349 return 0;
@@ -291,6 +355,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
291 struct sfq_sched_data *q = qdisc_priv(sch); 355 struct sfq_sched_data *q = qdisc_priv(sch);
292 unsigned int hash; 356 unsigned int hash;
293 sfq_index x; 357 sfq_index x;
358 struct sfq_slot *slot;
294 int uninitialized_var(ret); 359 int uninitialized_var(ret);
295 360
296 hash = sfq_classify(skb, sch, &ret); 361 hash = sfq_classify(skb, sch, &ret);
@@ -303,30 +368,33 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
303 hash--; 368 hash--;
304 369
305 x = q->ht[hash]; 370 x = q->ht[hash];
306 if (x == SFQ_DEPTH) { 371 slot = &q->slots[x];
307 q->ht[hash] = x = q->dep[SFQ_DEPTH].next; 372 if (x == SFQ_EMPTY_SLOT) {
308 q->hash[x] = hash; 373 x = q->dep[0].next; /* get a free slot */
374 q->ht[hash] = x;
375 slot = &q->slots[x];
376 slot->hash = hash;
377 slot_queue_init(slot);
309 } 378 }
310 379
311 /* If selected queue has length q->limit, this means that 380 /* If selected queue has length q->limit, do simple tail drop,
312 * all another queues are empty and that we do simple tail drop,
313 * i.e. drop _this_ packet. 381 * i.e. drop _this_ packet.
314 */ 382 */
315 if (q->qs[x].qlen >= q->limit) 383 if (slot->qlen >= q->limit)
316 return qdisc_drop(skb, sch); 384 return qdisc_drop(skb, sch);
317 385
318 sch->qstats.backlog += qdisc_pkt_len(skb); 386 sch->qstats.backlog += qdisc_pkt_len(skb);
319 __skb_queue_tail(&q->qs[x], skb); 387 slot_queue_add(slot, skb);
320 sfq_inc(q, x); 388 sfq_inc(q, x);
321 if (q->qs[x].qlen == 1) { /* The flow is new */ 389 if (slot->qlen == 1) { /* The flow is new */
322 if (q->tail == SFQ_DEPTH) { /* It is the first flow */ 390 if (q->tail == NULL) { /* It is the first flow */
323 q->next[x] = x; 391 slot->next = x;
324 } else { 392 } else {
325 q->next[x] = q->next[q->tail]; 393 slot->next = q->tail->next;
326 q->next[q->tail] = x; 394 q->tail->next = x;
327 } 395 }
328 q->tail = x; 396 q->tail = slot;
329 q->allot[x] = q->quantum; 397 slot->allot = q->quantum;
330 } 398 }
331 if (++sch->q.qlen <= q->limit) { 399 if (++sch->q.qlen <= q->limit) {
332 sch->bstats.bytes += qdisc_pkt_len(skb); 400 sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -342,14 +410,12 @@ static struct sk_buff *
342sfq_peek(struct Qdisc *sch) 410sfq_peek(struct Qdisc *sch)
343{ 411{
344 struct sfq_sched_data *q = qdisc_priv(sch); 412 struct sfq_sched_data *q = qdisc_priv(sch);
345 sfq_index a;
346 413
347 /* No active slots */ 414 /* No active slots */
348 if (q->tail == SFQ_DEPTH) 415 if (q->tail == NULL)
349 return NULL; 416 return NULL;
350 417
351 a = q->next[q->tail]; 418 return q->slots[q->tail->next].skblist_next;
352 return skb_peek(&q->qs[a]);
353} 419}
354 420
355static struct sk_buff * 421static struct sk_buff *
@@ -358,31 +424,31 @@ sfq_dequeue(struct Qdisc *sch)
358 struct sfq_sched_data *q = qdisc_priv(sch); 424 struct sfq_sched_data *q = qdisc_priv(sch);
359 struct sk_buff *skb; 425 struct sk_buff *skb;
360 sfq_index a, next_a; 426 sfq_index a, next_a;
427 struct sfq_slot *slot;
361 428
362 /* No active slots */ 429 /* No active slots */
363 if (q->tail == SFQ_DEPTH) 430 if (q->tail == NULL)
364 return NULL; 431 return NULL;
365 432
366 a = q->next[q->tail]; 433 a = q->tail->next;
367 434 slot = &q->slots[a];
368 /* Grab packet */ 435 skb = slot_dequeue_head(slot);
369 skb = __skb_dequeue(&q->qs[a]);
370 sfq_dec(q, a); 436 sfq_dec(q, a);
371 sch->q.qlen--; 437 sch->q.qlen--;
372 sch->qstats.backlog -= qdisc_pkt_len(skb); 438 sch->qstats.backlog -= qdisc_pkt_len(skb);
373 439
374 /* Is the slot empty? */ 440 /* Is the slot empty? */
375 if (q->qs[a].qlen == 0) { 441 if (slot->qlen == 0) {
376 q->ht[q->hash[a]] = SFQ_DEPTH; 442 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
377 next_a = q->next[a]; 443 next_a = slot->next;
378 if (a == next_a) { 444 if (a == next_a) {
379 q->tail = SFQ_DEPTH; 445 q->tail = NULL; /* no more active slots */
380 return skb; 446 return skb;
381 } 447 }
382 q->next[q->tail] = next_a; 448 q->tail->next = next_a;
383 } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { 449 } else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
384 q->allot[a] += q->quantum; 450 q->tail = slot;
385 q->tail = a; 451 slot->allot += q->quantum;
386 } 452 }
387 return skb; 453 return skb;
388} 454}
@@ -446,17 +512,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
446 init_timer_deferrable(&q->perturb_timer); 512 init_timer_deferrable(&q->perturb_timer);
447 513
448 for (i = 0; i < SFQ_HASH_DIVISOR; i++) 514 for (i = 0; i < SFQ_HASH_DIVISOR; i++)
449 q->ht[i] = SFQ_DEPTH; 515 q->ht[i] = SFQ_EMPTY_SLOT;
450 516
451 for (i = 0; i < SFQ_DEPTH; i++) { 517 for (i = 0; i < SFQ_DEPTH; i++) {
452 skb_queue_head_init(&q->qs[i]); 518 q->dep[i].next = i + SFQ_SLOTS;
453 q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH; 519 q->dep[i].prev = i + SFQ_SLOTS;
454 q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
455 } 520 }
456 521
457 q->limit = SFQ_DEPTH - 1; 522 q->limit = SFQ_DEPTH - 1;
458 q->max_depth = 0; 523 q->cur_depth = 0;
459 q->tail = SFQ_DEPTH; 524 q->tail = NULL;
460 if (opt == NULL) { 525 if (opt == NULL) {
461 q->quantum = psched_mtu(qdisc_dev(sch)); 526 q->quantum = psched_mtu(qdisc_dev(sch));
462 q->perturb_period = 0; 527 q->perturb_period = 0;
@@ -467,7 +532,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
467 return err; 532 return err;
468 } 533 }
469 534
470 for (i = 0; i < SFQ_DEPTH; i++) 535 for (i = 0; i < SFQ_SLOTS; i++)
471 sfq_link(q, i); 536 sfq_link(q, i);
472 return 0; 537 return 0;
473} 538}
@@ -543,13 +608,12 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
543 struct gnet_dump *d) 608 struct gnet_dump *d)
544{ 609{
545 struct sfq_sched_data *q = qdisc_priv(sch); 610 struct sfq_sched_data *q = qdisc_priv(sch);
546 sfq_index idx = q->ht[cl-1]; 611 const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
547 struct sk_buff_head *list = &q->qs[idx]; 612 struct gnet_stats_queue qs = { .qlen = slot->qlen };
548 struct gnet_stats_queue qs = { .qlen = list->qlen }; 613 struct tc_sfq_xstats xstats = { .allot = slot->allot };
549 struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
550 struct sk_buff *skb; 614 struct sk_buff *skb;
551 615
552 skb_queue_walk(list, skb) 616 slot_queue_walk(slot, skb)
553 qs.backlog += qdisc_pkt_len(skb); 617 qs.backlog += qdisc_pkt_len(skb);
554 618
555 if (gnet_stats_copy_queue(d, &qs) < 0) 619 if (gnet_stats_copy_queue(d, &qs) < 0)
@@ -566,7 +630,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
566 return; 630 return;
567 631
568 for (i = 0; i < SFQ_HASH_DIVISOR; i++) { 632 for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
569 if (q->ht[i] == SFQ_DEPTH || 633 if (q->ht[i] == SFQ_EMPTY_SLOT ||
570 arg->count < arg->skip) { 634 arg->count < arg->skip) {
571 arg->count++; 635 arg->count++;
572 continue; 636 continue;