author	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-20 20:43:29 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-20 20:43:29 -0400
commit	db6d8c7a4027b48d797b369a53f8470aaeed7063 (patch)
tree	e140c104a89abc2154e1f41a7db8ebecbb6fa0b4	/net/sched/sch_api.c
parent	3a533374283aea50eab3976d8a6d30532175f009 (diff)
parent	fb65a7c091529bfffb1262515252c0d0f6241c5c (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (1232 commits)
  iucv: Fix bad merging.
  net_sched: Add size table for qdiscs
  net_sched: Add accessor function for packet length for qdiscs
  net_sched: Add qdisc_enqueue wrapper
  highmem: Export totalhigh_pages.
  ipv6 mcast: Omit redundant address family checks in ip6_mc_source().
  net: Use standard structures for generic socket address structures.
  ipv6 netns: Make several "global" sysctl variables namespace aware.
  netns: Use net_eq() to compare net-namespaces for optimization.
  ipv6: remove unused macros from net/ipv6.h
  ipv6: remove unused parameter from ip6_ra_control
  tcp: fix kernel panic with listening_get_next
  tcp: Remove redundant checks when setting eff_sacks
  tcp: options clean up
  tcp: Fix MD5 signatures for non-linear skbs
  sctp: Update sctp global memory limit allocations.
  sctp: remove unnecessary byteshifting, calculate directly in big-endian
  sctp: Allow only 1 listening socket with SO_REUSEADDR
  sctp: Do not leak memory on multiple listen() calls
  sctp: Support ipv6only AF_INET6 sockets.
  ...
Diffstat (limited to 'net/sched/sch_api.c')
-rw-r--r--	net/sched/sch_api.c	613
1 file changed, 496 insertions, 117 deletions
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 10f01ad04380..5219d5f9d754 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -99,7 +99,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
    ---requeue
 
    requeues once dequeued packet. It is used for non-standard or
-   just buggy devices, which can defer output even if dev->tbusy=0.
+   just buggy devices, which can defer output even if netif_queue_stopped()=0.
 
    ---reset
 
@@ -185,11 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc);
 
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
-	struct Qdisc *q;
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+		struct Qdisc *q, *txq_root = txq->qdisc;
+
+		if (!(txq_root->flags & TCQ_F_BUILTIN) &&
+		    txq_root->handle == handle)
+			return txq_root;
 
-	list_for_each_entry(q, &dev->qdisc_list, list) {
+		list_for_each_entry(q, &txq_root->list, list) {
 		if (q->handle == handle)
 			return q;
+		}
 	}
 	return NULL;
 }
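The rewritten qdisc_lookup() above is the first appearance of a pattern this merge applies throughout the file: a device no longer has a single qdisc and a global dev->qdisc_list; each TX queue carries its own root qdisc, with child qdiscs linked off the root's ->list. A minimal sketch of that per-queue walk, using only the netdev_get_tx_queue()/num_tx_queues API visible in the diff (the visit() callback is made up for illustration):

/* Sketch: visit every non-builtin root qdisc of a device, one per
 * TX queue. netdev_get_tx_queue() and dev->num_tx_queues are taken
 * from the diff above; visit() is a hypothetical callback. */
static void for_each_tx_root(struct net_device *dev,
			     void (*visit)(struct Qdisc *))
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (!(txq->qdisc->flags & TCQ_F_BUILTIN))
			visit(txq->qdisc);
	}
}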
@@ -277,15 +286,137 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 }
 EXPORT_SYMBOL(qdisc_put_rtab);
 
+static LIST_HEAD(qdisc_stab_list);
+static DEFINE_SPINLOCK(qdisc_stab_lock);
+
+static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
+	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
+	[TCA_STAB_DATA] = { .type = NLA_BINARY },
+};
+
+static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
+{
+	struct nlattr *tb[TCA_STAB_MAX + 1];
+	struct qdisc_size_table *stab;
+	struct tc_sizespec *s;
+	unsigned int tsize = 0;
+	u16 *tab = NULL;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
+	if (err < 0)
+		return ERR_PTR(err);
+	if (!tb[TCA_STAB_BASE])
+		return ERR_PTR(-EINVAL);
+
+	s = nla_data(tb[TCA_STAB_BASE]);
+
+	if (s->tsize > 0) {
+		if (!tb[TCA_STAB_DATA])
+			return ERR_PTR(-EINVAL);
+		tab = nla_data(tb[TCA_STAB_DATA]);
+		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
+	}
+
+	if (!s || tsize != s->tsize || (!tab && tsize > 0))
+		return ERR_PTR(-EINVAL);
+
+	spin_lock(&qdisc_stab_lock);
+
+	list_for_each_entry(stab, &qdisc_stab_list, list) {
+		if (memcmp(&stab->szopts, s, sizeof(*s)))
+			continue;
+		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
+			continue;
+		stab->refcnt++;
+		spin_unlock(&qdisc_stab_lock);
+		return stab;
+	}
+
+	spin_unlock(&qdisc_stab_lock);
+
+	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
+	if (!stab)
+		return ERR_PTR(-ENOMEM);
+
+	stab->refcnt = 1;
+	stab->szopts = *s;
+	if (tsize > 0)
+		memcpy(stab->data, tab, tsize * sizeof(u16));
+
+	spin_lock(&qdisc_stab_lock);
+	list_add_tail(&stab->list, &qdisc_stab_list);
+	spin_unlock(&qdisc_stab_lock);
+
+	return stab;
+}
+
+void qdisc_put_stab(struct qdisc_size_table *tab)
+{
+	if (!tab)
+		return;
+
+	spin_lock(&qdisc_stab_lock);
+
+	if (--tab->refcnt == 0) {
+		list_del(&tab->list);
+		kfree(tab);
+	}
+
+	spin_unlock(&qdisc_stab_lock);
+}
+EXPORT_SYMBOL(qdisc_put_stab);
+
+static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_STAB);
+	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
+	nla_nest_end(skb, nest);
+
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+}
+
+void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	int pkt_len, slot;
+
+	pkt_len = skb->len + stab->szopts.overhead;
+	if (unlikely(!stab->szopts.tsize))
+		goto out;
+
+	slot = pkt_len + stab->szopts.cell_align;
+	if (unlikely(slot < 0))
+		slot = 0;
+
+	slot >>= stab->szopts.cell_log;
+	if (likely(slot < stab->szopts.tsize))
+		pkt_len = stab->data[slot];
+	else
+		pkt_len = stab->data[stab->szopts.tsize - 1] *
+				(slot / stab->szopts.tsize) +
+				stab->data[slot % stab->szopts.tsize];
+
+	pkt_len <<= stab->szopts.size_log;
+out:
+	if (unlikely(pkt_len < 1))
+		pkt_len = 1;
+	qdisc_skb_cb(skb)->pkt_len = pkt_len;
+}
+EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+
 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 {
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
 						 timer);
-	struct net_device *dev = wd->qdisc->dev;
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
 	smp_wmb();
-	netif_schedule(dev);
+	__netif_schedule(wd->qdisc);
 
 	return HRTIMER_NORESTART;
 }
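The arithmetic in qdisc_calculate_pkt_len() above is easier to check with concrete numbers. Below is a small userspace re-implementation of the same slot math; the sizespec values (64-byte cells, 8 bytes of per-packet overhead) and the table contents are invented for illustration and are not part of the patch:

#include <stdio.h>

/* Mirror of the slot/extrapolation math in qdisc_calculate_pkt_len();
 * field names follow struct tc_sizespec. */
struct sizespec { int cell_log, size_log, cell_align, overhead, tsize; };

static int stab_len(const struct sizespec *s,
		    const unsigned short *data, int skb_len)
{
	int pkt_len = skb_len + s->overhead;
	int slot = pkt_len + s->cell_align;

	if (slot < 0)
		slot = 0;
	slot >>= s->cell_log;
	if (slot < s->tsize)
		pkt_len = data[slot];
	else	/* extrapolate beyond the end of the table */
		pkt_len = data[s->tsize - 1] * (slot / s->tsize) +
			  data[slot % s->tsize];
	return pkt_len << s->size_log;
}

int main(void)
{
	/* 16 slots of 64-byte cells: packets in slot i are accounted
	 * as (i + 1) whole cells' worth of bytes. */
	unsigned short data[16];
	struct sizespec s = { .cell_log = 6, .size_log = 0,
			      .cell_align = 0, .overhead = 8, .tsize = 16 };
	int i;

	for (i = 0; i < 16; i++)
		data[i] = (i + 1) * 64;

	/* 100 + 8 = 108 bytes -> slot 1 -> accounted as 128 bytes */
	printf("100 -> %d\n", stab_len(&s, data, 100));
	/* 2000 + 8 = 2008 -> slot 31 -> 1024 * 1 + data[15] = 2048 */
	printf("2000 -> %d\n", stab_len(&s, data, 2000));
	return 0;
}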
@@ -316,6 +447,110 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
+struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
+{
+	unsigned int size = n * sizeof(struct hlist_head), i;
+	struct hlist_head *h;
+
+	if (size <= PAGE_SIZE)
+		h = kmalloc(size, GFP_KERNEL);
+	else
+		h = (struct hlist_head *)
+			__get_free_pages(GFP_KERNEL, get_order(size));
+
+	if (h != NULL) {
+		for (i = 0; i < n; i++)
+			INIT_HLIST_HEAD(&h[i]);
+	}
+	return h;
+}
+
+static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
+{
+	unsigned int size = n * sizeof(struct hlist_head);
+
+	if (size <= PAGE_SIZE)
+		kfree(h);
+	else
+		free_pages((unsigned long)h, get_order(size));
+}
+
+void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
+{
+	struct Qdisc_class_common *cl;
+	struct hlist_node *n, *next;
+	struct hlist_head *nhash, *ohash;
+	unsigned int nsize, nmask, osize;
+	unsigned int i, h;
+
+	/* Rehash when load factor exceeds 0.75 */
+	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
+		return;
+	nsize = clhash->hashsize * 2;
+	nmask = nsize - 1;
+	nhash = qdisc_class_hash_alloc(nsize);
+	if (nhash == NULL)
+		return;
+
+	ohash = clhash->hash;
+	osize = clhash->hashsize;
+
+	sch_tree_lock(sch);
+	for (i = 0; i < osize; i++) {
+		hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
+			h = qdisc_class_hash(cl->classid, nmask);
+			hlist_add_head(&cl->hnode, &nhash[h]);
+		}
+	}
+	clhash->hash = nhash;
+	clhash->hashsize = nsize;
+	clhash->hashmask = nmask;
+	sch_tree_unlock(sch);
+
+	qdisc_class_hash_free(ohash, osize);
+}
+EXPORT_SYMBOL(qdisc_class_hash_grow);
+
+int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
+{
+	unsigned int size = 4;
+
+	clhash->hash = qdisc_class_hash_alloc(size);
+	if (clhash->hash == NULL)
+		return -ENOMEM;
+	clhash->hashsize = size;
+	clhash->hashmask = size - 1;
+	clhash->hashelems = 0;
+	return 0;
+}
+EXPORT_SYMBOL(qdisc_class_hash_init);
+
+void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
+{
+	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
+}
+EXPORT_SYMBOL(qdisc_class_hash_destroy);
+
+void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
+			     struct Qdisc_class_common *cl)
+{
+	unsigned int h;
+
+	INIT_HLIST_NODE(&cl->hnode);
+	h = qdisc_class_hash(cl->classid, clhash->hashmask);
+	hlist_add_head(&cl->hnode, &clhash->hash[h]);
+	clhash->hashelems++;
+}
+EXPORT_SYMBOL(qdisc_class_hash_insert);
+
+void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
+			     struct Qdisc_class_common *cl)
+{
+	hlist_del(&cl->hnode);
+	clhash->hashelems--;
+}
+EXPORT_SYMBOL(qdisc_class_hash_remove);
+
 /* Allocate an unique handle from space managed by kernel */
 
 static u32 qdisc_alloc_handle(struct net_device *dev)
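The Qdisc_class_hash helpers added above give classful qdiscs a shared, self-resizing class table. A sketch of how a consumer is expected to use them; the my_* names are hypothetical, and qdisc_class_find() is assumed to be the matching lookup helper declared alongside these (the in-tree classful qdiscs such as HTB and CBQ were converted in the same patch series):

/* Hypothetical classful qdisc adopting the new class hash API. */
struct my_class {
	struct Qdisc_class_common common;	/* classid + hash node */
	/* ... per-class scheduling state ... */
};

struct my_sched_data {
	struct Qdisc_class_hash clhash;
};

static int my_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct my_sched_data *q = qdisc_priv(sch);

	return qdisc_class_hash_init(&q->clhash);
}

static struct my_class *my_find(struct Qdisc *sch, u32 classid)
{
	struct my_sched_data *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	return clc ? container_of(clc, struct my_class, common) : NULL;
}

static void my_attach_class(struct Qdisc *sch, struct my_class *cl)
{
	struct my_sched_data *q = qdisc_priv(sch);

	qdisc_class_hash_insert(&q->clhash, &cl->common);
	/* doubles the table under sch_tree_lock() once the load
	 * factor passes 0.75, as implemented above */
	qdisc_class_hash_grow(sch, &q->clhash);
}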
@@ -332,32 +567,39 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 	return i>0 ? autohandle : 0;
 }
 
-/* Attach toplevel qdisc to device dev */
+/* Attach toplevel qdisc to device queue. */
 
-static struct Qdisc *
-dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
+static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
+				     struct Qdisc *qdisc)
 {
+	spinlock_t *root_lock;
 	struct Qdisc *oqdisc;
+	int ingress;
+
+	ingress = 0;
+	if (qdisc && qdisc->flags&TCQ_F_INGRESS)
+		ingress = 1;
+
+	if (ingress) {
+		oqdisc = dev_queue->qdisc;
+	} else {
+		oqdisc = dev_queue->qdisc_sleeping;
+	}
 
-	if (dev->flags & IFF_UP)
-		dev_deactivate(dev);
+	root_lock = qdisc_root_lock(oqdisc);
+	spin_lock_bh(root_lock);
 
-	qdisc_lock_tree(dev);
-	if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
-		oqdisc = dev->qdisc_ingress;
+	if (ingress) {
 		/* Prune old scheduler */
 		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
 			/* delete */
 			qdisc_reset(oqdisc);
-			dev->qdisc_ingress = NULL;
+			dev_queue->qdisc = NULL;
 		} else {  /* new */
-			dev->qdisc_ingress = qdisc;
+			dev_queue->qdisc = qdisc;
 		}
 
 	} else {
-
-		oqdisc = dev->qdisc_sleeping;
-
 		/* Prune old scheduler */
 		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
 			qdisc_reset(oqdisc);
@@ -365,14 +607,11 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 		/* ... and graft new one */
 		if (qdisc == NULL)
 			qdisc = &noop_qdisc;
-		dev->qdisc_sleeping = qdisc;
-		dev->qdisc = &noop_qdisc;
+		dev_queue->qdisc_sleeping = qdisc;
+		dev_queue->qdisc = &noop_qdisc;
 	}
 
-	qdisc_unlock_tree(dev);
-
-	if (dev->flags & IFF_UP)
-		dev_activate(dev);
+	spin_unlock_bh(root_lock);
 
 	return oqdisc;
 }
@@ -389,7 +628,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 	if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
 		return;
 
-	sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+	sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
 	if (sch == NULL) {
 		WARN_ON(parentid != TC_H_ROOT);
 		return;
@@ -405,26 +644,66 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 }
 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
 
-/* Graft qdisc "new" to class "classid" of qdisc "parent" or
-   to device "dev".
+static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
+			       struct Qdisc *old, struct Qdisc *new)
+{
+	if (new || old)
+		qdisc_notify(skb, n, clid, old, new);
+
+	if (old) {
+		spin_lock_bh(&old->q.lock);
+		qdisc_destroy(old);
+		spin_unlock_bh(&old->q.lock);
+	}
+}
 
-   Old qdisc is not destroyed but returned in *old.
+/* Graft qdisc "new" to class "classid" of qdisc "parent" or
+ * to device "dev".
+ *
+ * When appropriate send a netlink notification using 'skb'
+ * and "n".
+ *
+ * On success, destroy old qdisc.
  */
 
 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
-		       u32 classid,
-		       struct Qdisc *new, struct Qdisc **old)
+		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
+		       struct Qdisc *new, struct Qdisc *old)
 {
+	struct Qdisc *q = old;
 	int err = 0;
-	struct Qdisc *q = *old;
-
 
 	if (parent == NULL) {
-		if (q && q->flags&TCQ_F_INGRESS) {
-			*old = dev_graft_qdisc(dev, q);
-		} else {
-			*old = dev_graft_qdisc(dev, new);
+		unsigned int i, num_q, ingress;
+
+		ingress = 0;
+		num_q = dev->num_tx_queues;
+		if (q && q->flags & TCQ_F_INGRESS) {
+			num_q = 1;
+			ingress = 1;
+		}
+
+		if (dev->flags & IFF_UP)
+			dev_deactivate(dev);
+
+		for (i = 0; i < num_q; i++) {
+			struct netdev_queue *dev_queue = &dev->rx_queue;
+
+			if (!ingress)
+				dev_queue = netdev_get_tx_queue(dev, i);
+
+			if (ingress) {
+				old = dev_graft_qdisc(dev_queue, q);
+			} else {
+				old = dev_graft_qdisc(dev_queue, new);
+				if (new && i > 0)
+					atomic_inc(&new->refcnt);
+			}
+			notify_and_destroy(skb, n, classid, old, new);
 		}
+
+		if (dev->flags & IFF_UP)
+			dev_activate(dev);
 	} else {
 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
 
@@ -433,10 +712,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		if (cops) {
 			unsigned long cl = cops->get(parent, classid);
 			if (cl) {
-				err = cops->graft(parent, cl, new, old);
+				err = cops->graft(parent, cl, new, &old);
 				cops->put(parent, cl);
 			}
 		}
+		if (!err)
+			notify_and_destroy(skb, n, classid, old, new);
 	}
 	return err;
 }
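qdisc_graft() changes its contract here: instead of handing the displaced qdisc back through a **old parameter and leaving notification and destruction to the caller, it now takes the netlink skb/nlmsghdr and finishes the job itself via notify_and_destroy(). For comparison, the pre-merge calling convention, condensed from the old code removed from tc_modify_qdisc() further down in this diff:

/* Old contract: every caller repeated this notify-then-destroy dance
 * under the device-global tree lock. */
struct Qdisc *old_q = NULL;

err = qdisc_graft(dev, p, clid, q, &old_q);
if (err)
	return err;
qdisc_notify(skb, n, clid, old_q, q);
if (old_q) {
	qdisc_lock_tree(dev);
	qdisc_destroy(old_q);
	qdisc_unlock_tree(dev);
}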
@@ -448,13 +729,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
  */
 
 static struct Qdisc *
-qdisc_create(struct net_device *dev, u32 parent, u32 handle,
-	     struct nlattr **tca, int *errp)
+qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
+	     u32 parent, u32 handle, struct nlattr **tca, int *errp)
 {
 	int err;
 	struct nlattr *kind = tca[TCA_KIND];
 	struct Qdisc *sch;
 	struct Qdisc_ops *ops;
+	struct qdisc_size_table *stab;
 
 	ops = qdisc_lookup_ops(kind);
 #ifdef CONFIG_KMOD
@@ -489,7 +771,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
 	if (ops == NULL)
 		goto err_out;
 
-	sch = qdisc_alloc(dev, ops);
+	sch = qdisc_alloc(dev_queue, ops);
 	if (IS_ERR(sch)) {
 		err = PTR_ERR(sch);
 		goto err_out2;
@@ -499,10 +781,8 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
 
 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
-		sch->stats_lock = &dev->ingress_lock;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
 	} else {
-		sch->stats_lock = &dev->queue_lock;
 		if (handle == 0) {
 			handle = qdisc_alloc_handle(dev);
 			err = -ENOMEM;
@@ -514,9 +794,17 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
 	sch->handle = handle;
 
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
+		if (tca[TCA_STAB]) {
+			stab = qdisc_get_stab(tca[TCA_STAB]);
+			if (IS_ERR(stab)) {
+				err = PTR_ERR(stab);
+				goto err_out3;
+			}
+			sch->stab = stab;
+		}
 		if (tca[TCA_RATE]) {
 			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
-						sch->stats_lock,
+						qdisc_root_lock(sch),
 						tca[TCA_RATE]);
 			if (err) {
 				/*
@@ -529,13 +817,13 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
 				goto err_out3;
 			}
 		}
-		qdisc_lock_tree(dev);
-		list_add_tail(&sch->list, &dev->qdisc_list);
-		qdisc_unlock_tree(dev);
+		if (parent)
+			list_add_tail(&sch->list, &dev_queue->qdisc->list);
 
 		return sch;
 	}
 err_out3:
+	qdisc_put_stab(sch->stab);
 	dev_put(dev);
 	kfree((char *) sch - sch->padded);
 err_out2:
@@ -547,18 +835,29 @@ err_out:
 
 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 {
-	if (tca[TCA_OPTIONS]) {
-		int err;
+	struct qdisc_size_table *stab = NULL;
+	int err = 0;
 
+	if (tca[TCA_OPTIONS]) {
 		if (sch->ops->change == NULL)
 			return -EINVAL;
 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
 		if (err)
 			return err;
 	}
+
+	if (tca[TCA_STAB]) {
+		stab = qdisc_get_stab(tca[TCA_STAB]);
+		if (IS_ERR(stab))
+			return PTR_ERR(stab);
+	}
+
+	qdisc_put_stab(sch->stab);
+	sch->stab = stab;
+
 	if (tca[TCA_RATE])
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
-				      sch->stats_lock, tca[TCA_RATE]);
+				      qdisc_root_lock(sch), tca[TCA_RATE]);
 	return 0;
 }
 
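Once qdisc_create()/qdisc_change() have attached a size table to sch->stab, the hot path consumes it before the packet reaches the qdisc. The merge summary above lists a qdisc_enqueue() wrapper for exactly this; a sketch of the assumed helper, which would live in <net/sch_generic.h> rather than in this file:

/* Assumed shape of the new enqueue wrapper: compute the accounted
 * length from the size table, then hand the skb to the qdisc, which
 * reads the result back via qdisc_skb_cb(skb)->pkt_len. */
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	if (sch->stab)
		qdisc_calculate_pkt_len(skb, sch->stab);
	return sch->enqueue(skb, sch);
}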
@@ -634,10 +933,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 				return -ENOENT;
 			q = qdisc_leaf(p, clid);
 		} else { /* ingress */
-			q = dev->qdisc_ingress;
+			q = dev->rx_queue.qdisc;
 		}
 	} else {
-		q = dev->qdisc_sleeping;
+		struct netdev_queue *dev_queue;
+		dev_queue = netdev_get_tx_queue(dev, 0);
+		q = dev_queue->qdisc_sleeping;
 	}
 	if (!q)
 		return -ENOENT;
@@ -657,14 +958,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			return -EINVAL;
 		if (q->handle == 0)
 			return -ENOENT;
-		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
+		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
 			return err;
-		if (q) {
-			qdisc_notify(skb, n, clid, q, NULL);
-			qdisc_lock_tree(dev);
-			qdisc_destroy(q);
-			qdisc_unlock_tree(dev);
-		}
 	} else {
 		qdisc_notify(skb, n, clid, NULL, q);
 	}
@@ -708,10 +1003,12 @@ replay:
 				return -ENOENT;
 			q = qdisc_leaf(p, clid);
 		} else { /*ingress */
-			q = dev->qdisc_ingress;
+			q = dev->rx_queue.qdisc;
 		}
 	} else {
-		q = dev->qdisc_sleeping;
+		struct netdev_queue *dev_queue;
+		dev_queue = netdev_get_tx_queue(dev, 0);
+		q = dev_queue->qdisc_sleeping;
 	}
 
 	/* It may be default qdisc, ignore it */
@@ -788,10 +1085,12 @@ create_n_graft:
 	if (!(n->nlmsg_flags&NLM_F_CREATE))
 		return -ENOENT;
 	if (clid == TC_H_INGRESS)
-		q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
+		q = qdisc_create(dev, &dev->rx_queue,
+				 tcm->tcm_parent, tcm->tcm_parent,
 				 tca, &err);
 	else
-		q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
+		q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
+				 tcm->tcm_parent, tcm->tcm_handle,
 				 tca, &err);
 	if (q == NULL) {
 		if (err == -EAGAIN)
@@ -801,22 +1100,18 @@ create_n_graft:
 
 graft:
 	if (1) {
-		struct Qdisc *old_q = NULL;
-		err = qdisc_graft(dev, p, clid, q, &old_q);
+		spinlock_t *root_lock;
+
+		err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
 		if (err) {
 			if (q) {
-				qdisc_lock_tree(dev);
+				root_lock = qdisc_root_lock(q);
+				spin_lock_bh(root_lock);
 				qdisc_destroy(q);
-				qdisc_unlock_tree(dev);
+				spin_unlock_bh(root_lock);
 			}
 			return err;
 		}
-		qdisc_notify(skb, n, clid, old_q, q);
-		if (old_q) {
-			qdisc_lock_tree(dev);
-			qdisc_destroy(old_q);
-			qdisc_unlock_tree(dev);
-		}
 	}
 	return 0;
 }
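The qdisc_lock_tree()/qdisc_unlock_tree() pairs, which serialized on device-global locks, are replaced throughout by qdisc_root_lock(). The helper itself is not part of this file; a plausible reading, consistent with notify_and_destroy() above taking &old->q.lock directly, is that each root qdisc's own queue lock now covers its whole tree:

/* Assumed sketch of the helper used above; the real definition lives
 * in <net/sch_generic.h>. The root qdisc attached to this qdisc's
 * netdev_queue owns the lock for the entire tree below it. */
static inline spinlock_t *qdisc_root_lock(struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc->dev_queue->qdisc;

	return &root->q.lock;
}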
@@ -834,7 +1129,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	tcm->tcm_family = AF_UNSPEC;
 	tcm->tcm__pad1 = 0;
 	tcm->tcm__pad2 = 0;
-	tcm->tcm_ifindex = q->dev->ifindex;
+	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 	tcm->tcm_parent = clid;
 	tcm->tcm_handle = q->handle;
 	tcm->tcm_info = atomic_read(&q->refcnt);
@@ -843,8 +1138,11 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 		goto nla_put_failure;
 	q->qstats.qlen = q->q.qlen;
 
+	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+		goto nla_put_failure;
+
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-					 TCA_XSTATS, q->stats_lock, &d) < 0)
+					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
 		goto nla_put_failure;
 
 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -894,13 +1192,57 @@ err_out:
 	return -EINVAL;
 }
 
+static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+{
+	return (q->flags & TCQ_F_BUILTIN) ? true : false;
+}
+
+static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
+			      struct netlink_callback *cb,
+			      int *q_idx_p, int s_q_idx)
+{
+	int ret = 0, q_idx = *q_idx_p;
+	struct Qdisc *q;
+
+	if (!root)
+		return 0;
+
+	q = root;
+	if (q_idx < s_q_idx) {
+		q_idx++;
+	} else {
+		if (!tc_qdisc_dump_ignore(q) &&
+		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+			goto done;
+		q_idx++;
+	}
+	list_for_each_entry(q, &root->list, list) {
+		if (q_idx < s_q_idx) {
+			q_idx++;
+			continue;
+		}
+		if (!tc_qdisc_dump_ignore(q) &&
+		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+			goto done;
+		q_idx++;
+	}
+
+out:
+	*q_idx_p = q_idx;
+	return ret;
+done:
+	ret = -1;
+	goto out;
+}
+
 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
 	int idx, q_idx;
 	int s_idx, s_q_idx;
 	struct net_device *dev;
-	struct Qdisc *q;
 
 	if (net != &init_net)
 		return 0;
@@ -910,21 +1252,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 	read_lock(&dev_base_lock);
 	idx = 0;
 	for_each_netdev(&init_net, dev) {
+		struct netdev_queue *dev_queue;
+
 		if (idx < s_idx)
 			goto cont;
 		if (idx > s_idx)
 			s_q_idx = 0;
 		q_idx = 0;
-		list_for_each_entry(q, &dev->qdisc_list, list) {
-			if (q_idx < s_q_idx) {
-				q_idx++;
-				continue;
-			}
-			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
-				goto done;
-			q_idx++;
-		}
+
+		dev_queue = netdev_get_tx_queue(dev, 0);
+		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+			goto done;
+
+		dev_queue = &dev->rx_queue;
+		if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+			goto done;
+
 cont:
 		idx++;
 	}
@@ -949,6 +1292,7 @@ done:
 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
+	struct netdev_queue *dev_queue;
 	struct tcmsg *tcm = NLMSG_DATA(n);
 	struct nlattr *tca[TCA_MAX + 1];
 	struct net_device *dev;
@@ -986,6 +1330,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	/* Step 1. Determine qdisc handle X:0 */
 
+	dev_queue = netdev_get_tx_queue(dev, 0);
 	if (pid != TC_H_ROOT) {
 		u32 qid1 = TC_H_MAJ(pid);
 
@@ -996,7 +1341,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		} else if (qid1) {
 			qid = qid1;
 		} else if (qid == 0)
-			qid = dev->qdisc_sleeping->handle;
+			qid = dev_queue->qdisc_sleeping->handle;
 
 		/* Now qid is genuine qdisc handle consistent
 		   both with parent and child.
@@ -1007,7 +1352,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			pid = TC_H_MAKE(qid, pid);
 	} else {
 		if (qid == 0)
-			qid = dev->qdisc_sleeping->handle;
+			qid = dev_queue->qdisc_sleeping->handle;
 	}
 
 	/* OK. Locate qdisc */
@@ -1080,7 +1425,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
 	tcm = NLMSG_DATA(nlh);
 	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm_ifindex = q->dev->ifindex;
+	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 	tcm->tcm_parent = q->handle;
 	tcm->tcm_handle = q->handle;
 	tcm->tcm_info = 0;
@@ -1089,7 +1434,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 		goto nla_put_failure;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-					 TCA_XSTATS, q->stats_lock, &d) < 0)
+					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
 		goto nla_put_failure;
 
 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
@@ -1140,15 +1485,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
 }
 
+static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
+				struct tcmsg *tcm, struct netlink_callback *cb,
+				int *t_p, int s_t)
+{
+	struct qdisc_dump_args arg;
+
+	if (tc_qdisc_dump_ignore(q) ||
+	    *t_p < s_t || !q->ops->cl_ops ||
+	    (tcm->tcm_parent &&
+	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
+		(*t_p)++;
+		return 0;
+	}
+	if (*t_p > s_t)
+		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+	arg.w.fn = qdisc_class_dump;
+	arg.skb = skb;
+	arg.cb = cb;
+	arg.w.stop = 0;
+	arg.w.skip = cb->args[1];
+	arg.w.count = 0;
+	q->ops->cl_ops->walk(q, &arg.w);
+	cb->args[1] = arg.w.count;
+	if (arg.w.stop)
+		return -1;
+	(*t_p)++;
+	return 0;
+}
+
+static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
+			       struct tcmsg *tcm, struct netlink_callback *cb,
+			       int *t_p, int s_t)
+{
+	struct Qdisc *q;
+
+	if (!root)
+		return 0;
+
+	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
+		return -1;
+
+	list_for_each_entry(q, &root->list, list) {
+		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
 	struct net *net = sock_net(skb->sk);
-	int t;
-	int s_t;
+	struct netdev_queue *dev_queue;
 	struct net_device *dev;
-	struct Qdisc *q;
-	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
-	struct qdisc_dump_args arg;
+	int t, s_t;
 
 	if (net != &init_net)
 		return 0;
@@ -1161,28 +1553,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	s_t = cb->args[0];
 	t = 0;
 
-	list_for_each_entry(q, &dev->qdisc_list, list) {
-		if (t < s_t || !q->ops->cl_ops ||
-		    (tcm->tcm_parent &&
-		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
-			t++;
-			continue;
-		}
-		if (t > s_t)
-			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
-		arg.w.fn = qdisc_class_dump;
-		arg.skb = skb;
-		arg.cb = cb;
-		arg.w.stop = 0;
-		arg.w.skip = cb->args[1];
-		arg.w.count = 0;
-		q->ops->cl_ops->walk(q, &arg.w);
-		cb->args[1] = arg.w.count;
-		if (arg.w.stop)
-			break;
-		t++;
-	}
+	dev_queue = netdev_get_tx_queue(dev, 0);
+	if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
+		goto done;
 
+	dev_queue = &dev->rx_queue;
+	if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
+		goto done;
+
+done:
 	cb->args[0] = t;
 
 	dev_put(dev);