Diffstat (limited to 'net/sched/sch_generic.c')
 -rw-r--r--	net/sched/sch_generic.c | 223
 1 file changed, 153 insertions(+), 70 deletions(-)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 27d03816ec3e..4ae6aa562f2b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -37,15 +37,11 @@
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
-static inline int qdisc_qlen(struct Qdisc *q)
-{
-	return q->q.qlen;
-}
-
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
 	q->gso_skb = skb;
 	q->qstats.requeues++;
+	q->q.qlen++;	/* it's still part of the queue */
 	__netif_schedule(q);
 
 	return 0;
@@ -61,9 +57,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 	/* check the reason of requeuing without tx lock first */
 	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-	if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+	if (!netif_tx_queue_stopped(txq) &&
+	    !netif_tx_queue_frozen(txq)) {
 		q->gso_skb = NULL;
-	else
+		q->q.qlen--;
+	} else
 		skb = NULL;
 	} else {
 		skb = q->dequeue(q);
@@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 }
 
 /*
- * NOTE: Called under qdisc_lock(q) with locally disabled BH.
- *
- * __QDISC_STATE_RUNNING guarantees only one CPU can process
- * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
- * this queue.
- *
- * netif_tx_lock serializes accesses to device driver.
- *
- * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
- * if one is grabbed, another must be free.
- *
- * Note, that this procedure can be called by a watchdog timer
+ * Transmit one skb, and handle the return status as required. Holding the
+ * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
+ * function.
  *
  * Returns to the caller:
  * 0  - queue is empty or throttled.
  * >0 - queue is not empty.
- *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
+		    struct net_device *dev, struct netdev_queue *txq,
+		    spinlock_t *root_lock)
 {
-	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
-	struct net_device *dev;
-	spinlock_t *root_lock;
-	struct sk_buff *skb;
-
-	/* Dequeue packet */
-	if (unlikely((skb = dequeue_skb(q)) == NULL))
-		return 0;
-
-	root_lock = qdisc_lock(q);
 
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	dev = qdisc_dev(q);
-	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_tx_queue_stopped(txq) &&
 	    !netif_tx_queue_frozen(txq))
@@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Qdisc *q)
 	return ret;
 }
 
+/*
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
+ *
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
+ *
+ * netif_tx_lock serializes accesses to device driver.
+ *
+ * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
+ * if one is grabbed, another must be free.
+ *
+ * Note, that this procedure can be called by a watchdog timer
+ *
+ * Returns to the caller:
+ * 0  - queue is empty or throttled.
+ * >0 - queue is not empty.
+ *
+ */
+static inline int qdisc_restart(struct Qdisc *q)
+{
+	struct netdev_queue *txq;
+	struct net_device *dev;
+	spinlock_t *root_lock;
+	struct sk_buff *skb;
+
+	/* Dequeue packet */
+	skb = dequeue_skb(q);
+	if (unlikely(!skb))
+		return 0;
+
+	root_lock = qdisc_lock(q);
+	dev = qdisc_dev(q);
+	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+	return sch_direct_xmit(skb, q, dev, txq, root_lock);
+}
+
 void __qdisc_run(struct Qdisc *q)
 {
 	unsigned long start_time = jiffies;
@@ -391,18 +406,38 @@ static const u8 prio2band[TC_PRIO_MAX+1] =
 
 #define PFIFO_FAST_BANDS 3
 
-static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
-					     struct Qdisc *qdisc)
+/*
+ * Private data for a pfifo_fast scheduler containing:
+ * 	- queues for the three band
+ * 	- bitmap indicating which of the bands contain skbs
+ */
+struct pfifo_fast_priv {
+	u32 bitmap;
+	struct sk_buff_head q[PFIFO_FAST_BANDS];
+};
+
+/*
+ * Convert a bitmap to the first band number where an skb is queued, where:
+ * 	bitmap=0 means there are no skbs on any band.
+ * 	bitmap=1 means there is an skb on band 0.
+ * 	bitmap=7 means there are skbs on all 3 bands, etc.
+ */
+static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
+
+static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
+					     int band)
 {
-	struct sk_buff_head *list = qdisc_priv(qdisc);
-	return list + prio2band[skb->priority & TC_PRIO_MAX];
+	return priv->q + band;
 }
 
 static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
-	struct sk_buff_head *list = prio2list(skb, qdisc);
+	if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
+		int band = prio2band[skb->priority & TC_PRIO_MAX];
+		struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+		struct sk_buff_head *list = band2list(priv, band);
 
-	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
+		priv->bitmap |= (1 << band);
 		qdisc->q.qlen++;
 		return __qdisc_enqueue_tail(skb, qdisc, list);
 	}
@@ -412,14 +447,18 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 
 static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
 {
-	int prio;
-	struct sk_buff_head *list = qdisc_priv(qdisc);
+	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+	int band = bitmap2band[priv->bitmap];
 
-	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
-		if (!skb_queue_empty(list + prio)) {
-			qdisc->q.qlen--;
-			return __qdisc_dequeue_head(qdisc, list + prio);
-		}
+	if (likely(band >= 0)) {
+		struct sk_buff_head *list = band2list(priv, band);
+		struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
+
+		qdisc->q.qlen--;
+		if (skb_queue_empty(list))
+			priv->bitmap &= ~(1 << band);
+
+		return skb;
 	}
 
 	return NULL;
@@ -427,12 +466,13 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
 
 static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
 {
-	int prio;
-	struct sk_buff_head *list = qdisc_priv(qdisc);
+	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+	int band = bitmap2band[priv->bitmap];
 
-	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
-		if (!skb_queue_empty(list + prio))
-			return skb_peek(list + prio);
+	if (band >= 0) {
+		struct sk_buff_head *list = band2list(priv, band);
+
+		return skb_peek(list);
 	}
 
 	return NULL;
@@ -441,11 +481,12 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
 static void pfifo_fast_reset(struct Qdisc* qdisc)
 {
 	int prio;
-	struct sk_buff_head *list = qdisc_priv(qdisc);
+	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 
 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
-		__qdisc_reset_queue(qdisc, list + prio);
+		__qdisc_reset_queue(qdisc, band2list(priv, prio));
 
+	priv->bitmap = 0;
 	qdisc->qstats.backlog = 0;
 	qdisc->q.qlen = 0;
 }
@@ -465,17 +506,17 @@ nla_put_failure:
 static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
 {
 	int prio;
-	struct sk_buff_head *list = qdisc_priv(qdisc);
+	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 
 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
-		skb_queue_head_init(list + prio);
+		skb_queue_head_init(band2list(priv, prio));
 
 	return 0;
 }
 
-static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
+struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.id		=	"pfifo_fast",
-	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
+	.priv_size	=	sizeof(struct pfifo_fast_priv),
 	.enqueue	=	pfifo_fast_enqueue,
 	.dequeue	=	pfifo_fast_dequeue,
 	.peek		=	pfifo_fast_peek,
@@ -547,8 +588,11 @@ void qdisc_reset(struct Qdisc *qdisc)
 	if (ops->reset)
 		ops->reset(qdisc);
 
-	kfree_skb(qdisc->gso_skb);
-	qdisc->gso_skb = NULL;
+	if (qdisc->gso_skb) {
+		kfree_skb(qdisc->gso_skb);
+		qdisc->gso_skb = NULL;
+		qdisc->q.qlen = 0;
+	}
 }
 EXPORT_SYMBOL(qdisc_reset);
 
@@ -579,17 +623,29 @@ void qdisc_destroy(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_destroy);
 
-static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
+/* Attach toplevel qdisc to device queue. */
+struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
+			      struct Qdisc *qdisc)
 {
-	unsigned int i;
+	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
+	spinlock_t *root_lock;
 
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+	root_lock = qdisc_lock(oqdisc);
+	spin_lock_bh(root_lock);
 
-		if (txq->qdisc_sleeping != &noop_qdisc)
-			return false;
-	}
-	return true;
+	/* Prune old scheduler */
+	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
+		qdisc_reset(oqdisc);
+
+	/* ... and graft new one */
+	if (qdisc == NULL)
+		qdisc = &noop_qdisc;
+	dev_queue->qdisc_sleeping = qdisc;
+	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
+
+	spin_unlock_bh(root_lock);
+
+	return oqdisc;
 }
 
 static void attach_one_default_qdisc(struct net_device *dev,
@@ -605,12 +661,35 @@ static void attach_one_default_qdisc(struct net_device *dev,
 			printk(KERN_INFO "%s: activation failed\n", dev->name);
 			return;
 		}
+
+		/* Can by-pass the queue discipline for default qdisc */
+		qdisc->flags |= TCQ_F_CAN_BYPASS;
 	} else {
 		qdisc = &noqueue_qdisc;
 	}
 	dev_queue->qdisc_sleeping = qdisc;
 }
 
+static void attach_default_qdiscs(struct net_device *dev)
+{
+	struct netdev_queue *txq;
+	struct Qdisc *qdisc;
+
+	txq = netdev_get_tx_queue(dev, 0);
+
+	if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
+		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+		dev->qdisc = txq->qdisc_sleeping;
+		atomic_inc(&dev->qdisc->refcnt);
+	} else {
+		qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT);
+		if (qdisc) {
+			qdisc->ops->attach(qdisc);
+			dev->qdisc = qdisc;
+		}
+	}
+}
+
 static void transition_one_qdisc(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
 				 void *_need_watchdog)
@@ -638,8 +717,8 @@ void dev_activate(struct net_device *dev)
 	   virtual interfaces
 	 */
 
-	if (dev_all_qdisc_sleeping_noop(dev))
-		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+	if (dev->qdisc == &noop_qdisc)
+		attach_default_qdiscs(dev);
 
 	if (!netif_carrier_ok(dev))
 		/* Delay activation until next carrier-on event */
@@ -730,6 +809,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
 
 void dev_init_scheduler(struct net_device *dev)
 {
+	dev->qdisc = &noop_qdisc;
 	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
 	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
 
@@ -755,5 +835,8 @@ void dev_shutdown(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
 	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
+	qdisc_destroy(dev->qdisc);
+	dev->qdisc = &noop_qdisc;
+
 	WARN_ON(timer_pending(&dev->watchdog_timer));
 }
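
Aside: the pfifo_fast hunks above replace the per-band scan in dequeue/peek with a bitmap lookup. The stand-alone sketch below is not kernel code; the toy_* names and plain counters are invented for illustration, and only the bitmap2band[] table and the bit operations are taken from the patch. It shows how the bitmap turns "find the first non-empty band" into an O(1) table lookup.

/* Stand-alone illustration of the pfifo_fast bitmap trick from the diff
 * above.  NOT the kernel implementation: plain counters stand in for the
 * per-band sk_buff_head queues. */
#include <assert.h>
#include <stdio.h>

#define PFIFO_FAST_BANDS 3

/* Same table as in the patch: index is the bitmap of non-empty bands,
 * value is the lowest-numbered (highest-priority) band holding packets. */
static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};

struct toy_priv {
	unsigned int bitmap;
	unsigned int qlen[PFIFO_FAST_BANDS];	/* stand-in for sk_buff_head */
};

static void toy_enqueue(struct toy_priv *p, int band)
{
	p->qlen[band]++;
	p->bitmap |= (1u << band);		/* mark band non-empty */
}

static int toy_dequeue(struct toy_priv *p)
{
	int band = bitmap2band[p->bitmap];	/* O(1): no loop over bands */

	if (band < 0)
		return -1;			/* all bands empty */
	if (--p->qlen[band] == 0)
		p->bitmap &= ~(1u << band);	/* clear bit when band drains */
	return band;
}

int main(void)
{
	struct toy_priv p = { 0 };

	toy_enqueue(&p, 2);			/* low priority */
	toy_enqueue(&p, 0);			/* high priority */
	assert(toy_dequeue(&p) == 0);		/* band 0 is served first */
	assert(toy_dequeue(&p) == 2);
	assert(toy_dequeue(&p) == -1);
	printf("bitmap lookup behaves as expected\n");
	return 0;
}

Compiled with any C compiler, the asserts exercise the same ordering the qdisc provides: band 0 drains before band 2, and an empty bitmap maps to -1.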