Diffstat (limited to 'net/sched/sch_generic.c')
-rw-r--r-- | net/sched/sch_generic.c | 467 |
1 file changed, 258 insertions(+), 209 deletions(-)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 13afa7214392..0ddf69286f92 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -29,58 +29,36 @@ | |||
29 | /* Main transmission queue. */ | 29 | /* Main transmission queue. */ |
30 | 30 | ||
31 | /* Modifications to data participating in scheduling must be protected with | 31 | /* Modifications to data participating in scheduling must be protected with |
32 | * dev->queue_lock spinlock. | 32 | * qdisc_root_lock(qdisc) spinlock. |
33 | * | 33 | * |
34 | * The idea is the following: | 34 | * The idea is the following: |
35 | * - enqueue, dequeue are serialized via top level device | 35 | * - enqueue, dequeue are serialized via qdisc root lock |
36 | * spinlock dev->queue_lock. | 36 | * - ingress filtering is also serialized via qdisc root lock |
37 | * - ingress filtering is serialized via top level device | ||
38 | * spinlock dev->ingress_lock. | ||
39 | * - updates to tree and tree walking are only done under the rtnl mutex. | 37 | * - updates to tree and tree walking are only done under the rtnl mutex. |
40 | */ | 38 | */ |
41 | 39 | ||
42 | void qdisc_lock_tree(struct net_device *dev) | ||
43 | __acquires(dev->queue_lock) | ||
44 | __acquires(dev->ingress_lock) | ||
45 | { | ||
46 | spin_lock_bh(&dev->queue_lock); | ||
47 | spin_lock(&dev->ingress_lock); | ||
48 | } | ||
49 | EXPORT_SYMBOL(qdisc_lock_tree); | ||
50 | |||
51 | void qdisc_unlock_tree(struct net_device *dev) | ||
52 | __releases(dev->ingress_lock) | ||
53 | __releases(dev->queue_lock) | ||
54 | { | ||
55 | spin_unlock(&dev->ingress_lock); | ||
56 | spin_unlock_bh(&dev->queue_lock); | ||
57 | } | ||
58 | EXPORT_SYMBOL(qdisc_unlock_tree); | ||
59 | |||
60 | static inline int qdisc_qlen(struct Qdisc *q) | 40 | static inline int qdisc_qlen(struct Qdisc *q) |
61 | { | 41 | { |
62 | return q->q.qlen; | 42 | return q->q.qlen; |
63 | } | 43 | } |
64 | 44 | ||
65 | static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, | 45 | static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) |
66 | struct Qdisc *q) | ||
67 | { | 46 | { |
68 | if (unlikely(skb->next)) | 47 | if (unlikely(skb->next)) |
69 | dev->gso_skb = skb; | 48 | q->gso_skb = skb; |
70 | else | 49 | else |
71 | q->ops->requeue(skb, q); | 50 | q->ops->requeue(skb, q); |
72 | 51 | ||
73 | netif_schedule(dev); | 52 | __netif_schedule(q); |
74 | return 0; | 53 | return 0; |
75 | } | 54 | } |
76 | 55 | ||
77 | static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, | 56 | static inline struct sk_buff *dequeue_skb(struct Qdisc *q) |
78 | struct Qdisc *q) | ||
79 | { | 57 | { |
80 | struct sk_buff *skb; | 58 | struct sk_buff *skb; |
81 | 59 | ||
82 | if ((skb = dev->gso_skb)) | 60 | if ((skb = q->gso_skb)) |
83 | dev->gso_skb = NULL; | 61 | q->gso_skb = NULL; |
84 | else | 62 | else |
85 | skb = q->dequeue(q); | 63 | skb = q->dequeue(q); |
86 | 64 | ||
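In this hunk the deferred GSO packet moves from dev->gso_skb to q->gso_skb, so requeue and dequeue need only the qdisc. Below is a minimal userspace sketch of the stash-before-dequeue pattern; the types and names are toy stand-ins, not kernel API:

#include <stdio.h>
#include <stddef.h>

struct pkt { struct pkt *next; int id; };

struct toy_qdisc {
	struct pkt *gso_stash;   /* models q->gso_skb            */
	struct pkt *head;        /* models the qdisc's own queue */
};

/* Requeue a packet the driver could not take: a partially sent GSO chain
 * goes into the single stash slot, anything else goes back to the front
 * of the queue. */
static void toy_requeue(struct toy_qdisc *q, struct pkt *p)
{
	if (p->next)
		q->gso_stash = p;
	else {
		p->next = q->head;
		q->head = p;
	}
}

/* Dequeue for transmit: the stashed chain always goes out first. */
static struct pkt *toy_dequeue(struct toy_qdisc *q)
{
	struct pkt *p = q->gso_stash;

	if (p) {
		q->gso_stash = NULL;
		return p;
	}
	p = q->head;
	if (p) {
		q->head = p->next;
		p->next = NULL;
	}
	return p;
}

int main(void)
{
	struct pkt seg2 = { NULL, 2 }, seg1 = { &seg2, 1 };
	struct toy_qdisc q = { NULL, NULL };

	toy_requeue(&q, &seg1);                 /* chained: lands in the stash */
	printf("next to send: pkt %d\n", toy_dequeue(&q)->id);
	return 0;
}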
@@ -88,12 +66,12 @@ static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, | |||
88 | } | 66 | } |
89 | 67 | ||
90 | static inline int handle_dev_cpu_collision(struct sk_buff *skb, | 68 | static inline int handle_dev_cpu_collision(struct sk_buff *skb, |
91 | struct net_device *dev, | 69 | struct netdev_queue *dev_queue, |
92 | struct Qdisc *q) | 70 | struct Qdisc *q) |
93 | { | 71 | { |
94 | int ret; | 72 | int ret; |
95 | 73 | ||
96 | if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { | 74 | if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { |
97 | /* | 75 | /* |
98 | * Same CPU holding the lock. It may be a transient | 76 | * Same CPU holding the lock. It may be a transient |
99 | * configuration error, when hard_start_xmit() recurses. We | 77 | * configuration error, when hard_start_xmit() recurses. We |
@@ -103,7 +81,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, | |||
103 | kfree_skb(skb); | 81 | kfree_skb(skb); |
104 | if (net_ratelimit()) | 82 | if (net_ratelimit()) |
105 | printk(KERN_WARNING "Dead loop on netdevice %s, " | 83 | printk(KERN_WARNING "Dead loop on netdevice %s, " |
106 | "fix it urgently!\n", dev->name); | 84 | "fix it urgently!\n", dev_queue->dev->name); |
107 | ret = qdisc_qlen(q); | 85 | ret = qdisc_qlen(q); |
108 | } else { | 86 | } else { |
109 | /* | 87 | /* |
@@ -111,22 +89,22 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, | |||
111 | * some time. | 89 | * some time. |
112 | */ | 90 | */ |
113 | __get_cpu_var(netdev_rx_stat).cpu_collision++; | 91 | __get_cpu_var(netdev_rx_stat).cpu_collision++; |
114 | ret = dev_requeue_skb(skb, dev, q); | 92 | ret = dev_requeue_skb(skb, q); |
115 | } | 93 | } |
116 | 94 | ||
117 | return ret; | 95 | return ret; |
118 | } | 96 | } |
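handle_dev_cpu_collision() now receives the netdev_queue, but the decision logic is unchanged: a failed driver trylock is either recursion on the same CPU (a dead loop, drop the packet) or a transient collision with another CPU (requeue and retry later). An illustrative stand-alone model of that decision, with hypothetical types:

#include <stdio.h>

struct toy_txq {
	int xmit_lock_owner;          /* -1 when unlocked */
};

enum collision_action { DROP_DEAD_LOOP, REQUEUE_LATER };

static enum collision_action handle_collision(const struct toy_txq *txq,
					      int this_cpu)
{
	if (txq->xmit_lock_owner == this_cpu)
		return DROP_DEAD_LOOP;   /* hard_start_xmit() recursed  */
	return REQUEUE_LATER;            /* other CPU will release soon */
}

int main(void)
{
	struct toy_txq txq = { .xmit_lock_owner = 2 };

	printf("cpu 2: %s\n", handle_collision(&txq, 2) == DROP_DEAD_LOOP ?
	       "dead loop, drop" : "requeue");
	printf("cpu 0: %s\n", handle_collision(&txq, 0) == DROP_DEAD_LOOP ?
	       "dead loop, drop" : "requeue");
	return 0;
}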
119 | 97 | ||
120 | /* | 98 | /* |
121 | * NOTE: Called under dev->queue_lock with locally disabled BH. | 99 | * NOTE: Called under qdisc_lock(q) with locally disabled BH. |
122 | * | 100 | * |
123 | * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this | 101 | * __QDISC_STATE_RUNNING guarantees only one CPU can process |
124 | * device at a time. dev->queue_lock serializes queue accesses for | 102 | * this qdisc at a time. qdisc_lock(q) serializes queue accesses for |
125 | * this device AND dev->qdisc pointer itself. | 103 | * this queue. |
126 | * | 104 | * |
127 | * netif_tx_lock serializes accesses to device driver. | 105 | * netif_tx_lock serializes accesses to device driver. |
128 | * | 106 | * |
129 | * dev->queue_lock and netif_tx_lock are mutually exclusive, | 107 | * qdisc_lock(q) and netif_tx_lock are mutually exclusive, |
130 | * if one is grabbed, another must be free. | 108 | * if one is grabbed, another must be free. |
131 | * | 109 | * |
132 | * Note, that this procedure can be called by a watchdog timer | 110 | * Note, that this procedure can be called by a watchdog timer |
@@ -136,27 +114,32 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, | |||
136 | * >0 - queue is not empty. | 114 | * >0 - queue is not empty. |
137 | * | 115 | * |
138 | */ | 116 | */ |
139 | static inline int qdisc_restart(struct net_device *dev) | 117 | static inline int qdisc_restart(struct Qdisc *q) |
140 | { | 118 | { |
141 | struct Qdisc *q = dev->qdisc; | 119 | struct netdev_queue *txq; |
142 | struct sk_buff *skb; | ||
143 | int ret = NETDEV_TX_BUSY; | 120 | int ret = NETDEV_TX_BUSY; |
121 | struct net_device *dev; | ||
122 | spinlock_t *root_lock; | ||
123 | struct sk_buff *skb; | ||
144 | 124 | ||
145 | /* Dequeue packet */ | 125 | /* Dequeue packet */ |
146 | if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) | 126 | if (unlikely((skb = dequeue_skb(q)) == NULL)) |
147 | return 0; | 127 | return 0; |
148 | 128 | ||
129 | root_lock = qdisc_root_lock(q); | ||
149 | 130 | ||
150 | /* And release queue */ | 131 | /* And release qdisc */ |
151 | spin_unlock(&dev->queue_lock); | 132 | spin_unlock(root_lock); |
152 | 133 | ||
153 | HARD_TX_LOCK(dev, smp_processor_id()); | 134 | dev = qdisc_dev(q); |
135 | txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); | ||
136 | |||
137 | HARD_TX_LOCK(dev, txq, smp_processor_id()); | ||
154 | if (!netif_subqueue_stopped(dev, skb)) | 138 | if (!netif_subqueue_stopped(dev, skb)) |
155 | ret = dev_hard_start_xmit(skb, dev); | 139 | ret = dev_hard_start_xmit(skb, dev, txq); |
156 | HARD_TX_UNLOCK(dev); | 140 | HARD_TX_UNLOCK(dev, txq); |
157 | 141 | ||
158 | spin_lock(&dev->queue_lock); | 142 | spin_lock(root_lock); |
159 | q = dev->qdisc; | ||
160 | 143 | ||
161 | switch (ret) { | 144 | switch (ret) { |
162 | case NETDEV_TX_OK: | 145 | case NETDEV_TX_OK: |
@@ -166,7 +149,7 @@ static inline int qdisc_restart(struct net_device *dev) | |||
166 | 149 | ||
167 | case NETDEV_TX_LOCKED: | 150 | case NETDEV_TX_LOCKED: |
168 | /* Driver try lock failed */ | 151 | /* Driver try lock failed */ |
169 | ret = handle_dev_cpu_collision(skb, dev, q); | 152 | ret = handle_dev_cpu_collision(skb, txq, q); |
170 | break; | 153 | break; |
171 | 154 | ||
172 | default: | 155 | default: |
@@ -175,33 +158,33 @@ static inline int qdisc_restart(struct net_device *dev) | |||
175 | printk(KERN_WARNING "BUG %s code %d qlen %d\n", | 158 | printk(KERN_WARNING "BUG %s code %d qlen %d\n", |
176 | dev->name, ret, q->q.qlen); | 159 | dev->name, ret, q->q.qlen); |
177 | 160 | ||
178 | ret = dev_requeue_skb(skb, dev, q); | 161 | ret = dev_requeue_skb(skb, q); |
179 | break; | 162 | break; |
180 | } | 163 | } |
181 | 164 | ||
165 | if (ret && netif_tx_queue_stopped(txq)) | ||
166 | ret = 0; | ||
167 | |||
182 | return ret; | 168 | return ret; |
183 | } | 169 | } |
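qdisc_restart() now resolves the TX queue from the skb and uses the qdisc root lock instead of dev->queue_lock, but the lock order stays the same: dequeue under the root lock, release it, take the per-queue driver lock for the actual transmit, then re-take the root lock to handle the return code. A compact pthread sketch of that ordering (toy helpers, not kernel functions):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t root_lock = PTHREAD_MUTEX_INITIALIZER; /* qdisc_root_lock(q) */
static pthread_mutex_t tx_lock   = PTHREAD_MUTEX_INITIALIZER; /* HARD_TX_LOCK       */

static int toy_dequeue(void) { return 42; }                      /* pretend packet id */
static int toy_hard_start_xmit(int pkt) { (void)pkt; return 0; } /* 0 == NETDEV_TX_OK */

static int toy_qdisc_restart(void)
{
	int pkt, ret;

	pthread_mutex_lock(&root_lock);
	pkt = toy_dequeue();              /* dequeue under the qdisc root lock  */
	pthread_mutex_unlock(&root_lock); /* drop it before touching the driver */

	pthread_mutex_lock(&tx_lock);     /* per-queue driver lock              */
	ret = toy_hard_start_xmit(pkt);
	pthread_mutex_unlock(&tx_lock);

	pthread_mutex_lock(&root_lock);   /* re-take root lock to requeue on    */
	pthread_mutex_unlock(&root_lock); /* failure / report queue length      */

	return ret;
}

int main(void)
{
	printf("toy_qdisc_restart -> %d\n", toy_qdisc_restart());
	return 0;
}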
184 | 170 | ||
185 | void __qdisc_run(struct net_device *dev) | 171 | void __qdisc_run(struct Qdisc *q) |
186 | { | 172 | { |
187 | unsigned long start_time = jiffies; | 173 | unsigned long start_time = jiffies; |
188 | 174 | ||
189 | while (qdisc_restart(dev)) { | 175 | while (qdisc_restart(q)) { |
190 | if (netif_queue_stopped(dev)) | ||
191 | break; | ||
192 | |||
193 | /* | 176 | /* |
194 | * Postpone processing if | 177 | * Postpone processing if |
195 | * 1. another process needs the CPU; | 178 | * 1. another process needs the CPU; |
196 | * 2. we've been doing it for too long. | 179 | * 2. we've been doing it for too long. |
197 | */ | 180 | */ |
198 | if (need_resched() || jiffies != start_time) { | 181 | if (need_resched() || jiffies != start_time) { |
199 | netif_schedule(dev); | 182 | __netif_schedule(q); |
200 | break; | 183 | break; |
201 | } | 184 | } |
202 | } | 185 | } |
203 | 186 | ||
204 | clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); | 187 | clear_bit(__QDISC_STATE_RUNNING, &q->state); |
205 | } | 188 | } |
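__qdisc_run() keeps the same pacing rule, now per qdisc: transmit while there is work, but defer through __netif_schedule() once another task needs the CPU or a jiffy has passed, then clear the RUNNING bit. A userspace approximation of that loop, using clock() in place of jiffies and stand-in helpers:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static int packets_left = 100000;

static int toy_restart(void)       { return packets_left ? packets_left-- : 0; }
static bool toy_need_resched(void) { return false; }
static void toy_reschedule(void)   { puts("deferred: __netif_schedule()"); }

static void toy_qdisc_run(void)
{
	clock_t start = clock();      /* stands in for start_time = jiffies */

	while (toy_restart()) {
		/* Postpone if another task needs the CPU or we ran too long. */
		if (toy_need_resched() || clock() - start > CLOCKS_PER_SEC / 1000) {
			toy_reschedule();
			break;
		}
	}
	/* clear_bit(__QDISC_STATE_RUNNING, &q->state) would go here. */
}

int main(void)
{
	toy_qdisc_run();
	printf("packets left: %d\n", packets_left);
	return 0;
}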
206 | 189 | ||
207 | static void dev_watchdog(unsigned long arg) | 190 | static void dev_watchdog(unsigned long arg) |
@@ -209,19 +192,35 @@ static void dev_watchdog(unsigned long arg) | |||
209 | struct net_device *dev = (struct net_device *)arg; | 192 | struct net_device *dev = (struct net_device *)arg; |
210 | 193 | ||
211 | netif_tx_lock(dev); | 194 | netif_tx_lock(dev); |
212 | if (dev->qdisc != &noop_qdisc) { | 195 | if (!qdisc_tx_is_noop(dev)) { |
213 | if (netif_device_present(dev) && | 196 | if (netif_device_present(dev) && |
214 | netif_running(dev) && | 197 | netif_running(dev) && |
215 | netif_carrier_ok(dev)) { | 198 | netif_carrier_ok(dev)) { |
216 | if (netif_queue_stopped(dev) && | 199 | int some_queue_stopped = 0; |
217 | time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) { | 200 | unsigned int i; |
201 | |||
202 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
203 | struct netdev_queue *txq; | ||
204 | |||
205 | txq = netdev_get_tx_queue(dev, i); | ||
206 | if (netif_tx_queue_stopped(txq)) { | ||
207 | some_queue_stopped = 1; | ||
208 | break; | ||
209 | } | ||
210 | } | ||
218 | 211 | ||
219 | printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", | 212 | if (some_queue_stopped && |
213 | time_after(jiffies, (dev->trans_start + | ||
214 | dev->watchdog_timeo))) { | ||
215 | printk(KERN_INFO "NETDEV WATCHDOG: %s: " | ||
216 | "transmit timed out\n", | ||
220 | dev->name); | 217 | dev->name); |
221 | dev->tx_timeout(dev); | 218 | dev->tx_timeout(dev); |
222 | WARN_ON_ONCE(1); | 219 | WARN_ON_ONCE(1); |
223 | } | 220 | } |
224 | if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) | 221 | if (!mod_timer(&dev->watchdog_timer, |
222 | round_jiffies(jiffies + | ||
223 | dev->watchdog_timeo))) | ||
225 | dev_hold(dev); | 224 | dev_hold(dev); |
226 | } | 225 | } |
227 | } | 226 | } |
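The watchdog now scans every TX queue and fires only if some queue is stopped and the device has been idle longer than watchdog_timeo. A stand-alone model of that test; time_after() is approximated with a wrap-safe signed comparison and the structs are illustrative only:

#include <stdbool.h>
#include <stdio.h>

struct toy_txq { bool stopped; };

struct toy_dev {
	struct toy_txq txq[4];
	unsigned int num_tx_queues;
	long trans_start;             /* "jiffies" of last transmit */
	long watchdog_timeo;
};

static bool toy_time_after(long a, long b) { return (long)(b - a) < 0; }

static bool watchdog_should_fire(const struct toy_dev *dev, long now)
{
	bool some_queue_stopped = false;
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++)
		if (dev->txq[i].stopped) {
			some_queue_stopped = true;
			break;
		}

	return some_queue_stopped &&
	       toy_time_after(now, dev->trans_start + dev->watchdog_timeo);
}

int main(void)
{
	struct toy_dev dev = { .num_tx_queues = 4,
			       .trans_start = 1000, .watchdog_timeo = 50 };

	dev.txq[2].stopped = true;
	printf("at 1040: %s\n", watchdog_should_fire(&dev, 1040) ? "fire" : "ok");
	printf("at 1100: %s\n", watchdog_should_fire(&dev, 1100) ? "fire" : "ok");
	return 0;
}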
@@ -317,12 +316,18 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { | |||
317 | .owner = THIS_MODULE, | 316 | .owner = THIS_MODULE, |
318 | }; | 317 | }; |
319 | 318 | ||
319 | static struct netdev_queue noop_netdev_queue = { | ||
320 | .qdisc = &noop_qdisc, | ||
321 | }; | ||
322 | |||
320 | struct Qdisc noop_qdisc = { | 323 | struct Qdisc noop_qdisc = { |
321 | .enqueue = noop_enqueue, | 324 | .enqueue = noop_enqueue, |
322 | .dequeue = noop_dequeue, | 325 | .dequeue = noop_dequeue, |
323 | .flags = TCQ_F_BUILTIN, | 326 | .flags = TCQ_F_BUILTIN, |
324 | .ops = &noop_qdisc_ops, | 327 | .ops = &noop_qdisc_ops, |
325 | .list = LIST_HEAD_INIT(noop_qdisc.list), | 328 | .list = LIST_HEAD_INIT(noop_qdisc.list), |
329 | .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), | ||
330 | .dev_queue = &noop_netdev_queue, | ||
326 | }; | 331 | }; |
327 | EXPORT_SYMBOL(noop_qdisc); | 332 | EXPORT_SYMBOL(noop_qdisc); |
328 | 333 | ||
@@ -335,112 +340,65 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { | |||
335 | .owner = THIS_MODULE, | 340 | .owner = THIS_MODULE, |
336 | }; | 341 | }; |
337 | 342 | ||
343 | static struct Qdisc noqueue_qdisc; | ||
344 | static struct netdev_queue noqueue_netdev_queue = { | ||
345 | .qdisc = &noqueue_qdisc, | ||
346 | }; | ||
347 | |||
338 | static struct Qdisc noqueue_qdisc = { | 348 | static struct Qdisc noqueue_qdisc = { |
339 | .enqueue = NULL, | 349 | .enqueue = NULL, |
340 | .dequeue = noop_dequeue, | 350 | .dequeue = noop_dequeue, |
341 | .flags = TCQ_F_BUILTIN, | 351 | .flags = TCQ_F_BUILTIN, |
342 | .ops = &noqueue_qdisc_ops, | 352 | .ops = &noqueue_qdisc_ops, |
343 | .list = LIST_HEAD_INIT(noqueue_qdisc.list), | 353 | .list = LIST_HEAD_INIT(noqueue_qdisc.list), |
354 | .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), | ||
355 | .dev_queue = &noqueue_netdev_queue, | ||
344 | }; | 356 | }; |
345 | 357 | ||
346 | 358 | ||
347 | static const u8 prio2band[TC_PRIO_MAX+1] = | 359 | static int fifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) |
348 | { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; | ||
349 | |||
350 | /* 3-band FIFO queue: old style, but should be a bit faster than | ||
351 | generic prio+fifo combination. | ||
352 | */ | ||
353 | |||
354 | #define PFIFO_FAST_BANDS 3 | ||
355 | |||
356 | static inline struct sk_buff_head *prio2list(struct sk_buff *skb, | ||
357 | struct Qdisc *qdisc) | ||
358 | { | 360 | { |
359 | struct sk_buff_head *list = qdisc_priv(qdisc); | 361 | struct sk_buff_head *list = &qdisc->q; |
360 | return list + prio2band[skb->priority & TC_PRIO_MAX]; | ||
361 | } | ||
362 | |||
363 | static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) | ||
364 | { | ||
365 | struct sk_buff_head *list = prio2list(skb, qdisc); | ||
366 | 362 | ||
367 | if (skb_queue_len(list) < qdisc->dev->tx_queue_len) { | 363 | if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) |
368 | qdisc->q.qlen++; | ||
369 | return __qdisc_enqueue_tail(skb, qdisc, list); | 364 | return __qdisc_enqueue_tail(skb, qdisc, list); |
370 | } | ||
371 | 365 | ||
372 | return qdisc_drop(skb, qdisc); | 366 | return qdisc_drop(skb, qdisc); |
373 | } | 367 | } |
374 | 368 | ||
375 | static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) | 369 | static struct sk_buff *fifo_fast_dequeue(struct Qdisc* qdisc) |
376 | { | 370 | { |
377 | int prio; | 371 | struct sk_buff_head *list = &qdisc->q; |
378 | struct sk_buff_head *list = qdisc_priv(qdisc); | ||
379 | 372 | ||
380 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { | 373 | if (!skb_queue_empty(list)) |
381 | if (!skb_queue_empty(list + prio)) { | 374 | return __qdisc_dequeue_head(qdisc, list); |
382 | qdisc->q.qlen--; | ||
383 | return __qdisc_dequeue_head(qdisc, list + prio); | ||
384 | } | ||
385 | } | ||
386 | 375 | ||
387 | return NULL; | 376 | return NULL; |
388 | } | 377 | } |
389 | 378 | ||
390 | static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) | 379 | static int fifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) |
391 | { | 380 | { |
392 | qdisc->q.qlen++; | 381 | return __qdisc_requeue(skb, qdisc, &qdisc->q); |
393 | return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); | ||
394 | } | 382 | } |
395 | 383 | ||
396 | static void pfifo_fast_reset(struct Qdisc* qdisc) | 384 | static void fifo_fast_reset(struct Qdisc* qdisc) |
397 | { | 385 | { |
398 | int prio; | 386 | __qdisc_reset_queue(qdisc, &qdisc->q); |
399 | struct sk_buff_head *list = qdisc_priv(qdisc); | ||
400 | |||
401 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) | ||
402 | __qdisc_reset_queue(qdisc, list + prio); | ||
403 | |||
404 | qdisc->qstats.backlog = 0; | 387 | qdisc->qstats.backlog = 0; |
405 | qdisc->q.qlen = 0; | ||
406 | } | ||
407 | |||
408 | static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) | ||
409 | { | ||
410 | struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; | ||
411 | |||
412 | memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); | ||
413 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | ||
414 | return skb->len; | ||
415 | |||
416 | nla_put_failure: | ||
417 | return -1; | ||
418 | } | 388 | } |
419 | 389 | ||
420 | static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) | 390 | static struct Qdisc_ops fifo_fast_ops __read_mostly = { |
421 | { | 391 | .id = "fifo_fast", |
422 | int prio; | 392 | .priv_size = 0, |
423 | struct sk_buff_head *list = qdisc_priv(qdisc); | 393 | .enqueue = fifo_fast_enqueue, |
424 | 394 | .dequeue = fifo_fast_dequeue, | |
425 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) | 395 | .requeue = fifo_fast_requeue, |
426 | skb_queue_head_init(list + prio); | 396 | .reset = fifo_fast_reset, |
427 | |||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | static struct Qdisc_ops pfifo_fast_ops __read_mostly = { | ||
432 | .id = "pfifo_fast", | ||
433 | .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), | ||
434 | .enqueue = pfifo_fast_enqueue, | ||
435 | .dequeue = pfifo_fast_dequeue, | ||
436 | .requeue = pfifo_fast_requeue, | ||
437 | .init = pfifo_fast_init, | ||
438 | .reset = pfifo_fast_reset, | ||
439 | .dump = pfifo_fast_dump, | ||
440 | .owner = THIS_MODULE, | 397 | .owner = THIS_MODULE, |
441 | }; | 398 | }; |
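The removed pfifo_fast code mapped skb->priority to one of three bands via prio2band; the replacement fifo_fast keeps a single list per qdisc and leaves traffic separation to the multiple TX queues. The program below contrasts the two band selections; the prio2band table is copied from the removed lines (TC_PRIO_MAX is 15), the rest is illustrative:

#include <stdio.h>

#define TC_PRIO_MAX 15

static const unsigned char prio2band[TC_PRIO_MAX + 1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

static int pfifo_fast_band(unsigned int skb_priority)
{
	return prio2band[skb_priority & TC_PRIO_MAX]; /* band 0 is served first */
}

static int fifo_fast_band(unsigned int skb_priority)
{
	(void)skb_priority;
	return 0;                                     /* one band for everything */
}

int main(void)
{
	unsigned int prio;

	for (prio = 0; prio <= 7; prio++)
		printf("prio %u -> pfifo_fast band %d, fifo_fast band %d\n",
		       prio, pfifo_fast_band(prio), fifo_fast_band(prio));
	return 0;
}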
442 | 399 | ||
443 | struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) | 400 | struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
401 | struct Qdisc_ops *ops) | ||
444 | { | 402 | { |
445 | void *p; | 403 | void *p; |
446 | struct Qdisc *sch; | 404 | struct Qdisc *sch; |
@@ -462,8 +420,8 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) | |||
462 | sch->ops = ops; | 420 | sch->ops = ops; |
463 | sch->enqueue = ops->enqueue; | 421 | sch->enqueue = ops->enqueue; |
464 | sch->dequeue = ops->dequeue; | 422 | sch->dequeue = ops->dequeue; |
465 | sch->dev = dev; | 423 | sch->dev_queue = dev_queue; |
466 | dev_hold(dev); | 424 | dev_hold(qdisc_dev(sch)); |
467 | atomic_set(&sch->refcnt, 1); | 425 | atomic_set(&sch->refcnt, 1); |
468 | 426 | ||
469 | return sch; | 427 | return sch; |
@@ -471,15 +429,16 @@ errout: | |||
471 | return ERR_PTR(err); | 429 | return ERR_PTR(err); |
472 | } | 430 | } |
473 | 431 | ||
474 | struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, | 432 | struct Qdisc * qdisc_create_dflt(struct net_device *dev, |
433 | struct netdev_queue *dev_queue, | ||
434 | struct Qdisc_ops *ops, | ||
475 | unsigned int parentid) | 435 | unsigned int parentid) |
476 | { | 436 | { |
477 | struct Qdisc *sch; | 437 | struct Qdisc *sch; |
478 | 438 | ||
479 | sch = qdisc_alloc(dev, ops); | 439 | sch = qdisc_alloc(dev_queue, ops); |
480 | if (IS_ERR(sch)) | 440 | if (IS_ERR(sch)) |
481 | goto errout; | 441 | goto errout; |
482 | sch->stats_lock = &dev->queue_lock; | ||
483 | sch->parent = parentid; | 442 | sch->parent = parentid; |
484 | 443 | ||
485 | if (!ops->init || ops->init(sch, NULL) == 0) | 444 | if (!ops->init || ops->init(sch, NULL) == 0) |
@@ -491,7 +450,7 @@ errout: | |||
491 | } | 450 | } |
492 | EXPORT_SYMBOL(qdisc_create_dflt); | 451 | EXPORT_SYMBOL(qdisc_create_dflt); |
493 | 452 | ||
494 | /* Under dev->queue_lock and BH! */ | 453 | /* Under qdisc_root_lock(qdisc) and BH! */ |
495 | 454 | ||
496 | void qdisc_reset(struct Qdisc *qdisc) | 455 | void qdisc_reset(struct Qdisc *qdisc) |
497 | { | 456 | { |
@@ -508,86 +467,164 @@ EXPORT_SYMBOL(qdisc_reset); | |||
508 | static void __qdisc_destroy(struct rcu_head *head) | 467 | static void __qdisc_destroy(struct rcu_head *head) |
509 | { | 468 | { |
510 | struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); | 469 | struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); |
470 | const struct Qdisc_ops *ops = qdisc->ops; | ||
471 | |||
472 | #ifdef CONFIG_NET_SCHED | ||
473 | qdisc_put_stab(qdisc->stab); | ||
474 | #endif | ||
475 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); | ||
476 | if (ops->reset) | ||
477 | ops->reset(qdisc); | ||
478 | if (ops->destroy) | ||
479 | ops->destroy(qdisc); | ||
480 | |||
481 | module_put(ops->owner); | ||
482 | dev_put(qdisc_dev(qdisc)); | ||
483 | |||
484 | kfree_skb(qdisc->gso_skb); | ||
485 | |||
511 | kfree((char *) qdisc - qdisc->padded); | 486 | kfree((char *) qdisc - qdisc->padded); |
512 | } | 487 | } |
513 | 488 | ||
514 | /* Under dev->queue_lock and BH! */ | 489 | /* Under qdisc_root_lock(qdisc) and BH! */ |
515 | 490 | ||
516 | void qdisc_destroy(struct Qdisc *qdisc) | 491 | void qdisc_destroy(struct Qdisc *qdisc) |
517 | { | 492 | { |
518 | const struct Qdisc_ops *ops = qdisc->ops; | ||
519 | |||
520 | if (qdisc->flags & TCQ_F_BUILTIN || | 493 | if (qdisc->flags & TCQ_F_BUILTIN || |
521 | !atomic_dec_and_test(&qdisc->refcnt)) | 494 | !atomic_dec_and_test(&qdisc->refcnt)) |
522 | return; | 495 | return; |
523 | 496 | ||
524 | list_del(&qdisc->list); | 497 | if (qdisc->parent) |
525 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); | 498 | list_del(&qdisc->list); |
526 | if (ops->reset) | ||
527 | ops->reset(qdisc); | ||
528 | if (ops->destroy) | ||
529 | ops->destroy(qdisc); | ||
530 | 499 | ||
531 | module_put(ops->owner); | ||
532 | dev_put(qdisc->dev); | ||
533 | call_rcu(&qdisc->q_rcu, __qdisc_destroy); | 500 | call_rcu(&qdisc->q_rcu, __qdisc_destroy); |
534 | } | 501 | } |
535 | EXPORT_SYMBOL(qdisc_destroy); | 502 | EXPORT_SYMBOL(qdisc_destroy); |
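With this hunk the estimator teardown, ops->reset/destroy, module_put() and dev_put() all move into __qdisc_destroy(), i.e. into the RCU callback, while qdisc_destroy() itself only drops the reference and skips built-ins. A small userspace model of that gate, with a toy struct instead of struct Qdisc (the call_rcu() deferral is only indicated by a comment):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TOY_F_BUILTIN 1

struct toy_qdisc {
	unsigned int flags;
	atomic_int refcnt;
};

static bool toy_qdisc_destroy(struct toy_qdisc *q)
{
	if (q->flags & TOY_F_BUILTIN)
		return false;                      /* noop/noqueue live forever */
	if (atomic_fetch_sub(&q->refcnt, 1) != 1)
		return false;                      /* someone still holds a ref */

	/* Here the real code queues __qdisc_destroy() via call_rcu(). */
	return true;
}

int main(void)
{
	struct toy_qdisc q = { .flags = 0, .refcnt = 2 };

	printf("first put:  %s\n", toy_qdisc_destroy(&q) ? "freed" : "kept");
	printf("second put: %s\n", toy_qdisc_destroy(&q) ? "freed" : "kept");
	return 0;
}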
536 | 503 | ||
504 | static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) | ||
505 | { | ||
506 | unsigned int i; | ||
507 | |||
508 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
509 | struct netdev_queue *txq = netdev_get_tx_queue(dev, i); | ||
510 | |||
511 | if (txq->qdisc_sleeping != &noop_qdisc) | ||
512 | return false; | ||
513 | } | ||
514 | return true; | ||
515 | } | ||
516 | |||
517 | static void attach_one_default_qdisc(struct net_device *dev, | ||
518 | struct netdev_queue *dev_queue, | ||
519 | void *_unused) | ||
520 | { | ||
521 | struct Qdisc *qdisc; | ||
522 | |||
523 | if (dev->tx_queue_len) { | ||
524 | qdisc = qdisc_create_dflt(dev, dev_queue, | ||
525 | &fifo_fast_ops, TC_H_ROOT); | ||
526 | if (!qdisc) { | ||
527 | printk(KERN_INFO "%s: activation failed\n", dev->name); | ||
528 | return; | ||
529 | } | ||
530 | } else { | ||
531 | qdisc = &noqueue_qdisc; | ||
532 | } | ||
533 | dev_queue->qdisc_sleeping = qdisc; | ||
534 | } | ||
535 | |||
536 | static void transition_one_qdisc(struct net_device *dev, | ||
537 | struct netdev_queue *dev_queue, | ||
538 | void *_need_watchdog) | ||
539 | { | ||
540 | struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; | ||
541 | int *need_watchdog_p = _need_watchdog; | ||
542 | |||
543 | rcu_assign_pointer(dev_queue->qdisc, new_qdisc); | ||
544 | if (new_qdisc != &noqueue_qdisc) | ||
545 | *need_watchdog_p = 1; | ||
546 | } | ||
547 | |||
537 | void dev_activate(struct net_device *dev) | 548 | void dev_activate(struct net_device *dev) |
538 | { | 549 | { |
550 | int need_watchdog; | ||
551 | |||
539 | /* No queueing discipline is attached to device; | 552 | /* No queueing discipline is attached to device; |
540 | create default one i.e. pfifo_fast for devices, | 553 | * create default one i.e. fifo_fast for devices, |
541 | which need queueing and noqueue_qdisc for | 554 | * which need queueing and noqueue_qdisc for |
542 | virtual interfaces | 555 | * virtual interfaces. |
543 | */ | 556 | */ |
544 | 557 | ||
545 | if (dev->qdisc_sleeping == &noop_qdisc) { | 558 | if (dev_all_qdisc_sleeping_noop(dev)) |
546 | struct Qdisc *qdisc; | 559 | netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); |
547 | if (dev->tx_queue_len) { | ||
548 | qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops, | ||
549 | TC_H_ROOT); | ||
550 | if (qdisc == NULL) { | ||
551 | printk(KERN_INFO "%s: activation failed\n", dev->name); | ||
552 | return; | ||
553 | } | ||
554 | list_add_tail(&qdisc->list, &dev->qdisc_list); | ||
555 | } else { | ||
556 | qdisc = &noqueue_qdisc; | ||
557 | } | ||
558 | dev->qdisc_sleeping = qdisc; | ||
559 | } | ||
560 | 560 | ||
561 | if (!netif_carrier_ok(dev)) | 561 | if (!netif_carrier_ok(dev)) |
562 | /* Delay activation until next carrier-on event */ | 562 | /* Delay activation until next carrier-on event */ |
563 | return; | 563 | return; |
564 | 564 | ||
565 | spin_lock_bh(&dev->queue_lock); | 565 | need_watchdog = 0; |
566 | rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping); | 566 | netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); |
567 | if (dev->qdisc != &noqueue_qdisc) { | 567 | |
568 | if (need_watchdog) { | ||
568 | dev->trans_start = jiffies; | 569 | dev->trans_start = jiffies; |
569 | dev_watchdog_up(dev); | 570 | dev_watchdog_up(dev); |
570 | } | 571 | } |
571 | spin_unlock_bh(&dev->queue_lock); | ||
572 | } | 572 | } |
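dev_activate() and the other per-device paths in this patch now apply a callback to every TX queue via netdev_for_each_tx_queue(), threading an opaque argument through, here the need_watchdog flag. A toy model of that iteration pattern, loosely following transition_one_qdisc(); all names below are stand-ins:

#include <stdio.h>
#include <string.h>

struct toy_txq { const char *qdisc_sleeping; const char *qdisc; };

struct toy_dev {
	struct toy_txq txq[2];
	unsigned int num_tx_queues;
};

/* Walk every TX queue and hand each one to a callback with an opaque
 * argument: the shape of netdev_for_each_tx_queue(). */
static void for_each_tx_queue(struct toy_dev *dev,
			      void (*fn)(struct toy_dev *, struct toy_txq *, void *),
			      void *arg)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++)
		fn(dev, &dev->txq[i], arg);
}

/* Callback modelled on transition_one_qdisc(): make the sleeping qdisc the
 * live one, and note whether any queue needs the TX watchdog. */
static void transition_one(struct toy_dev *dev, struct toy_txq *txq, void *arg)
{
	int *need_watchdog = arg;

	(void)dev;
	txq->qdisc = txq->qdisc_sleeping;
	if (strcmp(txq->qdisc, "noqueue") != 0)
		*need_watchdog = 1;
}

int main(void)
{
	struct toy_dev dev = {
		.txq = { { "fifo_fast", "noop" }, { "noqueue", "noop" } },
		.num_tx_queues = 2,
	};
	int need_watchdog = 0;

	for_each_tx_queue(&dev, transition_one, &need_watchdog);
	printf("need_watchdog = %d\n", need_watchdog);
	return 0;
}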
573 | 573 | ||
574 | void dev_deactivate(struct net_device *dev) | 574 | static void dev_deactivate_queue(struct net_device *dev, |
575 | struct netdev_queue *dev_queue, | ||
576 | void *_qdisc_default) | ||
575 | { | 577 | { |
578 | struct Qdisc *qdisc_default = _qdisc_default; | ||
579 | struct sk_buff *skb = NULL; | ||
576 | struct Qdisc *qdisc; | 580 | struct Qdisc *qdisc; |
577 | struct sk_buff *skb; | ||
578 | int running; | ||
579 | 581 | ||
580 | spin_lock_bh(&dev->queue_lock); | 582 | qdisc = dev_queue->qdisc; |
581 | qdisc = dev->qdisc; | 583 | if (qdisc) { |
582 | dev->qdisc = &noop_qdisc; | 584 | spin_lock_bh(qdisc_lock(qdisc)); |
583 | 585 | ||
584 | qdisc_reset(qdisc); | 586 | dev_queue->qdisc = qdisc_default; |
587 | qdisc_reset(qdisc); | ||
585 | 588 | ||
586 | skb = dev->gso_skb; | 589 | spin_unlock_bh(qdisc_lock(qdisc)); |
587 | dev->gso_skb = NULL; | 590 | } |
588 | spin_unlock_bh(&dev->queue_lock); | ||
589 | 591 | ||
590 | kfree_skb(skb); | 592 | kfree_skb(skb); |
593 | } | ||
594 | |||
595 | static bool some_qdisc_is_running(struct net_device *dev, int lock) | ||
596 | { | ||
597 | unsigned int i; | ||
598 | |||
599 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
600 | struct netdev_queue *dev_queue; | ||
601 | spinlock_t *root_lock; | ||
602 | struct Qdisc *q; | ||
603 | int val; | ||
604 | |||
605 | dev_queue = netdev_get_tx_queue(dev, i); | ||
606 | q = dev_queue->qdisc; | ||
607 | root_lock = qdisc_root_lock(q); | ||
608 | |||
609 | if (lock) | ||
610 | spin_lock_bh(root_lock); | ||
611 | |||
612 | val = test_bit(__QDISC_STATE_RUNNING, &q->state); | ||
613 | |||
614 | if (lock) | ||
615 | spin_unlock_bh(root_lock); | ||
616 | |||
617 | if (val) | ||
618 | return true; | ||
619 | } | ||
620 | return false; | ||
621 | } | ||
622 | |||
623 | void dev_deactivate(struct net_device *dev) | ||
624 | { | ||
625 | bool running; | ||
626 | |||
627 | netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); | ||
591 | 628 | ||
592 | dev_watchdog_down(dev); | 629 | dev_watchdog_down(dev); |
593 | 630 | ||
@@ -596,16 +633,14 @@ void dev_deactivate(struct net_device *dev) | |||
596 | 633 | ||
597 | /* Wait for outstanding qdisc_run calls. */ | 634 | /* Wait for outstanding qdisc_run calls. */ |
598 | do { | 635 | do { |
599 | while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) | 636 | while (some_qdisc_is_running(dev, 0)) |
600 | yield(); | 637 | yield(); |
601 | 638 | ||
602 | /* | 639 | /* |
603 | * Double-check inside queue lock to ensure that all effects | 640 | * Double-check inside queue lock to ensure that all effects |
604 | * of the queue run are visible when we return. | 641 | * of the queue run are visible when we return. |
605 | */ | 642 | */ |
606 | spin_lock_bh(&dev->queue_lock); | 643 | running = some_qdisc_is_running(dev, 1); |
607 | running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); | ||
608 | spin_unlock_bh(&dev->queue_lock); | ||
609 | 644 | ||
610 | /* | 645 | /* |
611 | * The running flag should never be set at this point because | 646 | * The running flag should never be set at this point because |
@@ -618,32 +653,46 @@ void dev_deactivate(struct net_device *dev) | |||
618 | } while (WARN_ON_ONCE(running)); | 653 | } while (WARN_ON_ONCE(running)); |
619 | } | 654 | } |
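dev_deactivate() now has to wait for every queue's qdisc to go idle: a lockless poll of the per-queue __QDISC_STATE_RUNNING bits, then a re-check under each root lock so the effects of any concurrent qdisc_run() are visible before returning. A userspace sketch of that two-pass wait, using atomics and pthread mutexes as stand-ins for the kernel primitives:

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NUM_TXQ 4

static atomic_bool qdisc_running[NUM_TXQ];      /* models __QDISC_STATE_RUNNING */
static pthread_mutex_t root_lock[NUM_TXQ] = {   /* models each qdisc root lock  */
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

static bool some_qdisc_is_running(bool take_lock)
{
	bool val = false;
	int i;

	for (i = 0; i < NUM_TXQ && !val; i++) {
		if (take_lock)
			pthread_mutex_lock(&root_lock[i]);
		val = atomic_load(&qdisc_running[i]);
		if (take_lock)
			pthread_mutex_unlock(&root_lock[i]);
	}
	return val;
}

static void wait_for_quiescence(void)
{
	bool running;

	do {
		while (some_qdisc_is_running(false))
			sched_yield();                 /* lockless fast path   */
		running = some_qdisc_is_running(true); /* locked double-check  */
	} while (running);
}

int main(void)
{
	wait_for_quiescence();                         /* nothing running: returns */
	puts("all TX queues quiescent");
	return 0;
}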
620 | 655 | ||
656 | static void dev_init_scheduler_queue(struct net_device *dev, | ||
657 | struct netdev_queue *dev_queue, | ||
658 | void *_qdisc) | ||
659 | { | ||
660 | struct Qdisc *qdisc = _qdisc; | ||
661 | |||
662 | dev_queue->qdisc = qdisc; | ||
663 | dev_queue->qdisc_sleeping = qdisc; | ||
664 | } | ||
665 | |||
621 | void dev_init_scheduler(struct net_device *dev) | 666 | void dev_init_scheduler(struct net_device *dev) |
622 | { | 667 | { |
623 | qdisc_lock_tree(dev); | 668 | netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); |
624 | dev->qdisc = &noop_qdisc; | 669 | dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); |
625 | dev->qdisc_sleeping = &noop_qdisc; | ||
626 | INIT_LIST_HEAD(&dev->qdisc_list); | ||
627 | qdisc_unlock_tree(dev); | ||
628 | 670 | ||
629 | setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); | 671 | setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); |
630 | } | 672 | } |
631 | 673 | ||
632 | void dev_shutdown(struct net_device *dev) | 674 | static void shutdown_scheduler_queue(struct net_device *dev, |
675 | struct netdev_queue *dev_queue, | ||
676 | void *_qdisc_default) | ||
633 | { | 677 | { |
634 | struct Qdisc *qdisc; | 678 | struct Qdisc *qdisc = dev_queue->qdisc_sleeping; |
679 | struct Qdisc *qdisc_default = _qdisc_default; | ||
635 | 680 | ||
636 | qdisc_lock_tree(dev); | 681 | if (qdisc) { |
637 | qdisc = dev->qdisc_sleeping; | 682 | spinlock_t *root_lock = qdisc_root_lock(qdisc); |
638 | dev->qdisc = &noop_qdisc; | 683 | |
639 | dev->qdisc_sleeping = &noop_qdisc; | 684 | dev_queue->qdisc = qdisc_default; |
640 | qdisc_destroy(qdisc); | 685 | dev_queue->qdisc_sleeping = qdisc_default; |
641 | #if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE) | 686 | |
642 | if ((qdisc = dev->qdisc_ingress) != NULL) { | 687 | spin_lock(root_lock); |
643 | dev->qdisc_ingress = NULL; | ||
644 | qdisc_destroy(qdisc); | 688 | qdisc_destroy(qdisc); |
689 | spin_unlock(root_lock); | ||
645 | } | 690 | } |
646 | #endif | 691 | } |
692 | |||
693 | void dev_shutdown(struct net_device *dev) | ||
694 | { | ||
695 | netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); | ||
696 | shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); | ||
647 | BUG_TRAP(!timer_pending(&dev->watchdog_timer)); | 697 | BUG_TRAP(!timer_pending(&dev->watchdog_timer)); |
648 | qdisc_unlock_tree(dev); | ||
649 | } | 698 | } |