diff options
Diffstat (limited to 'net/sched/sch_generic.c')
-rw-r--r-- | net/sched/sch_generic.c | 219 |
1 files changed, 115 insertions, 104 deletions
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f4d34480a093..c81649cf0b9e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -11,27 +11,19 @@ | |||
11 | * - Ingress support | 11 | * - Ingress support |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <asm/uaccess.h> | ||
15 | #include <asm/system.h> | ||
16 | #include <linux/bitops.h> | 14 | #include <linux/bitops.h> |
17 | #include <linux/module.h> | 15 | #include <linux/module.h> |
18 | #include <linux/types.h> | 16 | #include <linux/types.h> |
19 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
20 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
21 | #include <linux/string.h> | 19 | #include <linux/string.h> |
22 | #include <linux/mm.h> | ||
23 | #include <linux/socket.h> | ||
24 | #include <linux/sockios.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
27 | #include <linux/interrupt.h> | ||
28 | #include <linux/netdevice.h> | 21 | #include <linux/netdevice.h> |
29 | #include <linux/skbuff.h> | 22 | #include <linux/skbuff.h> |
30 | #include <linux/rtnetlink.h> | 23 | #include <linux/rtnetlink.h> |
31 | #include <linux/init.h> | 24 | #include <linux/init.h> |
32 | #include <linux/rcupdate.h> | 25 | #include <linux/rcupdate.h> |
33 | #include <linux/list.h> | 26 | #include <linux/list.h> |
34 | #include <net/sock.h> | ||
35 | #include <net/pkt_sched.h> | 27 | #include <net/pkt_sched.h> |
36 | 28 | ||
37 | /* Main transmission queue. */ | 29 | /* Main transmission queue. */ |
@@ -59,122 +51,143 @@ void qdisc_unlock_tree(struct net_device *dev) | |||
59 | spin_unlock_bh(&dev->queue_lock); | 51 | spin_unlock_bh(&dev->queue_lock); |
60 | } | 52 | } |
61 | 53 | ||
62 | /* | 54 | static inline int qdisc_qlen(struct Qdisc *q) |
63 | dev->queue_lock serializes queue accesses for this device | 55 | { |
64 | AND dev->qdisc pointer itself. | 56 | return q->q.qlen; |
57 | } | ||
65 | 58 | ||
66 | netif_tx_lock serializes accesses to device driver. | 59 | static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, |
60 | struct Qdisc *q) | ||
61 | { | ||
62 | if (unlikely(skb->next)) | ||
63 | dev->gso_skb = skb; | ||
64 | else | ||
65 | q->ops->requeue(skb, q); | ||
67 | 66 | ||
68 | dev->queue_lock and netif_tx_lock are mutually exclusive, | 67 | netif_schedule(dev); |
69 | if one is grabbed, another must be free. | 68 | return 0; |
70 | */ | 69 | } |
71 | 70 | ||
71 | static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, | ||
72 | struct Qdisc *q) | ||
73 | { | ||
74 | struct sk_buff *skb; | ||
72 | 75 | ||
73 | /* Kick device. | 76 | if ((skb = dev->gso_skb)) |
77 | dev->gso_skb = NULL; | ||
78 | else | ||
79 | skb = q->dequeue(q); | ||
74 | 80 | ||
75 | Returns: 0 - queue is empty or throttled. | 81 | return skb; |
76 | >0 - queue is not empty. | 82 | } |
77 | 83 | ||
78 | NOTE: Called under dev->queue_lock with locally disabled BH. | 84 | static inline int handle_dev_cpu_collision(struct sk_buff *skb, |
79 | */ | 85 | struct net_device *dev, |
86 | struct Qdisc *q) | ||
87 | { | ||
88 | int ret; | ||
80 | 89 | ||
90 | if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { | ||
91 | /* | ||
92 | * Same CPU holding the lock. It may be a transient | ||
93 | * configuration error, when hard_start_xmit() recurses. We | ||
94 | * detect it by checking xmit owner and drop the packet when | ||
95 | * deadloop is detected. Return OK to try the next skb. | ||
96 | */ | ||
97 | kfree_skb(skb); | ||
98 | if (net_ratelimit()) | ||
99 | printk(KERN_WARNING "Dead loop on netdevice %s, " | ||
100 | "fix it urgently!\n", dev->name); | ||
101 | ret = qdisc_qlen(q); | ||
102 | } else { | ||
103 | /* | ||
104 | * Another cpu is holding lock, requeue & delay xmits for | ||
105 | * some time. | ||
106 | */ | ||
107 | __get_cpu_var(netdev_rx_stat).cpu_collision++; | ||
108 | ret = dev_requeue_skb(skb, dev, q); | ||
109 | } | ||
110 | |||
111 | return ret; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * NOTE: Called under dev->queue_lock with locally disabled BH. | ||
116 | * | ||
117 | * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this | ||
118 | * device at a time. dev->queue_lock serializes queue accesses for | ||
119 | * this device AND dev->qdisc pointer itself. | ||
120 | * | ||
121 | * netif_tx_lock serializes accesses to device driver. | ||
122 | * | ||
123 | * dev->queue_lock and netif_tx_lock are mutually exclusive, | ||
124 | * if one is grabbed, another must be free. | ||
125 | * | ||
126 | * Note, that this procedure can be called by a watchdog timer | ||
127 | * | ||
128 | * Returns to the caller: | ||
129 | * 0 - queue is empty or throttled. | ||
130 | * >0 - queue is not empty. | ||
131 | * | ||
132 | */ | ||
81 | static inline int qdisc_restart(struct net_device *dev) | 133 | static inline int qdisc_restart(struct net_device *dev) |
82 | { | 134 | { |
83 | struct Qdisc *q = dev->qdisc; | 135 | struct Qdisc *q = dev->qdisc; |
84 | struct sk_buff *skb; | 136 | struct sk_buff *skb; |
137 | unsigned lockless; | ||
138 | int ret; | ||
85 | 139 | ||
86 | /* Dequeue packet */ | 140 | /* Dequeue packet */ |
87 | if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { | 141 | if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) |
88 | unsigned nolock = (dev->features & NETIF_F_LLTX); | 142 | return 0; |
143 | |||
144 | /* | ||
145 | * When the driver has LLTX set, it does its own locking in | ||
146 | * start_xmit. These checks are worth it because even uncongested | ||
147 | * locks can be quite expensive. The driver can do a trylock, as | ||
148 | * is being done here; in case of lock contention it should return | ||
149 | * NETDEV_TX_LOCKED and the packet will be requeued. | ||
150 | */ | ||
151 | lockless = (dev->features & NETIF_F_LLTX); | ||
89 | 152 | ||
90 | dev->gso_skb = NULL; | 153 | if (!lockless && !netif_tx_trylock(dev)) { |
154 | /* Another CPU grabbed the driver tx lock */ | ||
155 | return handle_dev_cpu_collision(skb, dev, q); | ||
156 | } | ||
91 | 157 | ||
92 | /* | 158 | /* And release queue */ |
93 | * When the driver has LLTX set it does its own locking | 159 | spin_unlock(&dev->queue_lock); |
94 | * in start_xmit. No need to add additional overhead by | ||
95 | * locking again. These checks are worth it because | ||
96 | * even uncongested locks can be quite expensive. | ||
97 | * The driver can do trylock like here too, in case | ||
98 | * of lock congestion it should return -1 and the packet | ||
99 | * will be requeued. | ||
100 | */ | ||
101 | if (!nolock) { | ||
102 | if (!netif_tx_trylock(dev)) { | ||
103 | collision: | ||
104 | /* So, someone grabbed the driver. */ | ||
105 | |||
106 | /* It may be transient configuration error, | ||
107 | when hard_start_xmit() recurses. We detect | ||
108 | it by checking xmit owner and drop the | ||
109 | packet when deadloop is detected. | ||
110 | */ | ||
111 | if (dev->xmit_lock_owner == smp_processor_id()) { | ||
112 | kfree_skb(skb); | ||
113 | if (net_ratelimit()) | ||
114 | printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); | ||
115 | goto out; | ||
116 | } | ||
117 | __get_cpu_var(netdev_rx_stat).cpu_collision++; | ||
118 | goto requeue; | ||
119 | } | ||
120 | } | ||
121 | 160 | ||
122 | { | 161 | ret = dev_hard_start_xmit(skb, dev); |
123 | /* And release queue */ | ||
124 | spin_unlock(&dev->queue_lock); | ||
125 | |||
126 | if (!netif_queue_stopped(dev)) { | ||
127 | int ret; | ||
128 | |||
129 | ret = dev_hard_start_xmit(skb, dev); | ||
130 | if (ret == NETDEV_TX_OK) { | ||
131 | if (!nolock) { | ||
132 | netif_tx_unlock(dev); | ||
133 | } | ||
134 | spin_lock(&dev->queue_lock); | ||
135 | q = dev->qdisc; | ||
136 | goto out; | ||
137 | } | ||
138 | if (ret == NETDEV_TX_LOCKED && nolock) { | ||
139 | spin_lock(&dev->queue_lock); | ||
140 | q = dev->qdisc; | ||
141 | goto collision; | ||
142 | } | ||
143 | } | ||
144 | 162 | ||
145 | /* NETDEV_TX_BUSY - we need to requeue */ | 163 | if (!lockless) |
146 | /* Release the driver */ | 164 | netif_tx_unlock(dev); |
147 | if (!nolock) { | ||
148 | netif_tx_unlock(dev); | ||
149 | } | ||
150 | spin_lock(&dev->queue_lock); | ||
151 | q = dev->qdisc; | ||
152 | } | ||
153 | 165 | ||
154 | /* Device kicked us out :( | 166 | spin_lock(&dev->queue_lock); |
155 | This is possible in three cases: | 167 | q = dev->qdisc; |
156 | 168 | ||
157 | 0. driver is locked | 169 | switch (ret) { |
158 | 1. fastroute is enabled | 170 | case NETDEV_TX_OK: |
159 | 2. device cannot determine busy state | 171 | /* Driver sent out skb successfully */ |
160 | before start of transmission (f.e. dialout) | 172 | ret = qdisc_qlen(q); |
161 | 3. device is buggy (ppp) | 173 | break; |
162 | */ | ||
163 | 174 | ||
164 | requeue: | 175 | case NETDEV_TX_LOCKED: |
165 | if (unlikely(q == &noop_qdisc)) | 176 | /* Driver try lock failed */ |
166 | kfree_skb(skb); | 177 | ret = handle_dev_cpu_collision(skb, dev, q); |
167 | else if (skb->next) | 178 | break; |
168 | dev->gso_skb = skb; | 179 | |
169 | else | 180 | default: |
170 | q->ops->requeue(skb, q); | 181 | /* Driver returned NETDEV_TX_BUSY - requeue skb */ |
171 | netif_schedule(dev); | 182 | if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) |
183 | printk(KERN_WARNING "BUG %s code %d qlen %d\n", | ||
184 | dev->name, ret, q->q.qlen); | ||
185 | |||
186 | ret = dev_requeue_skb(skb, dev, q); | ||
187 | break; | ||
172 | } | 188 | } |
173 | return 0; | ||
174 | 189 | ||
175 | out: | 190 | return ret; |
176 | BUG_ON((int) q->q.qlen < 0); | ||
177 | return q->q.qlen; | ||
178 | } | 191 | } |
179 | 192 | ||
180 | void __qdisc_run(struct net_device *dev) | 193 | void __qdisc_run(struct net_device *dev) |
@@ -493,9 +506,7 @@ void qdisc_destroy(struct Qdisc *qdisc) | |||
493 | return; | 506 | return; |
494 | 507 | ||
495 | list_del(&qdisc->list); | 508 | list_del(&qdisc->list); |
496 | #ifdef CONFIG_NET_ESTIMATOR | ||
497 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); | 509 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); |
498 | #endif | ||
499 | if (ops->reset) | 510 | if (ops->reset) |
500 | ops->reset(qdisc); | 511 | ops->reset(qdisc); |
501 | if (ops->destroy) | 512 | if (ops->destroy) |