diff options
author | Jamal Hadi Salim <hadi@cyberus.ca> | 2007-06-10 20:31:24 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-07-11 01:06:16 -0400 |
commit | c716a81ab946c68a8d84022ee32eb14674e72650 (patch) | |
tree | be9e428265d85a605ee0fe003fab7c14d516ef61 /net | |
parent | 05646c91109bfd129361d57dc5d98464ab6f6578 (diff) |
[NET_SCHED]: Cleanup readability of qdisc restart
Over the years this code has gotten hairier, resulting in many long
discussions over long summer days and patches that get it wrong.
This patch helps tame that code so normal people will understand it.
Thanks to Thomas Graf, Peter J. Waskiewicz Jr, and Patrick McHardy
for their valuable reviews.
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/sched/sch_generic.c | 199 |
1 files changed, 109 insertions, 90 deletions
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f4d34480a093..9461e8ae0529 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -34,6 +34,9 @@ | |||
34 | #include <net/sock.h> | 34 | #include <net/sock.h> |
35 | #include <net/pkt_sched.h> | 35 | #include <net/pkt_sched.h> |
36 | 36 | ||
37 | #define SCHED_TX_DROP -2 | ||
38 | #define SCHED_TX_QUEUE -3 | ||
39 | |||
37 | /* Main transmission queue. */ | 40 | /* Main transmission queue. */ |
38 | 41 | ||
39 | /* Modifications to data participating in scheduling must be protected with | 42 | /* Modifications to data participating in scheduling must be protected with |
@@ -59,7 +62,74 @@ void qdisc_unlock_tree(struct net_device *dev) | |||
59 | spin_unlock_bh(&dev->queue_lock); | 62 | spin_unlock_bh(&dev->queue_lock); |
60 | } | 63 | } |
61 | 64 | ||
65 | static inline int qdisc_qlen(struct Qdisc *q) | ||
66 | { | ||
67 | BUG_ON((int) q->q.qlen < 0); | ||
68 | return q->q.qlen; | ||
69 | } | ||
70 | |||
71 | static inline int handle_dev_cpu_collision(struct net_device *dev) | ||
72 | { | ||
73 | if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { | ||
74 | if (net_ratelimit()) | ||
75 | printk(KERN_WARNING | ||
76 | "Dead loop on netdevice %s, fix it urgently!\n", | ||
77 | dev->name); | ||
78 | return SCHED_TX_DROP; | ||
79 | } | ||
80 | __get_cpu_var(netdev_rx_stat).cpu_collision++; | ||
81 | return SCHED_TX_QUEUE; | ||
82 | } | ||
83 | |||
84 | static inline int | ||
85 | do_dev_requeue(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) | ||
86 | { | ||
87 | |||
88 | if (unlikely(skb->next)) | ||
89 | dev->gso_skb = skb; | ||
90 | else | ||
91 | q->ops->requeue(skb, q); | ||
92 | /* XXX: Could netif_schedule fail? Or is the fact we are | ||
93 | * requeueing imply the hardware path is closed | ||
94 | * and even if we fail, some interupt will wake us | ||
95 | */ | ||
96 | netif_schedule(dev); | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | static inline struct sk_buff * | ||
101 | try_get_tx_pkt(struct net_device *dev, struct Qdisc *q) | ||
102 | { | ||
103 | struct sk_buff *skb = dev->gso_skb; | ||
104 | |||
105 | if (skb) | ||
106 | dev->gso_skb = NULL; | ||
107 | else | ||
108 | skb = q->dequeue(q); | ||
109 | |||
110 | return skb; | ||
111 | } | ||
112 | |||
113 | static inline int | ||
114 | tx_islocked(struct sk_buff *skb, struct net_device *dev, struct Qdisc *q) | ||
115 | { | ||
116 | int ret = handle_dev_cpu_collision(dev); | ||
117 | |||
118 | if (ret == SCHED_TX_DROP) { | ||
119 | kfree_skb(skb); | ||
120 | return qdisc_qlen(q); | ||
121 | } | ||
122 | |||
123 | return do_dev_requeue(skb, dev, q); | ||
124 | } | ||
125 | |||
126 | |||
62 | /* | 127 | /* |
128 | NOTE: Called under dev->queue_lock with locally disabled BH. | ||
129 | |||
130 | __LINK_STATE_QDISC_RUNNING guarantees only one CPU | ||
131 | can enter this region at a time. | ||
132 | |||
63 | dev->queue_lock serializes queue accesses for this device | 133 | dev->queue_lock serializes queue accesses for this device |
64 | AND dev->qdisc pointer itself. | 134 | AND dev->qdisc pointer itself. |
65 | 135 | ||
@@ -67,116 +137,65 @@ void qdisc_unlock_tree(struct net_device *dev) | |||
67 | 137 | ||
68 | dev->queue_lock and netif_tx_lock are mutually exclusive, | 138 | dev->queue_lock and netif_tx_lock are mutually exclusive, |
69 | if one is grabbed, another must be free. | 139 | if one is grabbed, another must be free. |
70 | */ | ||
71 | 140 | ||
141 | Multiple CPUs may contend for the two locks. | ||
72 | 142 | ||
73 | /* Kick device. | 143 | Note, that this procedure can be called by a watchdog timer |
74 | 144 | ||
145 | Returns to the caller: | ||
75 | Returns: 0 - queue is empty or throttled. | 146 | Returns: 0 - queue is empty or throttled. |
76 | >0 - queue is not empty. | 147 | >0 - queue is not empty. |
77 | 148 | ||
78 | NOTE: Called under dev->queue_lock with locally disabled BH. | ||
79 | */ | 149 | */ |
80 | 150 | ||
81 | static inline int qdisc_restart(struct net_device *dev) | 151 | static inline int qdisc_restart(struct net_device *dev) |
82 | { | 152 | { |
83 | struct Qdisc *q = dev->qdisc; | 153 | struct Qdisc *q = dev->qdisc; |
154 | unsigned lockless = (dev->features & NETIF_F_LLTX); | ||
84 | struct sk_buff *skb; | 155 | struct sk_buff *skb; |
156 | int ret; | ||
85 | 157 | ||
86 | /* Dequeue packet */ | 158 | skb = try_get_tx_pkt(dev, q); |
87 | if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { | 159 | if (skb == NULL) |
88 | unsigned nolock = (dev->features & NETIF_F_LLTX); | 160 | return 0; |
89 | |||
90 | dev->gso_skb = NULL; | ||
91 | 161 | ||
92 | /* | 162 | /* we have a packet to send */ |
93 | * When the driver has LLTX set it does its own locking | 163 | if (!lockless) { |
94 | * in start_xmit. No need to add additional overhead by | 164 | if (!netif_tx_trylock(dev)) |
95 | * locking again. These checks are worth it because | 165 | return tx_islocked(skb, dev, q); |
96 | * even uncongested locks can be quite expensive. | 166 | } |
97 | * The driver can do trylock like here too, in case | 167 | /* all clear .. */ |
98 | * of lock congestion it should return -1 and the packet | 168 | spin_unlock(&dev->queue_lock); |
99 | * will be requeued. | ||
100 | */ | ||
101 | if (!nolock) { | ||
102 | if (!netif_tx_trylock(dev)) { | ||
103 | collision: | ||
104 | /* So, someone grabbed the driver. */ | ||
105 | |||
106 | /* It may be transient configuration error, | ||
107 | when hard_start_xmit() recurses. We detect | ||
108 | it by checking xmit owner and drop the | ||
109 | packet when deadloop is detected. | ||
110 | */ | ||
111 | if (dev->xmit_lock_owner == smp_processor_id()) { | ||
112 | kfree_skb(skb); | ||
113 | if (net_ratelimit()) | ||
114 | printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); | ||
115 | goto out; | ||
116 | } | ||
117 | __get_cpu_var(netdev_rx_stat).cpu_collision++; | ||
118 | goto requeue; | ||
119 | } | ||
120 | } | ||
121 | 169 | ||
122 | { | 170 | ret = NETDEV_TX_BUSY; |
123 | /* And release queue */ | 171 | if (!netif_queue_stopped(dev)) |
124 | spin_unlock(&dev->queue_lock); | 172 | /* churn baby churn .. */ |
125 | 173 | ret = dev_hard_start_xmit(skb, dev); | |
126 | if (!netif_queue_stopped(dev)) { | ||
127 | int ret; | ||
128 | |||
129 | ret = dev_hard_start_xmit(skb, dev); | ||
130 | if (ret == NETDEV_TX_OK) { | ||
131 | if (!nolock) { | ||
132 | netif_tx_unlock(dev); | ||
133 | } | ||
134 | spin_lock(&dev->queue_lock); | ||
135 | q = dev->qdisc; | ||
136 | goto out; | ||
137 | } | ||
138 | if (ret == NETDEV_TX_LOCKED && nolock) { | ||
139 | spin_lock(&dev->queue_lock); | ||
140 | q = dev->qdisc; | ||
141 | goto collision; | ||
142 | } | ||
143 | } | ||
144 | 174 | ||
145 | /* NETDEV_TX_BUSY - we need to requeue */ | 175 | if (!lockless) |
146 | /* Release the driver */ | 176 | netif_tx_unlock(dev); |
147 | if (!nolock) { | ||
148 | netif_tx_unlock(dev); | ||
149 | } | ||
150 | spin_lock(&dev->queue_lock); | ||
151 | q = dev->qdisc; | ||
152 | } | ||
153 | 177 | ||
154 | /* Device kicked us out :( | 178 | spin_lock(&dev->queue_lock); |
155 | This is possible in three cases: | ||
156 | |||
157 | 0. driver is locked | ||
158 | 1. fastroute is enabled | ||
159 | 2. device cannot determine busy state | ||
160 | before start of transmission (f.e. dialout) | ||
161 | 3. device is buggy (ppp) | ||
162 | */ | ||
163 | |||
164 | requeue: | ||
165 | if (unlikely(q == &noop_qdisc)) | ||
166 | kfree_skb(skb); | ||
167 | else if (skb->next) | ||
168 | dev->gso_skb = skb; | ||
169 | else | ||
170 | q->ops->requeue(skb, q); | ||
171 | netif_schedule(dev); | ||
172 | } | ||
173 | return 0; | ||
174 | 179 | ||
175 | out: | 180 | /* we need to refresh q because it may be invalid since |
176 | BUG_ON((int) q->q.qlen < 0); | 181 | * we dropped dev->queue_lock earlier ... |
177 | return q->q.qlen; | 182 | * So dont try to be clever grasshopper |
183 | */ | ||
184 | q = dev->qdisc; | ||
185 | /* most likely result, packet went ok */ | ||
186 | if (ret == NETDEV_TX_OK) | ||
187 | return qdisc_qlen(q); | ||
188 | /* only for lockless drivers .. */ | ||
189 | if (ret == NETDEV_TX_LOCKED && lockless) | ||
190 | return tx_islocked(skb, dev, q); | ||
191 | |||
192 | if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) | ||
193 | printk(KERN_WARNING " BUG %s code %d qlen %d\n",dev->name, ret, q->q.qlen); | ||
194 | |||
195 | return do_dev_requeue(skb, dev, q); | ||
178 | } | 196 | } |
179 | 197 | ||
198 | |||
180 | void __qdisc_run(struct net_device *dev) | 199 | void __qdisc_run(struct net_device *dev) |
181 | { | 200 | { |
182 | do { | 201 | do { |