author     David Woodhouse <dwmw2@infradead.org>    2012-04-08 05:55:43 -0400
committer  David S. Miller <davem@davemloft.net>    2012-04-13 13:03:45 -0400
commit     9d02daf754238adac48fa075ee79e7edd3d79ed3 (patch)
tree       6edff9e75f53762ef0c201a9ae8e871a050dc07a /net/atm
parent     39abbaef19cd0a30be93794aa4773c779c3eb1f3 (diff)
pppoatm: Fix excessive queue bloat
We discovered that PPPoATM has an excessively deep transmit queue. A queue
the size of the default socket send buffer (wmem_default) is maintained
between the PPP generic core and the ATM device.

Fix it to queue a maximum of *two* packets: the one the ATM device is
currently working on, and one more for the ATM driver to process
immediately in its TX done interrupt handler. The PPP core is designed to
feed packets to the channel with minimal latency, so that really ought to
be enough to keep the ATM device busy.

While we're at it, fix the fact that we were triggering the wakeup tasklet
on *every* pppoatm_pop() call. The comment saying "this is inefficient,
but doing it right is too hard" turns out to be overly pessimistic... I
think :)

On machines like the Traverse Geos, with a slow Geode CPU and two
high-speed ADSL2+ interfaces, there were reports of extremely high CPU
usage which could partly be attributed to the extra wakeups.

(The wakeup handling could actually be made a whole lot easier if we
stopped checking sk->sk_sndbuf altogether. Given that we now only ever
queue *two* packets, one wonders what the point is. As it is, you could
already deadlock the thing by setting sk_sndbuf to a value lower than the
MTU of the device, and it'd just block for ever.)

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
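A side note on the counting scheme: the patch keeps the inflight counter at
-2 for an empty queue, so that atomic_inc_not_zero() naturally refuses a
third packet once the counter has climbed to zero. Below is a minimal
userspace sketch of that accounting, using C11 atomics in place of the
kernel's atomic_t; the inc_not_zero() helper is a hypothetical stand-in for
atomic_inc_not_zero(), not kernel API, and the queue itself is elided:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NONE_INFLIGHT -2    /* -2: empty, -1: one packet, 0: two (full) */

static atomic_int inflight = NONE_INFLIGHT;

/* Model of atomic_inc_not_zero(): increment unless the value is zero. */
static bool inc_not_zero(atomic_int *v)
{
        int old = atomic_load(v);

        while (old != 0)
                if (atomic_compare_exchange_weak(v, &old, old + 1))
                        return true;
        return false;
}

int main(void)
{
        printf("%d\n", inc_not_zero(&inflight)); /* 1: first packet admitted */
        printf("%d\n", inc_not_zero(&inflight)); /* 1: second packet admitted */
        printf("%d\n", inc_not_zero(&inflight)); /* 0: queue full, refuse */
        atomic_fetch_sub(&inflight, 1);          /* TX done, as in pppoatm_pop() */
        printf("%d\n", inc_not_zero(&inflight)); /* 1: a slot is free again */
        return 0;
}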
Diffstat (limited to 'net/atm')
-rw-r--r--  net/atm/pppoatm.c | 95
1 file changed, 85 insertions(+), 10 deletions(-)
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 614d3fc47ede..ce1e59fdae7b 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -62,12 +62,25 @@ struct pppoatm_vcc {
         void (*old_pop)(struct atm_vcc *, struct sk_buff *);
                                         /* keep old push/pop for detaching */
         enum pppoatm_encaps encaps;
+        atomic_t        inflight;
+        unsigned long   blocked;
         int             flags;          /* SC_COMP_PROT - compress protocol */
         struct ppp_channel chan;        /* interface to generic ppp layer */
         struct tasklet_struct wakeup_tasklet;
 };
 
 /*
+ * We want to allow two packets in the queue. The one that's currently in
+ * flight, and *one* queued up ready for the ATM device to send immediately
+ * from its TX done IRQ. We want to be able to use atomic_inc_not_zero(), so
+ * inflight == -2 represents an empty queue, -1 one packet, and zero means
+ * there are two packets in the queue.
+ */
+#define NONE_INFLIGHT -2
+
+#define BLOCKED 0
+
+/*
  * Header used for LLC Encapsulated PPP (4 bytes) followed by the LCP protocol
  * ID (0xC021) used in autodetection
  */
@@ -102,16 +115,30 @@ static void pppoatm_wakeup_sender(unsigned long arg)
 static void pppoatm_pop(struct atm_vcc *atmvcc, struct sk_buff *skb)
 {
         struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc);
+
         pvcc->old_pop(atmvcc, skb);
+        atomic_dec(&pvcc->inflight);
+
         /*
-         * We don't really always want to do this since it's
-         * really inefficient - it would be much better if we could
-         * test if we had actually throttled the generic layer.
-         * Unfortunately then there would be a nasty SMP race where
-         * we could clear that flag just as we refuse another packet.
-         * For now we do the safe thing.
+         * We always used to run the wakeup tasklet unconditionally here, for
+         * fear of race conditions where we clear the BLOCKED flag just as we
+         * refuse another packet in pppoatm_send(). This was quite inefficient.
+         *
+         * In fact it's OK. The PPP core will only ever call pppoatm_send()
+         * while holding the channel->downl lock. And ppp_output_wakeup() as
+         * called by the tasklet will *also* grab that lock. So even if another
+         * CPU is in pppoatm_send() right now, the tasklet isn't going to race
+         * with it. The wakeup *will* happen after the other CPU is safely out
+         * of pppoatm_send() again.
+         *
+         * So if the CPU in pppoatm_send() has already set the BLOCKED bit and
+         * is about to return, that's fine. We trigger a wakeup which will
+         * happen later. And if the CPU in pppoatm_send() *hasn't* set the
+         * BLOCKED bit yet, that's fine too because of the double check in
+         * pppoatm_may_send() which is commented there.
          */
-        tasklet_schedule(&pvcc->wakeup_tasklet);
+        if (test_and_clear_bit(BLOCKED, &pvcc->blocked))
+                tasklet_schedule(&pvcc->wakeup_tasklet);
 }
 
 /*
@@ -184,6 +211,51 @@ error:
         ppp_input_error(&pvcc->chan, 0);
 }
 
+static inline int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
+{
+        /*
+         * It's not clear that we need to bother with using atm_may_send()
+         * to check we don't exceed sk->sk_sndbuf. If userspace sets a
+         * value of sk_sndbuf which is lower than the MTU, we're going to
+         * block for ever. But the code always did that before we introduced
+         * the packet count limit, so...
+         */
+        if (atm_may_send(pvcc->atmvcc, size) &&
+            atomic_inc_not_zero_hint(&pvcc->inflight, NONE_INFLIGHT))
+                return 1;
+
+        /*
+         * We use test_and_set_bit() rather than set_bit() here because
+         * we need to ensure there's a memory barrier after it. The bit
+         * *must* be set before we do the atomic_inc() on pvcc->inflight.
+         * There's no smp_mb__after_set_bit(), so it's this or abuse
+         * smp_mb__after_clear_bit().
+         */
+        test_and_set_bit(BLOCKED, &pvcc->blocked);
+
+        /*
+         * We may have raced with pppoatm_pop(). If it ran for the
+         * last packet in the queue, *just* before we set the BLOCKED
+         * bit, then it might never run again and the channel could
+         * remain permanently blocked. Cope with that race by checking
+         * *again*. If it did run in that window, we'll have space on
+         * the queue now and can return success. It's harmless to leave
+         * the BLOCKED flag set, since it's only used as a trigger to
+         * run the wakeup tasklet. Another wakeup will never hurt.
+         * If pppoatm_pop() is running but hasn't got as far as making
+         * space on the queue yet, then it hasn't checked the BLOCKED
+         * flag yet either, so we're safe in that case too. It'll issue
+         * an "immediate" wakeup... where "immediate" actually involves
+         * taking the PPP channel's ->downl lock, which is held by the
+         * code path that calls pppoatm_send(), and is thus going to
+         * wait for us to finish.
+         */
+        if (atm_may_send(pvcc->atmvcc, size) &&
+            atomic_inc_not_zero(&pvcc->inflight))
+                return 1;
+
+        return 0;
+}
+
 /*
  * Called by the ppp_generic.c to send a packet - returns true if packet
  * was accepted. If we return false, then it's our job to call
@@ -207,7 +279,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
                         struct sk_buff *n;
                         n = skb_realloc_headroom(skb, LLC_LEN);
                         if (n != NULL &&
-                            !atm_may_send(pvcc->atmvcc, n->truesize)) {
+                            !pppoatm_may_send(pvcc, n->truesize)) {
                                 kfree_skb(n);
                                 goto nospace;
                         }
@@ -215,12 +287,12 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
                         skb = n;
                         if (skb == NULL)
                                 return DROP_PACKET;
-                } else if (!atm_may_send(pvcc->atmvcc, skb->truesize))
+                } else if (!pppoatm_may_send(pvcc, skb->truesize))
                         goto nospace;
                 memcpy(skb_push(skb, LLC_LEN), pppllc, LLC_LEN);
                 break;
         case e_vc:
-                if (!atm_may_send(pvcc->atmvcc, skb->truesize))
+                if (!pppoatm_may_send(pvcc, skb->truesize))
                         goto nospace;
                 break;
         case e_autodetect:
@@ -285,6 +357,9 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
         if (pvcc == NULL)
                 return -ENOMEM;
         pvcc->atmvcc = atmvcc;
+
+        /* Maximum is zero, so that we can use atomic_inc_not_zero() */
+        atomic_set(&pvcc->inflight, NONE_INFLIGHT);
         pvcc->old_push = atmvcc->push;
         pvcc->old_pop = atmvcc->pop;
         pvcc->encaps = (enum pppoatm_encaps) be.encaps;
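
The interplay between the BLOCKED bit and the double check in
pppoatm_may_send() can likewise be modelled in ordinary userspace C. The
sketch below is illustrative only: it uses a C11 atomic_bool in place of
test_and_set_bit()/test_and_clear_bit(), elides the tasklet and the ->downl
lock, and may_send(), pop() and inc_not_zero() are hypothetical stand-ins
for pppoatm_may_send(), pppoatm_pop() and atomic_inc_not_zero():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NONE_INFLIGHT -2

static atomic_int inflight = NONE_INFLIGHT;
static atomic_bool blocked = false;

/* Model of atomic_inc_not_zero(): increment unless the counter is zero. */
static bool inc_not_zero(atomic_int *v)
{
        int old = atomic_load(v);

        while (old != 0)
                if (atomic_compare_exchange_weak(v, &old, old + 1))
                        return true;
        return false;
}

/* Sender side, modelling the shape of pppoatm_may_send(). */
static bool may_send(void)
{
        if (inc_not_zero(&inflight))
                return true;

        /*
         * Set the flag *before* re-checking, mirroring test_and_set_bit()
         * in the patch: either pop() sees the flag and issues a wakeup,
         * or it has already freed a slot that the second check will find.
         */
        atomic_store(&blocked, true);

        return inc_not_zero(&inflight);
}

/* Consumer side, modelling pppoatm_pop() after TX completion. */
static void pop(void)
{
        atomic_fetch_sub(&inflight, 1);
        if (atomic_exchange(&blocked, false))
                printf("wakeup\n");     /* tasklet_schedule() would go here */
}

int main(void)
{
        printf("%d\n", may_send());     /* 1: first packet */
        printf("%d\n", may_send());     /* 1: second packet */
        printf("%d\n", may_send());     /* 0: full; BLOCKED now set */
        pop();                          /* frees a slot, prints "wakeup" */
        printf("%d\n", may_send());     /* 1: admitted again */
        return 0;
}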