aboutsummaryrefslogtreecommitdiffstats
path: root/net/atm/br2684.c
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@infradead.org>2012-11-25 07:06:52 -0500
committerDavid S. Miller <davem@davemloft.net>2012-11-26 17:13:56 -0500
commitae088d663beebb3cad0e7abaac67ee61a7c578d5 (patch)
tree5b46bae47a0cd42a04541b32b6da41f84345f6c6 /net/atm/br2684.c
parentb3422a314c27d2189c05b69cc1654b71315d3d21 (diff)
atm: br2684: Fix excessive queue bloat
There's really no excuse for an additional wmem_default of buffering between the netdev queue and the ATM device. Two packets (one in-flight, and one ready to send) ought to be fine. It's not as if it should take long to get another from the netdev queue when we need it. If necessary we can make the queue space configurable later, but I don't think it's likely to be necessary. cf. commit 9d02daf754238adac48fa075ee79e7edd3d79ed3 (pppoatm: Fix excessive queue bloat) which did something very similar for PPPoATM. Note that there is a tremendously unlikely race condition which may result in qspace temporarily going negative. If a CPU running the br2684_pop() function goes off into the weeds for a long period of time after incrementing qspace to 1, but before calling netdev_wake_queue()... and another CPU ends up calling br2684_start_xmit() and *stopping* the queue again before the first CPU comes back, the netdev queue could end up being woken when qspace has already reached zero. An alternative approach to coping with this race would be to check in br2684_start_xmit() for qspace==0 and return NETDEV_TX_BUSY, but just using '> 0' and '< 1' for comparison instead of '== 0' and '!= 0' is simpler. It just warranted a mention of *why* we do it that way... Move the call to atmvcc->send() to happen *after* the accounting and potentially stopping the netdev queue, in br2684_xmit_vcc(). This matters if the ->send() call suffers an immediate failure, because it'll call br2684_pop() with the offending skb before returning. We want that to happen *after* we've done the initial accounting for the packet in question. Also make it return an appropriate success/failure indication while we're at it. Tested by running 'ping -l 1000 bottomless.aaisp.net.uk' from within my network, with only a single PPPoE-over-BR2684 link running. And after setting txqueuelen on the nas0 interface to something low (5, in fact). 
Before the patch, we'd see about 15 packets being queued and a resulting latency of ~56ms being reached. After the patch, we see only about 8, which is fairly much what we expect. And a max latency of ~36ms. On this OpenWRT box, wmem_default is 163840. Signed-off-by: David Woodhouse <David.Woodhouse@intel.com> Reviewed-by: Krzysztof Mazur <krzysiek@podlesie.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/atm/br2684.c')
-rw-r--r--net/atm/br2684.c36
1 file changed, 22 insertions, 14 deletions
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 4819d31533e0..8eb6fbe8d8dd 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -74,6 +74,7 @@ struct br2684_vcc {
74 struct br2684_filter filter; 74 struct br2684_filter filter;
75#endif /* CONFIG_ATM_BR2684_IPFILTER */ 75#endif /* CONFIG_ATM_BR2684_IPFILTER */
76 unsigned int copies_needed, copies_failed; 76 unsigned int copies_needed, copies_failed;
77 atomic_t qspace;
77}; 78};
78 79
79struct br2684_dev { 80struct br2684_dev {
@@ -181,18 +182,15 @@ static struct notifier_block atm_dev_notifier = {
181static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb) 182static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
182{ 183{
183 struct br2684_vcc *brvcc = BR2684_VCC(vcc); 184 struct br2684_vcc *brvcc = BR2684_VCC(vcc);
184 struct net_device *net_dev = skb->dev;
185 185
186 pr_debug("(vcc %p ; net_dev %p )\n", vcc, net_dev); 186 pr_debug("(vcc %p ; net_dev %p )\n", vcc, brvcc->device);
187 brvcc->old_pop(vcc, skb); 187 brvcc->old_pop(vcc, skb);
188 188
189 if (!net_dev) 189 /* If the queue space just went up from zero, wake */
190 return; 190 if (atomic_inc_return(&brvcc->qspace) == 1)
191 191 netif_wake_queue(brvcc->device);
192 if (atm_may_send(vcc, 0))
193 netif_wake_queue(net_dev);
194
195} 192}
193
196/* 194/*
197 * Send a packet out a particular vcc. Not to useful right now, but paves 195 * Send a packet out a particular vcc. Not to useful right now, but paves
198 * the way for multiple vcc's per itf. Returns true if we can send, 196 * the way for multiple vcc's per itf. Returns true if we can send,
@@ -256,16 +254,19 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
256 ATM_SKB(skb)->atm_options = atmvcc->atm_options; 254 ATM_SKB(skb)->atm_options = atmvcc->atm_options;
257 dev->stats.tx_packets++; 255 dev->stats.tx_packets++;
258 dev->stats.tx_bytes += skb->len; 256 dev->stats.tx_bytes += skb->len;
259 atmvcc->send(atmvcc, skb);
260 257
261 if (!atm_may_send(atmvcc, 0)) { 258 if (atomic_dec_return(&brvcc->qspace) < 1) {
259 /* No more please! */
262 netif_stop_queue(brvcc->device); 260 netif_stop_queue(brvcc->device);
263 /*check for race with br2684_pop*/ 261 /* We might have raced with br2684_pop() */
264 if (atm_may_send(atmvcc, 0)) 262 if (unlikely(atomic_read(&brvcc->qspace) > 0))
265 netif_start_queue(brvcc->device); 263 netif_wake_queue(brvcc->device);
266 } 264 }
267 265
268 return 1; 266 /* If this fails immediately, the skb will be freed and br2684_pop()
267 will wake the queue if appropriate. Just return an error so that
268 the stats are updated correctly */
269 return !atmvcc->send(atmvcc, skb);
269} 270}
270 271
271static inline struct br2684_vcc *pick_outgoing_vcc(const struct sk_buff *skb, 272static inline struct br2684_vcc *pick_outgoing_vcc(const struct sk_buff *skb,
@@ -504,6 +505,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
504 brvcc = kzalloc(sizeof(struct br2684_vcc), GFP_KERNEL); 505 brvcc = kzalloc(sizeof(struct br2684_vcc), GFP_KERNEL);
505 if (!brvcc) 506 if (!brvcc)
506 return -ENOMEM; 507 return -ENOMEM;
508 /*
509 * Allow two packets in the ATM queue. One actually being sent, and one
510 * for the ATM 'TX done' handler to send. It shouldn't take long to get
511 * the next one from the netdev queue, when we need it. More than that
512 * would be bufferbloat.
513 */
514 atomic_set(&brvcc->qspace, 2);
507 write_lock_irq(&devs_lock); 515 write_lock_irq(&devs_lock);
508 net_dev = br2684_find_dev(&be.ifspec); 516 net_dev = br2684_find_dev(&be.ifspec);
509 if (net_dev == NULL) { 517 if (net_dev == NULL) {