aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Fastabend <john.fastabend@gmail.com>2018-03-25 01:25:06 -0400
committerDavid S. Miller <davem@davemloft.net>2018-03-26 12:36:23 -0400
commiteb82a994479245a79647d302f9b4eb8e7c9d7ca6 (patch)
treef2b46c8bc079ea5f443c9ecbd082e053b39c0f42
parent743989254ea9f132517806d8893ca9b6cf9dc86b (diff)
net: sched, fix OOO packets with pfifo_fast
After the qdisc lock was dropped in pfifo_fast we allow multiple enqueue threads and dequeue threads to run in parallel. On the enqueue side the skb bit ooo_okay is used to ensure all related skbs are enqueued in-order. On the dequeue side though there is no similar logic. What we observe is with fewer queues than CPUs it is possible to re-order packets when two instances of __qdisc_run() are running in parallel. Each thread will dequeue a skb and then whichever thread calls the ndo op first will be sent on the wire. This doesn't typically happen because qdisc_run() is usually triggered by the same core that did the enqueue. However, drivers will trigger __netif_schedule() when queues are transitioning from stopped to awake using the netif_tx_wake_* APIs. When this happens netif_schedule() calls qdisc_run() on the same CPU that did the netif_tx_wake_* which is usually done in the interrupt completion context. This CPU is selected with the irq affinity which is unrelated to the enqueue operations. To resolve this we add a RUNNING bit to the qdisc to ensure only a single dequeue per qdisc is running. Enqueue and dequeue operations can still run in parallel and also on multi queue NICs we can still have a dequeue in-flight per qdisc, which is typically per CPU. Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array") Reported-by: Jakob Unterwurzacher <jakob.unterwurzacher@theobroma-systems.com> Signed-off-by: John Fastabend <john.fastabend@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/sch_generic.h1
-rw-r--r--net/sched/sch_generic.c17
2 files changed, 14 insertions, 4 deletions
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2092d33194dd..8da32678ce18 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -30,6 +30,7 @@ struct qdisc_rate_table {
30enum qdisc_state_t { 30enum qdisc_state_t {
31 __QDISC_STATE_SCHED, 31 __QDISC_STATE_SCHED,
32 __QDISC_STATE_DEACTIVATED, 32 __QDISC_STATE_DEACTIVATED,
33 __QDISC_STATE_RUNNING,
33}; 34};
34 35
35struct qdisc_size_table { 36struct qdisc_size_table {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7e3fbe9cc936..39c144b6ff98 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -373,24 +373,33 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
373 */ 373 */
374static inline bool qdisc_restart(struct Qdisc *q, int *packets) 374static inline bool qdisc_restart(struct Qdisc *q, int *packets)
375{ 375{
376 bool more, validate, nolock = q->flags & TCQ_F_NOLOCK;
376 spinlock_t *root_lock = NULL; 377 spinlock_t *root_lock = NULL;
377 struct netdev_queue *txq; 378 struct netdev_queue *txq;
378 struct net_device *dev; 379 struct net_device *dev;
379 struct sk_buff *skb; 380 struct sk_buff *skb;
380 bool validate;
381 381
382 /* Dequeue packet */ 382 /* Dequeue packet */
383 if (nolock && test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
384 return false;
385
383 skb = dequeue_skb(q, &validate, packets); 386 skb = dequeue_skb(q, &validate, packets);
384 if (unlikely(!skb)) 387 if (unlikely(!skb)) {
388 if (nolock)
389 clear_bit(__QDISC_STATE_RUNNING, &q->state);
385 return false; 390 return false;
391 }
386 392
387 if (!(q->flags & TCQ_F_NOLOCK)) 393 if (!nolock)
388 root_lock = qdisc_lock(q); 394 root_lock = qdisc_lock(q);
389 395
390 dev = qdisc_dev(q); 396 dev = qdisc_dev(q);
391 txq = skb_get_tx_queue(dev, skb); 397 txq = skb_get_tx_queue(dev, skb);
392 398
393 return sch_direct_xmit(skb, q, dev, txq, root_lock, validate); 399 more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
400 if (nolock)
401 clear_bit(__QDISC_STATE_RUNNING, &q->state);
402 return more;
394} 403}
395 404
396void __qdisc_run(struct Qdisc *q) 405void __qdisc_run(struct Qdisc *q)