diff options
author | John Fastabend <john.fastabend@gmail.com> | 2018-03-25 01:25:06 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-03-26 12:36:23 -0400 |
commit | eb82a994479245a79647d302f9b4eb8e7c9d7ca6 (patch) | |
tree | f2b46c8bc079ea5f443c9ecbd082e053b39c0f42 | |
parent | 743989254ea9f132517806d8893ca9b6cf9dc86b (diff) |
net: sched, fix OOO packets with pfifo_fast
After the qdisc lock was dropped in pfifo_fast we allow multiple
enqueue threads and dequeue threads to run in parallel. On the
enqueue side the skb bit ooo_okay is used to ensure all related
skbs are enqueued in-order. On the dequeue side though there is
no similar logic. What we observe is with fewer queues than CPUs
it is possible to re-order packets when two instances of
__qdisc_run() are running in parallel. Each thread will dequeue
a skb and then whichever thread calls the ndo op first will
be sent on the wire. This doesn't typically happen because
qdisc_run() is usually triggered by the same core that did the
enqueue. However, drivers will trigger __netif_schedule()
when queues are transitioning from stopped to awake using the
netif_tx_wake_* APIs. When this happens netif_schedule() calls
qdisc_run() on the same CPU that did the netif_tx_wake_* which
is usually done in the interrupt completion context. This CPU
is selected with the irq affinity which is unrelated to the
enqueue operations.
To resolve this we add a RUNNING bit to the qdisc to ensure
only a single dequeue per qdisc is running. Enqueue and dequeue
operations can still run in parallel and also on multi queue
NICs we can still have a dequeue in-flight per qdisc, which
is typically per CPU.
Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
Reported-by: Jakob Unterwurzacher <jakob.unterwurzacher@theobroma-systems.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sch_generic.h | 1 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 17 |
2 files changed, 14 insertions, 4 deletions
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2092d33194dd..8da32678ce18 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h | |||
@@ -30,6 +30,7 @@ struct qdisc_rate_table { | |||
30 | enum qdisc_state_t { | 30 | enum qdisc_state_t { |
31 | __QDISC_STATE_SCHED, | 31 | __QDISC_STATE_SCHED, |
32 | __QDISC_STATE_DEACTIVATED, | 32 | __QDISC_STATE_DEACTIVATED, |
33 | __QDISC_STATE_RUNNING, | ||
33 | }; | 34 | }; |
34 | 35 | ||
35 | struct qdisc_size_table { | 36 | struct qdisc_size_table { |
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7e3fbe9cc936..39c144b6ff98 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -373,24 +373,33 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, | |||
373 | */ | 373 | */ |
374 | static inline bool qdisc_restart(struct Qdisc *q, int *packets) | 374 | static inline bool qdisc_restart(struct Qdisc *q, int *packets) |
375 | { | 375 | { |
376 | bool more, validate, nolock = q->flags & TCQ_F_NOLOCK; | ||
376 | spinlock_t *root_lock = NULL; | 377 | spinlock_t *root_lock = NULL; |
377 | struct netdev_queue *txq; | 378 | struct netdev_queue *txq; |
378 | struct net_device *dev; | 379 | struct net_device *dev; |
379 | struct sk_buff *skb; | 380 | struct sk_buff *skb; |
380 | bool validate; | ||
381 | 381 | ||
382 | /* Dequeue packet */ | 382 | /* Dequeue packet */ |
383 | if (nolock && test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) | ||
384 | return false; | ||
385 | |||
383 | skb = dequeue_skb(q, &validate, packets); | 386 | skb = dequeue_skb(q, &validate, packets); |
384 | if (unlikely(!skb)) | 387 | if (unlikely(!skb)) { |
388 | if (nolock) | ||
389 | clear_bit(__QDISC_STATE_RUNNING, &q->state); | ||
385 | return false; | 390 | return false; |
391 | } | ||
386 | 392 | ||
387 | if (!(q->flags & TCQ_F_NOLOCK)) | 393 | if (!nolock) |
388 | root_lock = qdisc_lock(q); | 394 | root_lock = qdisc_lock(q); |
389 | 395 | ||
390 | dev = qdisc_dev(q); | 396 | dev = qdisc_dev(q); |
391 | txq = skb_get_tx_queue(dev, skb); | 397 | txq = skb_get_tx_queue(dev, skb); |
392 | 398 | ||
393 | return sch_direct_xmit(skb, q, dev, txq, root_lock, validate); | 399 | more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate); |
400 | if (nolock) | ||
401 | clear_bit(__QDISC_STATE_RUNNING, &q->state); | ||
402 | return more; | ||
394 | } | 403 | } |
395 | 404 | ||
396 | void __qdisc_run(struct Qdisc *q) | 405 | void __qdisc_run(struct Qdisc *q) |