author	Eric Dumazet <edumazet@google.com>	2019-02-28 15:55:43 -0500
committer	David S. Miller <davem@davemloft.net>	2019-03-02 17:10:18 -0500
commit	46b1c18f9deb326a7e18348e668e4c7ab7c7458b (patch)
tree	9ab8b4996f340482b8bdcf0a0f9ac11f79117520
parent	e7c42a89e9f16039684418dfe3c43b068734ff8f (diff)
net: sched: put back q.qlen into a single location
In the series fc8b81a5981f ("Merge branch 'lockless-qdisc-series'") John made the assumption that the data path had no need to read the qdisc qlen (number of packets in the qdisc).

It is true when pfifo_fast is used as the root qdisc, or as a direct MQ/MQPRIO child. But pfifo_fast can also be used as a leaf in classful qdiscs, and the existing logic needs to access the child qlen in an efficient way.

HTB breaks badly, since it uses cl->leaf.q->q.qlen in:
  htb_activate() -> WARN_ON()
  htb_dequeue_tree() to decide if a class can be htb_deactivated when it has no more packets.

HFSC, DRR, CBQ and QFQ have similar issues, and some calls to qdisc_tree_reduce_backlog() also read q.qlen directly.

Using qdisc_qlen_sum() (which iterates over all possible cpus) in the data path is a non-starter.

It seems we have to put qlen back in a central location, at least for stable kernels.

For all qdiscs but pfifo_fast, qlen is guarded by the qdisc lock, so the existing q.qlen{++|--} are correct.

For 'lockless' qdiscs (pfifo_fast so far), we need to use atomic_{inc|dec}() because the spinlock might not be held (for example from pfifo_fast_enqueue() and pfifo_fast_dequeue()).

This patch adds atomic_qlen (in the same location as qlen) and renames the following helpers, since we want to express that they can be used without the qdisc lock, and that qlen is no longer percpu:

- qdisc_qstats_cpu_qlen_dec -> qdisc_qstats_atomic_qlen_dec()
- qdisc_qstats_cpu_qlen_inc -> qdisc_qstats_atomic_qlen_inc()

Later (net-next) we might revert this patch by tracking all these qlen uses and replacing them with a more efficient method (not having to access a precise qlen, but an empty/non-empty status that might be less expensive to maintain/track).

Another possibility is to have a legacy pfifo_fast version that would be used when pfifo_fast is a child qdisc, since the parent qdisc needs a spinlock anyway. But then, future lockless qdiscs would have the same problem.

Fixes: 7e66016f2c65 ("net: sched: helpers to sum qlen and qlen for per cpu logic")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
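For context, a condensed sketch of the HTB reads the message refers to. This paraphrases the relevant call sites in net/sched/sch_htb.c of that era rather than quoting them verbatim; elided code is marked with comments.

	/* Condensed illustration, not a verbatim copy of net/sched/sch_htb.c.
	 * HTB decides class (de)activation from the child qdisc's q.qlen.
	 */
	static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
	{
		/* With a per-cpu-only qlen, a busy pfifo_fast leaf still reads
		 * q.qlen == 0 here, so this WARN_ON() fires.
		 */
		WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
		/* ... put the class on the active lists ... */
	}

	/* Inside htb_dequeue_tree(), after pulling a packet from the leaf: */
		if (!cl->leaf.q->q.qlen)
			htb_deactivate(q, cl);	/* leaf looks empty, class goes idle */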
-rw-r--r--include/net/sch_generic.h31
-rw-r--r--net/core/gen_stats.c2
-rw-r--r--net/sched/sch_generic.c13
3 files changed, 19 insertions, 27 deletions
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9481f2c142e2..e7eb4aa6ccc9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -51,7 +51,10 @@ struct qdisc_size_table {
 struct qdisc_skb_head {
 	struct sk_buff	*head;
 	struct sk_buff	*tail;
-	__u32		qlen;
+	union {
+		u32		qlen;
+		atomic_t	atomic_qlen;
+	};
 	spinlock_t	lock;
 };
 
@@ -408,27 +411,19 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 	BUILD_BUG_ON(sizeof(qcb->data) < sz);
 }
 
-static inline int qdisc_qlen_cpu(const struct Qdisc *q)
-{
-	return this_cpu_ptr(q->cpu_qstats)->qlen;
-}
-
 static inline int qdisc_qlen(const struct Qdisc *q)
 {
 	return q->q.qlen;
 }
 
-static inline int qdisc_qlen_sum(const struct Qdisc *q)
+static inline u32 qdisc_qlen_sum(const struct Qdisc *q)
 {
-	__u32 qlen = q->qstats.qlen;
-	int i;
+	u32 qlen = q->qstats.qlen;
 
-	if (q->flags & TCQ_F_NOLOCK) {
-		for_each_possible_cpu(i)
-			qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
-	} else {
+	if (q->flags & TCQ_F_NOLOCK)
+		qlen += atomic_read(&q->q.atomic_qlen);
+	else
 		qlen += q->q.qlen;
-	}
 
 	return qlen;
 }
@@ -825,14 +820,14 @@ static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
 	this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
 }
 
-static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
+static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch)
 {
-	this_cpu_inc(sch->cpu_qstats->qlen);
+	atomic_inc(&sch->q.atomic_qlen);
 }
 
-static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
+static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch)
 {
-	this_cpu_dec(sch->cpu_qstats->qlen);
+	atomic_dec(&sch->q.atomic_qlen);
 }
 
 static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
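Taken together, the header changes above yield two update patterns, which the sch_generic.c hunks below apply at each enqueue/dequeue site. A condensed illustration of the enqueue side (assuming a struct Qdisc *q and a struct sk_buff *skb are in scope):

	if (qdisc_is_percpu_stats(q)) {
		/* per-cpu stats path (pfifo_fast / lockless qdiscs):
		 * backlog stays per-cpu, the packet count is now a
		 * shared atomic_t updated without the qdisc lock
		 */
		qdisc_qstats_cpu_backlog_inc(q, skb);
		qdisc_qstats_atomic_qlen_inc(q);
	} else {
		/* locked path: both counters remain guarded by the qdisc lock */
		qdisc_qstats_backlog_inc(q, skb);
		q->q.qlen++;
	}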
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 9bf1b9ad1780..ac679f74ba47 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -291,7 +291,6 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
 	for_each_possible_cpu(i) {
 		const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
 
-		qstats->qlen = 0;
 		qstats->backlog += qcpu->backlog;
 		qstats->drops += qcpu->drops;
 		qstats->requeues += qcpu->requeues;
@@ -307,7 +306,6 @@ void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
 	if (cpu) {
 		__gnet_stats_copy_queue_cpu(qstats, cpu);
 	} else {
-		qstats->qlen = q->qlen;
 		qstats->backlog = q->backlog;
 		qstats->drops = q->drops;
 		qstats->requeues = q->requeues;
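With the per-cpu qlen copies gone from __gnet_stats_copy_queue(), code that still needs a packet count for a lockless qdisc has to go through qdisc_qlen_sum(). A minimal sketch of that pattern (hypothetical helper for illustration, not taken from the tree):

	/* Hypothetical helper: report a qdisc's length via qdisc_qlen_sum(),
	 * which reads the atomic counter for TCQ_F_NOLOCK qdiscs and
	 * q.qlen otherwise.
	 */
	static u32 report_qdisc_qlen(const struct Qdisc *q)
	{
		return qdisc_qlen_sum(q);
	}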
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 968a85fe4d4a..de31f2f3b973 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
 			skb = __skb_dequeue(&q->skb_bad_txq);
 			if (qdisc_is_percpu_stats(q)) {
 				qdisc_qstats_cpu_backlog_dec(q, skb);
-				qdisc_qstats_cpu_qlen_dec(q);
+				qdisc_qstats_atomic_qlen_dec(q);
 			} else {
 				qdisc_qstats_backlog_dec(q, skb);
 				q->q.qlen--;
@@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
 
 	if (qdisc_is_percpu_stats(q)) {
 		qdisc_qstats_cpu_backlog_inc(q, skb);
-		qdisc_qstats_cpu_qlen_inc(q);
+		qdisc_qstats_atomic_qlen_inc(q);
 	} else {
 		qdisc_qstats_backlog_inc(q, skb);
 		q->q.qlen++;
@@ -147,7 +147,7 @@ static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
 
 		qdisc_qstats_cpu_requeues_inc(q);
 		qdisc_qstats_cpu_backlog_inc(q, skb);
-		qdisc_qstats_cpu_qlen_inc(q);
+		qdisc_qstats_atomic_qlen_inc(q);
 
 		skb = next;
 	}
@@ -252,7 +252,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 			skb = __skb_dequeue(&q->gso_skb);
 			if (qdisc_is_percpu_stats(q)) {
 				qdisc_qstats_cpu_backlog_dec(q, skb);
-				qdisc_qstats_cpu_qlen_dec(q);
+				qdisc_qstats_atomic_qlen_dec(q);
 			} else {
 				qdisc_qstats_backlog_dec(q, skb);
 				q->q.qlen--;
@@ -645,7 +645,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
 	if (unlikely(err))
 		return qdisc_drop_cpu(skb, qdisc, to_free);
 
-	qdisc_qstats_cpu_qlen_inc(qdisc);
+	qdisc_qstats_atomic_qlen_inc(qdisc);
 	/* Note: skb can not be used after skb_array_produce(),
 	 * so we better not use qdisc_qstats_cpu_backlog_inc()
 	 */
@@ -670,7 +670,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 	if (likely(skb)) {
 		qdisc_qstats_cpu_backlog_dec(qdisc, skb);
 		qdisc_bstats_cpu_update(qdisc, skb);
-		qdisc_qstats_cpu_qlen_dec(qdisc);
+		qdisc_qstats_atomic_qlen_dec(qdisc);
 	}
 
 	return skb;
@@ -714,7 +714,6 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
 		struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
 
 		q->backlog = 0;
-		q->qlen = 0;
 	}
 }
 