author     Eric Dumazet <edumazet@google.com>      2016-06-06 12:37:16 -0400
committer  David S. Miller <davem@davemloft.net>   2016-06-07 19:37:14 -0400
commit     edb09eb17ed89eaa82a52dd306beac93e292b485 (patch)
tree       1f241506d6b781b65d1033925f1c1ce6a39c3394 /net/sched/sch_hfsc.c
parent     f9eb8aea2a1e12fc2f584d1627deeb957435a801 (diff)
net: sched: do not acquire qdisc spinlock in qdisc/class stats dump
Large tc dumps (tc -s {qdisc|class} sh dev ethX) done by the Google BwE host
agent [1] are problematic at scale:
For each qdisc/class found in the dump, we currently lock the root qdisc
spinlock in order to get stats. Sampling stats every 5 seconds from
thousands of HTB classes is a challenge when the root qdisc spinlock is
under high pressure. Not only do the dumps take time, they also slow
down the fast path (enqueue/dequeue packets) by 10% to 20% in some cases.
An audit of existing qdiscs showed that sch_fq_codel is the only qdisc
that might need the qdisc lock in fq_codel_dump_stats() and
fq_codel_dump_class_stats().
In v2 of this patch, I now use the Qdisc running seqcount to provide
consistent reads of packets/bytes counters, regardless of 32/64-bit arches.
I also changed the rate estimators to use the same infrastructure
so that they no longer need to take the root qdisc lock.
[1] http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43838.pdf
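
For context, the running-seqcount read pattern looks roughly like the sketch
below. This is an illustration, not the patched code (the real reader lives
in net/core/gen_stats.c, and demo_basic/demo_snapshot are made-up names
standing in for gnet_stats_basic_packed and its copy helper): the dump side
snapshots the counters and retries whenever the fast path's write section
raced with it.

#include <linux/seqlock.h>
#include <linux/types.h>

/* Hypothetical stand-in for the bytes/packets pair kept in
 * gnet_stats_basic_packed.
 */
struct demo_basic {
	u64 bytes;
	u32 packets;
};

/* Lockless, tear-free snapshot of the counter pair, consistent even on
 * 32-bit arches where a 64-bit read can otherwise tear.  "running" is
 * the per-qdisc seqcount bumped by the fast path around its
 * enqueue/dequeue accounting.
 */
static void demo_snapshot(const seqcount_t *running,
			  const struct demo_basic *b,
			  struct demo_basic *out)
{
	unsigned int seq;

	do {
		seq = read_seqcount_begin(running);	/* enter read section */
		out->bytes = b->bytes;
		out->packets = b->packets;
	} while (read_seqcount_retry(running, seq));	/* writer raced: retry */
}

The write side is the fast path itself: the parent commit (f9eb8aea2a1e)
turned the qdisc running bit into a seqcount, so packet processing already
forms a write section and dumps no longer have to contend for the root
qdisc spinlock.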
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Kevin Athey <kda@google.com>
Cc: Xiaotian Pei <xiaotian@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched/sch_hfsc.c')
-rw-r--r--  net/sched/sch_hfsc.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d783d7cc3348..74813dd49053 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1015,11 +1015,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		cur_time = psched_get_time();
 
 		if (tca[TCA_RATE]) {
-			spinlock_t *lock = qdisc_root_sleeping_lock(sch);
-
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
-						    lock,
+						    NULL,
+						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
@@ -1068,7 +1067,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-					qdisc_root_sleeping_lock(sch),
+					NULL,
+					qdisc_root_sleeping_running(sch),
 					tca[TCA_RATE]);
 		if (err) {
 			kfree(cl);
@@ -1373,7 +1373,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	xstats.work = cl->cl_total;
 	xstats.rtwork = cl->cl_cumul;
 
-	if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
 		return -1;
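
As the hunks show, the estimator calls and gnet_stats_copy_basic() now take
the root qdisc's running seqcount (with the stats spinlock argument passed
as NULL) instead of the root lock itself. For reference, the helper used
above is roughly the following sketch, assuming struct Qdisc gained a
"running" seqcount in the parent commit; see include/net/sch_generic.h in
the actual tree for the authoritative version:

/* Return the root qdisc's running seqcount so stats readers can
 * snapshot counters locklessly; callers hold RTNL, not the qdisc lock.
 */
static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc_root_sleeping(qdisc);

	ASSERT_RTNL();
	return &root->running;
}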