about summary refs log tree commit diff stats
path: root/net/sched
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2014-09-20 21:01:30 -0400
committer David S. Miller <davem@davemloft.net> 2014-09-26 00:26:48 -0400
commit 4a8e320c929991c9480a7b936512c57ea02d87b2 (patch)
tree df1c9960aa28db003cdea1558246a3e873c2f055 /net/sched
parent 9fb426a642a166730a8c916cb38c5461dbc28ffb (diff)
net: sched: use pinned timers
While using a MQ + NETEM setup, I had confirmation that the default
timer migration ( /proc/sys/kernel/timer_migration ) is killing us.

Installing this on a receiver side of a TCP_STREAM test, (NIC has 8 TX
queues) :

EST="est 1sec 4sec"
for ETH in eth1
do
 tc qd del dev $ETH root 2>/dev/null
 tc qd add dev $ETH root handle 1: mq
 tc qd add dev $ETH parent 1:1 $EST netem limit 70000 delay 6ms
 tc qd add dev $ETH parent 1:2 $EST netem limit 70000 delay 8ms
 tc qd add dev $ETH parent 1:3 $EST netem limit 70000 delay 10ms
 tc qd add dev $ETH parent 1:4 $EST netem limit 70000 delay 12ms
 tc qd add dev $ETH parent 1:5 $EST netem limit 70000 delay 14ms
 tc qd add dev $ETH parent 1:6 $EST netem limit 70000 delay 16ms
 tc qd add dev $ETH parent 1:7 $EST netem limit 80000 delay 18ms
 tc qd add dev $ETH parent 1:8 $EST netem limit 90000 delay 20ms
done

We can see that timers get migrated into a single cpu, presumably idle
at the time timers are set up.
Then all qdisc dequeues run from this cpu and huge lock contention
happens. This single cpu is stuck in softirq mode and cannot dequeue
fast enough.

    39.24%  [kernel]  [k] _raw_spin_lock
     2.65%  [kernel]  [k] netem_enqueue
     1.80%  [kernel]  [k] netem_dequeue
     1.63%  [kernel]  [k] copy_user_enhanced_fast_string
     1.45%  [kernel]  [k] _raw_spin_lock_bh

By pinning qdisc timers on the cpu running the qdisc, we respect proper
XPS setting and remove this lock contention.

     5.84%  [kernel]  [k] netem_enqueue
     4.83%  [kernel]  [k] _raw_spin_lock
     2.92%  [kernel]  [k] copy_user_enhanced_fast_string

Current Qdiscs that benefit from this change are :

	netem, cbq, fq, hfsc, tbf, htb.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched')
-rw-r--r-- net/sched/sch_api.c 4
-rw-r--r-- net/sched/sch_cbq.c 4
-rw-r--r-- net/sched/sch_htb.c 2
3 files changed, 5 insertions, 5 deletions
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ca6248345937..15e7beee266c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -586,7 +586,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
586 586
587void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) 587void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
588{ 588{
589 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 589 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
590 wd->timer.function = qdisc_watchdog; 590 wd->timer.function = qdisc_watchdog;
591 wd->qdisc = qdisc; 591 wd->qdisc = qdisc;
592} 592}
@@ -602,7 +602,7 @@ void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
602 602
603 hrtimer_start(&wd->timer, 603 hrtimer_start(&wd->timer,
604 ns_to_ktime(expires), 604 ns_to_ktime(expires),
605 HRTIMER_MODE_ABS); 605 HRTIMER_MODE_ABS_PINNED);
606} 606}
607EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); 607EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
608 608
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index a3244a800501..d2cd981ba60d 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -617,7 +617,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
617 617
618 time = ktime_set(0, 0); 618 time = ktime_set(0, 0);
619 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay)); 619 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
620 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); 620 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
621 } 621 }
622 622
623 qdisc_unthrottled(sch); 623 qdisc_unthrottled(sch);
@@ -1386,7 +1386,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1386 q->link.minidle = -0x7FFFFFFF; 1386 q->link.minidle = -0x7FFFFFFF;
1387 1387
1388 qdisc_watchdog_init(&q->watchdog, sch); 1388 qdisc_watchdog_init(&q->watchdog, sch);
1389 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 1389 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
1390 q->delay_timer.function = cbq_undelay; 1390 q->delay_timer.function = cbq_undelay;
1391 q->toplevel = TC_CBQ_MAXLEVEL; 1391 q->toplevel = TC_CBQ_MAXLEVEL;
1392 q->now = psched_get_time(); 1392 q->now = psched_get_time();
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 14408f262143..063e953d9848 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -932,7 +932,7 @@ ok:
932 ktime_t time = ns_to_ktime(next_event); 932 ktime_t time = ns_to_ktime(next_event);
933 qdisc_throttled(q->watchdog.qdisc); 933 qdisc_throttled(q->watchdog.qdisc);
934 hrtimer_start(&q->watchdog.timer, time, 934 hrtimer_start(&q->watchdog.timer, time,
935 HRTIMER_MODE_ABS); 935 HRTIMER_MODE_ABS_PINNED);
936 } 936 }
937 } else { 937 } else {
938 schedule_work(&q->work); 938 schedule_work(&q->work);