author     Peter Zijlstra <peterz@infradead.org>    2017-05-17 06:53:50 -0400
committer  Ingo Molnar <mingo@kernel.org>           2017-06-08 04:25:17 -0400
commit     1ad3aaf3fcd2444406628a19a9b9e0922b95e2d4 (patch)
tree       e3db01782e505b85244eeb7e799e535b9c7b319a
parent     9b01d43170aa70a435105f6413759e2ab7e00219 (diff)
sched/core: Implement new approach to scale select_idle_cpu()
Hackbench recently suffered a bunch of pain, first by commit:

  4c77b18cf8b7 ("sched/fair: Make select_idle_cpu() more aggressive")

and then by commit:

  c743f0a5c50f ("sched/fair, cpumask: Export for_each_cpu_wrap()")

which fixed a bug in the initial for_each_cpu_wrap() implementation that made select_idle_cpu() even more expensive. The bug was that it would skip over CPUs when bits were consecutive in the bitmask.

This however gave me an idea to fix select_idle_cpu(); where the old scheme was a cliff-edge throttle on idle scanning, this introduces a more gradual approach. Instead of stopping the scan entirely, we limit how many CPUs we scan.

Initial benchmarks show that it mostly recovers hackbench while not hurting anything else, except Mason's schbench, but not as badly as the old scheme. It also appears to recover the tbench high-end, which also suffered like hackbench.

Tested-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Mason <clm@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: hpa@zytor.com
Cc: kitsunyan <kitsunyan@inbox.ru>
Cc: linux-kernel@vger.kernel.org
Cc: lvenanci@redhat.com
Cc: riel@redhat.com
Cc: xiaolong.ye@intel.com
Link: http://lkml.kernel.org/r/20170517105350.hk5m4h4jb6dfr65a@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
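For illustration only, a minimal user-space sketch of the proportional scan budget introduced under the new SIS_PROP feature; the function name scan_limit() and the plain 64-bit division (standing in for the kernel's div_u64()) are assumptions of the sketch, not kernel code:

  #include <limits.h>
  #include <stdbool.h>
  #include <stdint.h>

  /*
   * Sketch of the new budget: nr is roughly span_weight * (avg_idle / avg_cost),
   * i.e. proportional to how idle this CPU has been relative to the cost of
   * scanning one CPU, with a floor of 4.
   */
  static int scan_limit(bool sis_prop, unsigned int span_weight,
                        uint64_t rq_avg_idle, uint64_t avg_scan_cost)
  {
          int nr = INT_MAX;                        /* SIS_PROP off: no limit, as before */
          uint64_t avg_idle = rq_avg_idle / 512;   /* large fuzz factor */
          uint64_t avg_cost = avg_scan_cost + 1;   /* avoid division by zero */

          if (sis_prop) {
                  uint64_t span_avg = span_weight * avg_idle;

                  if (span_avg > 4 * avg_cost)
                          nr = (int)(span_avg / avg_cost);
                  else
                          nr = 4;                  /* always look at a few CPUs */
          }
          return nr;
  }

In the patch below, select_idle_cpu() decrements this budget once per visited CPU and bails out with -1 when it is exhausted, instead of refusing to scan at all.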
-rw-r--r--  kernel/sched/fair.c      | 21
-rw-r--r--  kernel/sched/features.h  |  1
2 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 47a0c552c77b..396bca9c7996 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5794,27 +5794,38 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct sched_domain *this_sd;
-	u64 avg_cost, avg_idle = this_rq()->avg_idle;
+	u64 avg_cost, avg_idle;
 	u64 time, cost;
 	s64 delta;
-	int cpu;
+	int cpu, nr = INT_MAX;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
 		return -1;
 
-	avg_cost = this_sd->avg_scan_cost;
-
 	/*
 	 * Due to large variance we need a large fuzz factor; hackbench in
 	 * particularly is sensitive here.
 	 */
-	if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost)
+	avg_idle = this_rq()->avg_idle / 512;
+	avg_cost = this_sd->avg_scan_cost + 1;
+
+	if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost)
 		return -1;
 
+	if (sched_feat(SIS_PROP)) {
+		u64 span_avg = sd->span_weight * avg_idle;
+		if (span_avg > 4*avg_cost)
+			nr = div_u64(span_avg, avg_cost);
+		else
+			nr = 4;
+	}
+
 	time = local_clock();
 
 	for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+		if (!--nr)
+			return -1;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
 		if (idle_cpu(cpu))
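To see how the budget behaves, a tiny standalone check with made-up numbers (a 16-CPU LLC; the avg_idle and avg_scan_cost values are hypothetical, chosen only for illustration):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          /* Busy case: rq->avg_idle = 25600ns, avg_scan_cost = 199ns. */
          uint64_t avg_idle = 25600 / 512;        /* 50 */
          uint64_t avg_cost = 199 + 1;            /* 200 */
          uint64_t span_avg = 16 * avg_idle;      /* 800, not > 4*200 */
          int nr = (span_avg > 4 * avg_cost) ? (int)(span_avg / avg_cost) : 4;

          printf("busy: scan at most %d CPUs\n", nr);     /* 4 */

          /* Mostly idle case: rq->avg_idle = 512000ns. */
          avg_idle = 512000 / 512;                /* 1000 */
          span_avg = 16 * avg_idle;               /* 16000 */
          nr = (span_avg > 4 * avg_cost) ? (int)(span_avg / avg_cost) : 4;

          printf("idle: scan at most %d CPUs\n", nr);     /* 80, covers the whole domain */

          return 0;
  }

So when the CPU has been mostly busy the scan is clamped to a handful of CPUs, and when it has been mostly idle the budget comfortably covers the whole LLC, which is the gradual behaviour described in the commit message.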
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index dc4d1483b038..d3fb15555291 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -55,6 +55,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
  */
 SCHED_FEAT(SIS_AVG_CPU, false)
+SCHED_FEAT(SIS_PROP, true)
 
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls