author	Quentin Perret <quentin.perret@arm.com>	2018-12-03 04:56:21 -0500
committer	Ingo Molnar <mingo@kernel.org>	2018-12-11 09:17:00 -0500
commit	531b5c9f5cd05ead53324f419b32685a22eebe8b (patch)
tree	4611a0ddcdbec5c7dbaab6428c129d77b474c51c /kernel/sched
parent	b68a4c0dba3b1e1dda1ede49f3c2fc72d3b54567 (diff)
sched/topology: Make Energy Aware Scheduling depend on schedutil
Energy Aware Scheduling (EAS) is designed with the assumption that
frequencies of CPUs follow their utilization value. When using a CPUFreq
governor other than schedutil, the chances of this assumption being true
are small, if any. When schedutil is being used, EAS' predictions are at
least consistent with the frequency requests. Although those requests
have no guarantees to be honored by the hardware, they should at least
guide DVFS in the right direction and provide some hope in regards to
the EAS model being accurate.

To make sure EAS is only used in a sane configuration, create a strong
dependency on schedutil being used. Since having sugov compiled-in does
not provide that guarantee, make CPUFreq call a scheduler function on
governor changes hence letting it rebuild the scheduling domains, check
the governors of the online CPUs, and enable/disable EAS accordingly.

Signed-off-by: Quentin Perret <quentin.perret@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: adharmap@codeaurora.org
Cc: chris.redpath@arm.com
Cc: currojerez@riseup.net
Cc: dietmar.eggemann@arm.com
Cc: edubezval@gmail.com
Cc: gregkh@linuxfoundation.org
Cc: javi.merino@kernel.org
Cc: joel@joelfernandes.org
Cc: juri.lelli@redhat.com
Cc: morten.rasmussen@arm.com
Cc: patrick.bellasi@arm.com
Cc: pkondeti@codeaurora.org
Cc: skannan@codeaurora.org
Cc: smuckle@google.com
Cc: srinivas.pandruvada@linux.intel.com
Cc: thara.gopinath@linaro.org
Cc: tkjos@google.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Cc: viresh.kumar@linaro.org
Link: https://lkml.kernel.org/r/20181203095628.11858-9-quentin.perret@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
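Note that the diffstat below is limited to kernel/sched, so the cpufreq-side
half of this change (the actual call into the scheduler on governor changes)
is not visible here. For orientation, a sketch of what the shared declaration
of the new hook plausibly looks like; the header location and the exact guard
are assumptions chosen to mirror the #if condition used elsewhere in this
patch, not something shown in this diff:

	/* Sketch only: assumed declaration of the hook defined below, with
	 * a no-op stub so callers need no #ifdefs when EAS support is off. */
	#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
	void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
					   struct cpufreq_governor *old_gov);
	#else
	static inline void
	sched_cpufreq_governor_change(struct cpufreq_policy *policy,
				      struct cpufreq_governor *old_gov) { }
	#endif

With a stub like this, the cpufreq core can invoke the hook unconditionally
whenever a policy's governor changes.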
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/cpufreq_schedutil.c	37
-rw-r--r--	kernel/sched/sched.h	4
-rw-r--r--	kernel/sched/topology.c	28
3 files changed, 60 insertions(+), 9 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 90128be27712..c2e53d1a3143 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -624,7 +624,7 @@ static struct kobj_type sugov_tunables_ktype = {
 
 /********************** cpufreq governor interface *********************/
 
-static struct cpufreq_governor schedutil_gov;
+struct cpufreq_governor schedutil_gov;
 
 static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
 {
@@ -883,7 +883,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
 	sg_policy->need_freq_update = true;
 }
 
-static struct cpufreq_governor schedutil_gov = {
+struct cpufreq_governor schedutil_gov = {
 	.name			= "schedutil",
 	.owner			= THIS_MODULE,
 	.dynamic_switching	= true,
@@ -906,3 +906,36 @@ static int __init sugov_register(void)
 	return cpufreq_register_governor(&schedutil_gov);
 }
 fs_initcall(sugov_register);
+
+#ifdef CONFIG_ENERGY_MODEL
+extern bool sched_energy_update;
+extern struct mutex sched_energy_mutex;
+
+static void rebuild_sd_workfn(struct work_struct *work)
+{
+	mutex_lock(&sched_energy_mutex);
+	sched_energy_update = true;
+	rebuild_sched_domains();
+	sched_energy_update = false;
+	mutex_unlock(&sched_energy_mutex);
+}
+static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
+
+/*
+ * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
+ * on governor changes to make sure the scheduler knows about it.
+ */
+void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
+				   struct cpufreq_governor *old_gov)
+{
+	if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
+		/*
+		 * When called from the cpufreq_register_driver() path, the
+		 * cpu_hotplug_lock is already held, so use a work item to
+		 * avoid nested locking in rebuild_sched_domains().
+		 */
+		schedule_work(&rebuild_sd_work);
+	}
+
+}
+#endif
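The work item above is the interesting part: rebuild_sched_domains() takes
cpu_hotplug_lock, which is already held when a governor change arrives via
cpufreq_register_driver(), so the rebuild is bounced to a kworker rather
than done inline. A self-contained sketch of the same defer-to-workqueue
pattern, with all names hypothetical:

	#include <linux/mutex.h>
	#include <linux/workqueue.h>

	static DEFINE_MUTEX(example_mutex);

	static void example_workfn(struct work_struct *work)
	{
		/* Executes later in kworker context, outside whatever lock
		 * chain the requester held, so sleeping locks are safe. */
		mutex_lock(&example_mutex);
		/* ... the expensive rebuild would happen here ... */
		mutex_unlock(&example_mutex);
	}
	static DECLARE_WORK(example_work, example_workfn);

	/* Callable even while holding locks the rebuild itself needs: */
	static void example_request_rebuild(void)
	{
		schedule_work(&example_work);
	}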
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 75c403674706..fd84900b0b21 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2291,10 +2291,8 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
 }
 #endif
 
-#ifdef CONFIG_SMP
-#ifdef CONFIG_ENERGY_MODEL
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 #define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
 #else
 #define perf_domain_span(pd) NULL
 #endif
-#endif
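After this hunk, perf_domain_span() expands to a real cpumask only when both
CONFIG_ENERGY_MODEL and CONFIG_CPU_FREQ_GOV_SCHEDUTIL are enabled, and to
NULL otherwise (the outer CONFIG_SMP guard is dropped). A hypothetical
caller, not part of this patch, would use it like this:

	/* Hypothetical walker over the CPUs of one perf domain. Because
	 * perf_domain_span() is NULL when EAS support is compiled out,
	 * callers either live inside the same #if block or check for NULL. */
	static void example_walk_perf_domain(struct perf_domain *pd)
	{
		const struct cpumask *span = perf_domain_span(pd);
		int cpu;

		if (!span)
			return;
		for_each_cpu(cpu, span) {
			/* per-CPU energy estimation would go here */
		}
	}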
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 6ddb804b2dec..0a5a1d3a4eae 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -201,7 +201,10 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
-#ifdef CONFIG_ENERGY_MODEL
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+DEFINE_MUTEX(sched_energy_mutex);
+bool sched_energy_update;
+
 static void free_pd(struct perf_domain *pd)
 {
 	struct perf_domain *tmp;
@@ -275,6 +278,7 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp)
  * 1. an Energy Model (EM) is available;
  * 2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy.
  * 3. the EM complexity is low enough to keep scheduling overheads low;
+ * 4. schedutil is driving the frequency of all CPUs of the rd;
  *
  * The complexity of the Energy Model is defined as:
  *
@@ -294,12 +298,15 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp)
  */
 #define EM_MAX_COMPLEXITY 2048
 
+extern struct cpufreq_governor schedutil_gov;
 static void build_perf_domains(const struct cpumask *cpu_map)
 {
 	int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
 	struct perf_domain *pd = NULL, *tmp;
 	int cpu = cpumask_first(cpu_map);
 	struct root_domain *rd = cpu_rq(cpu)->rd;
+	struct cpufreq_policy *policy;
+	struct cpufreq_governor *gov;
 
 	/* EAS is enabled for asymmetric CPU capacity topologies. */
 	if (!per_cpu(sd_asym_cpucapacity, cpu)) {
@@ -315,6 +322,19 @@ static void build_perf_domains(const struct cpumask *cpu_map)
 		if (find_pd(pd, i))
 			continue;
 
+		/* Do not attempt EAS if schedutil is not being used. */
+		policy = cpufreq_cpu_get(i);
+		if (!policy)
+			goto free;
+		gov = policy->governor;
+		cpufreq_cpu_put(policy);
+		if (gov != &schedutil_gov) {
+			if (rd->pd)
+				pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n",
+						cpumask_pr_args(cpu_map));
+			goto free;
+		}
+
 		/* Create the new pd and add it to the local list. */
 		tmp = pd_init(i);
 		if (!tmp)
@@ -356,7 +376,7 @@ free:
 }
 #else
 static void free_pd(struct perf_domain *pd) { }
-#endif /* CONFIG_ENERGY_MODEL */
+#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
 
 static void free_rootdomain(struct rcu_head *rcu)
 {
@@ -2152,10 +2172,10 @@ match2:
 		;
 	}
 
-#ifdef CONFIG_ENERGY_MODEL
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 	/* Build perf. domains: */
 	for (i = 0; i < ndoms_new; i++) {
-		for (j = 0; j < n; j++) {
+		for (j = 0; j < n && !sched_energy_update; j++) {
 			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
 			    cpu_rq(cpumask_first(doms_cur[j]))->rd->pd)
 				goto match3;
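One subtlety in this last hunk: after a governor change the cpumasks of the
new and current domains are identical, so without the added
!sched_energy_update test the loop would always hit the match3 fast path and
build_perf_domains() would never re-check the governors. A condensed
illustration of the resulting control flow, where domains_match() is a
hypothetical stand-in for the cpumask_equal()/rd->pd comparison above:

	/* Illustration only, not the kernel code itself. */
	for (i = 0; i < ndoms_new; i++) {
		/* sched_energy_update == true forces a full rebuild pass: */
		for (j = 0; j < n && !sched_energy_update; j++) {
			if (domains_match(i, j))  /* hypothetical helper */
				goto match3;      /* topology unchanged, skip */
		}
		/* Re-checks the governor of each CPU and enables or
		 * disables EAS for this root domain accordingly. */
		build_perf_domains(doms_new[i]);
	match3:
		;
	}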