author		Patrick Bellasi <patrick.bellasi@arm.com>	2019-06-21 04:42:12 -0400
committer	Ingo Molnar <mingo@kernel.org>	2019-06-24 13:23:49 -0400
commit		af24bde8df2029f067dc46aff0393c8f18ff6e2f (patch)
tree		4e1d39e6059f35eb6164c1fb62332f7f9f3bbc39
parent		9d20ad7dfc9a5cc64e33d725902d3863d350a66a (diff)
sched/uclamp: Add uclamp support to energy_compute()
The Energy Aware Scheduler (EAS) estimates the energy impact of waking
up a task on a given CPU. This estimation is based on:

 a) an (active) power consumption defined for each CPU frequency
 b) an estimation of which frequency will be used on each CPU
 c) an estimation of the busy time (utilization) of each CPU

Utilization clamping can affect both b) and c).

A CPU is expected to run:

 - on a higher than required frequency, but for a shorter time, in case
   its estimated utilization is smaller than the minimum utilization
   enforced by uclamp
 - on a lower than required frequency, but for a longer time, in case
   its estimated utilization is bigger than the maximum utilization
   enforced by uclamp

While compute_energy() already accounts for clamping effects on busy
time, the clamping effects on frequency selection are currently
ignored.

Fix it by considering how CPU clamp values will be affected by a task
waking up and being RUNNABLE on that CPU.

Do that by refactoring schedutil_freq_util() to take an additional
task_struct *, which allows EAS to evaluate the impact on clamp values
of a task possibly being queued on a CPU. Clamp values are applied to
the RT+CFS utilization only when FREQUENCY_UTIL is requested by
compute_energy().

Do note that switching from ENERGY_UTIL to FREQUENCY_UTIL in the
computation of the cpu_util signal implies that we are more likely to
estimate the highest OPP when an RT task is running on another CPU of
the same performance domain. This can have an impact on energy
estimation, but:

 - it's not easy to say which approach is better, since it depends on
   the use case
 - the original behaviour can still be obtained by setting a smaller
   task-specific util_min whenever required

While at it, rename schedutil_freq_util() to schedutil_cpu_util(),
since it is no longer used only for frequency selection.

Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alessio Balsini <balsini@android.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Perret <quentin.perret@arm.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Steve Muckle <smuckle@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Todd Kjos <tkjos@google.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lkml.kernel.org/r/20190621084217.8167-12-patrick.bellasi@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
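For context, the clamping applied when a task pointer is passed works
roughly as follows. This is a simplified sketch of the
uclamp_util_with() helper the new code calls (introduced earlier in
this series; the real helper also handles min/max clamp inversion) and
is not part of this diff:

	static inline unsigned long
	uclamp_util_with(struct rq *rq, unsigned long util, struct task_struct *p)
	{
		/* rq-wide clamps: max-aggregated over the RUNNABLE tasks */
		unsigned long min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
		unsigned long max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);

		/* Fold in p's effective clamps as if it were enqueued here */
		if (p) {
			min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
			max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
		}

		return clamp(util, min_util, max_util);
	}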
-rw-r--r--	kernel/sched/cpufreq_schedutil.c	9
-rw-r--r--	kernel/sched/fair.c	40
-rw-r--r--	kernel/sched/sched.h	21
3 files changed, 48 insertions(+), 22 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d84e036a7536..636ca6f88c8e 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -196,8 +196,9 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
  * based on the task model parameters and gives the minimal utilization
  * required to meet deadlines.
  */
-unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
-				  unsigned long max, enum schedutil_type type)
+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+				 unsigned long max, enum schedutil_type type,
+				 struct task_struct *p)
 {
 	unsigned long dl_util, util, irq;
 	struct rq *rq = cpu_rq(cpu);
@@ -230,7 +231,7 @@ unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
 	 */
 	util = util_cfs + cpu_util_rt(rq);
 	if (type == FREQUENCY_UTIL)
-		util = uclamp_util(rq, util);
+		util = uclamp_util_with(rq, util, p);
 
 	dl_util = cpu_util_dl(rq);
 
@@ -290,7 +291,7 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
 	sg_cpu->max = max;
 	sg_cpu->bw_dl = cpu_bw_dl(rq);
 
-	return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL);
+	return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL);
 }
 
 /**
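Note the two calling conventions this enables: schedutil's own
frequency updates pass a NULL task, so only the rq-wide clamps apply,
while compute_energy() below passes the waking task when evaluating its
candidate destination CPU. A usage sketch, with identifiers as used by
this patch:

	/* sugov_get_util(): rq-wide clamps only */
	util = schedutil_cpu_util(cpu, util_cfs, max, FREQUENCY_UTIL, NULL);

	/* compute_energy(): also account for p's clamps on its dst_cpu */
	tsk = cpu == dst_cpu ? p : NULL;
	util = schedutil_cpu_util(cpu, util_cfs, cpu_cap, FREQUENCY_UTIL, tsk);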
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 28db7ce5c3a6..b798fe7ff7cd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6231,11 +6231,21 @@ static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
 static long
 compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
 {
-	long util, max_util, sum_util, energy = 0;
+	unsigned int max_util, util_cfs, cpu_util, cpu_cap;
+	unsigned long sum_util, energy = 0;
+	struct task_struct *tsk;
 	int cpu;
 
 	for (; pd; pd = pd->next) {
+		struct cpumask *pd_mask = perf_domain_span(pd);
+
+		/*
+		 * The energy model mandates all the CPUs of a performance
+		 * domain have the same capacity.
+		 */
+		cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask));
 		max_util = sum_util = 0;
+
 		/*
 		 * The capacity state of CPUs of the current rd can be driven by
 		 * CPUs of another rd if they belong to the same performance
@@ -6246,11 +6256,29 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
 		 * it will not appear in its pd list and will not be accounted
 		 * by compute_energy().
 		 */
-		for_each_cpu_and(cpu, perf_domain_span(pd), cpu_online_mask) {
-			util = cpu_util_next(cpu, p, dst_cpu);
-			util = schedutil_energy_util(cpu, util);
-			max_util = max(util, max_util);
-			sum_util += util;
+		for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
+			util_cfs = cpu_util_next(cpu, p, dst_cpu);
+
+			/*
+			 * Busy time computation: utilization clamping is not
+			 * required since the ratio (sum_util / cpu_capacity)
+			 * is already enough to scale the EM reported power
+			 * consumption at the (eventually clamped) cpu_capacity.
+			 */
+			sum_util += schedutil_cpu_util(cpu, util_cfs, cpu_cap,
+						       ENERGY_UTIL, NULL);
+
+			/*
+			 * Performance domain frequency: utilization clamping
+			 * must be considered since it affects the selection
+			 * of the performance domain frequency.
+			 * NOTE: in case RT tasks are running, by default the
+			 * FREQUENCY_UTIL's utilization can be max OPP.
+			 */
+			tsk = cpu == dst_cpu ? p : NULL;
+			cpu_util = schedutil_cpu_util(cpu, util_cfs, cpu_cap,
+						      FREQUENCY_UTIL, tsk);
+			max_util = max(max_util, cpu_util);
 		}
 
 		energy += em_pd_energy(pd->em_pd, max_util, sum_util);
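The two aggregates then feed the Energy Model: max_util drives the
frequency estimate (point b) of the changelog) while sum_util provides
the busy-time scaling (points a) and c)). Roughly, and with its fast
paths omitted, em_pd_energy() computes something like the sketch below;
this reconstruction of the helper is for illustration only and is not
part of this diff:

	/* Inside em_pd_energy(em, max_util, sum_util), em == pd->em_pd: */
	cpu = cpumask_first(to_cpumask(em->cpus));
	scale_cpu = arch_scale_cpu_capacity(cpu);

	/* b) map the highest (clamped) utilization to a required frequency */
	cs = &em->table[em->nr_cap_states - 1];
	freq = map_util_freq(max_util, cs->frequency, scale_cpu);

	/* ... then pick the lowest capacity state able to serve it */
	for (i = 0; i < em->nr_cap_states; i++) {
		cs = &em->table[i];
		if (cs->frequency >= freq)
			break;
	}

	/* a) + c) scale that OPP's energy cost by the domain's busy time */
	return cs->cost * sum_util / scale_cpu;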
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1783f6b4c2e0..802b1f3405f2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2322,7 +2322,6 @@ static inline unsigned long capacity_orig_of(int cpu)
 }
 #endif
 
-#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
 /**
  * enum schedutil_type - CPU utilization type
  * @FREQUENCY_UTIL:	Utilization used to select frequency
@@ -2338,15 +2337,11 @@ enum schedutil_type {
 	ENERGY_UTIL,
 };
 
-unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
-				  unsigned long max, enum schedutil_type type);
-
-static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs)
-{
-	unsigned long max = arch_scale_cpu_capacity(cpu);
-
-	return schedutil_freq_util(cpu, cfs, max, ENERGY_UTIL);
-}
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+
+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+				 unsigned long max, enum schedutil_type type,
+				 struct task_struct *p);
 
 static inline unsigned long cpu_bw_dl(struct rq *rq)
 {
@@ -2375,11 +2370,13 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
 	return READ_ONCE(rq->avg_rt.util_avg);
 }
 #else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
-static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs)
+static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+				 unsigned long max, enum schedutil_type type,
+				 struct task_struct *p)
 {
-	return cfs;
+	return 0;
 }
-#endif
+#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
 
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 static inline unsigned long cpu_util_irq(struct rq *rq)