author		Patrick Bellasi <patrick.bellasi@arm.com>	2019-06-21 04:42:12 -0400
committer	Ingo Molnar <mingo@kernel.org>	2019-06-24 13:23:49 -0400
commit		af24bde8df2029f067dc46aff0393c8f18ff6e2f (patch)
tree		4e1d39e6059f35eb6164c1fb62332f7f9f3bbc39
parent		9d20ad7dfc9a5cc64e33d725902d3863d350a66a (diff)
sched/uclamp: Add uclamp support to energy_compute()
The Energy Aware Scheduler (EAS) estimates the energy impact of waking
up a task on a given CPU. This estimation is based on:

 a) an (active) power consumption defined for each CPU frequency
 b) an estimation of which frequency will be used on each CPU
 c) an estimation of the busy time (utilization) of each CPU

Utilization clamping can affect both b) and c).

A CPU is expected to run:

 - on a higher than required frequency, but for a shorter time, in case
   its estimated utilization is smaller than the minimum utilization
   enforced by uclamp
 - on a lower than required frequency, but for a longer time, in case
   its estimated utilization is bigger than the maximum utilization
   enforced by uclamp

While compute_energy() already accounts for clamping effects on busy
time, the clamping effects on frequency selection are currently
ignored.

Fix it by considering how CPU clamp values will be affected by a task
waking up and being RUNNABLE on that CPU.

Do that by refactoring schedutil_freq_util() to take an additional
task_struct *, which allows EAS to evaluate the impact on clamp values
of a task possibly being queued on a CPU. Clamp values are applied to
the RT+CFS utilization only when FREQUENCY_UTIL is requested by
compute_energy().

Do note that switching from ENERGY_UTIL to FREQUENCY_UTIL in the
computation of the cpu_util signal implies that we are more likely to
estimate the highest OPP when an RT task is running on another CPU of
the same performance domain. This can have an impact on energy
estimation, but:

 - it's not easy to say which approach is better, since it depends on
   the use case
 - the original behaviour can still be obtained by setting a smaller
   task-specific util_min whenever required

While at it, rename schedutil_freq_util() to schedutil_cpu_util(),
since it is no longer used only for frequency selection.

Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alessio Balsini <balsini@android.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Perret <quentin.perret@arm.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Steve Muckle <smuckle@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Todd Kjos <tkjos@google.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lkml.kernel.org/r/20190621084217.8167-12-patrick.bellasi@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
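For context, the clamping applied when a task pointer is passed works
roughly as follows. This is a simplified sketch of the
uclamp_util_with() helper the new code calls (introduced earlier in
this series; the real helper also handles min/max clamp inversion) and
is not part of this diff:

	static inline unsigned long
	uclamp_util_with(struct rq *rq, unsigned long util, struct task_struct *p)
	{
		/* rq-wide clamps: max-aggregated over the RUNNABLE tasks */
		unsigned long min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
		unsigned long max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);

		/* Fold in p's effective clamps as if it were enqueued here */
		if (p) {
			min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
			max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
		}

		return clamp(util, min_util, max_util);
	}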
-rw-r--r--	kernel/sched/cpufreq_schedutil.c	9
-rw-r--r--	kernel/sched/fair.c	40
-rw-r--r--	kernel/sched/sched.h	21
3 files changed, 48 insertions(+), 22 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d84e036a7536..636ca6f88c8e 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -196,8 +196,9 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
  * based on the task model parameters and gives the minimal utilization
  * required to meet deadlines.
  */
-unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
-				  unsigned long max, enum schedutil_type type)
+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+				 unsigned long max, enum schedutil_type type,
+				 struct task_struct *p)
 {
 	unsigned long dl_util, util, irq;
 	struct rq *rq = cpu_rq(cpu);
@@ -230,7 +231,7 @@ unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
 	 */
 	util = util_cfs + cpu_util_rt(rq);
 	if (type == FREQUENCY_UTIL)
-		util = uclamp_util(rq, util);
+		util = uclamp_util_with(rq, util, p);
 
 	dl_util = cpu_util_dl(rq);
 
@@ -290,7 +291,7 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
 	sg_cpu->max = max;
 	sg_cpu->bw_dl = cpu_bw_dl(rq);
 
-	return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL);
+	return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL);
 }
 
 /**
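Note the two calling conventions this enables: schedutil's own
frequency updates pass a NULL task, so only the rq-wide clamps apply,
while compute_energy() below passes the waking task when evaluating its
candidate destination CPU. A usage sketch, with identifiers as used by
this patch:

	/* sugov_get_util(): rq-wide clamps only */
	util = schedutil_cpu_util(cpu, util_cfs, max, FREQUENCY_UTIL, NULL);

	/* compute_energy(): also account for p's clamps on its dst_cpu */
	tsk = cpu == dst_cpu ? p : NULL;
	util = schedutil_cpu_util(cpu, util_cfs, cpu_cap, FREQUENCY_UTIL, tsk);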
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 28db7ce5c3a6..b798fe7ff7cd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6231,11 +6231,21 @@ static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
 static long
 compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
 {
-	long util, max_util, sum_util, energy = 0;
+	unsigned int max_util, util_cfs, cpu_util, cpu_cap;
+	unsigned long sum_util, energy = 0;
+	struct task_struct *tsk;
 	int cpu;
 
 	for (; pd; pd = pd->next) {
+		struct cpumask *pd_mask = perf_domain_span(pd);
+
+		/*
+		 * The energy model mandates all the CPUs of a performance
+		 * domain have the same capacity.
+		 */
+		cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask));
 		max_util = sum_util = 0;
+
 		/*
 		 * The capacity state of CPUs of the current rd can be driven by
 		 * CPUs of another rd if they belong to the same performance
@@ -6246,11 +6256,29 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
 		 * it will not appear in its pd list and will not be accounted
 		 * by compute_energy().
 		 */
-		for_each_cpu_and(cpu, perf_domain_span(pd), cpu_online_mask) {
-			util = cpu_util_next(cpu, p, dst_cpu);
-			util = schedutil_energy_util(cpu, util);
-			max_util = max(util, max_util);
-			sum_util += util;
+		for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
+			util_cfs = cpu_util_next(cpu, p, dst_cpu);
+
+			/*
+			 * Busy time computation: utilization clamping is not
+			 * required since the ratio (sum_util / cpu_capacity)
+			 * is already enough to scale the EM reported power
+			 * consumption at the (eventually clamped) cpu_capacity.
+			 */
+			sum_util += schedutil_cpu_util(cpu, util_cfs, cpu_cap,
+						       ENERGY_UTIL, NULL);
+
+			/*
+			 * Performance domain frequency: utilization clamping
+			 * must be considered since it affects the selection
+			 * of the performance domain frequency.
+			 * NOTE: in case RT tasks are running, by default the
+			 * FREQUENCY_UTIL's utilization can be max OPP.
+			 */
+			tsk = cpu == dst_cpu ? p : NULL;
+			cpu_util = schedutil_cpu_util(cpu, util_cfs, cpu_cap,
+						      FREQUENCY_UTIL, tsk);
+			max_util = max(max_util, cpu_util);
 		}
 
 		energy += em_pd_energy(pd->em_pd, max_util, sum_util);
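The two aggregates then feed the Energy Model: max_util drives the
frequency estimate (point b) of the changelog) while sum_util provides
the busy-time scaling (points a) and c)). Roughly, and with its fast
paths omitted, em_pd_energy() computes something like the sketch below;
this reconstruction of the helper is for illustration only and is not
part of this diff:

	/* Inside em_pd_energy(em, max_util, sum_util), em == pd->em_pd: */
	cpu = cpumask_first(to_cpumask(em->cpus));
	scale_cpu = arch_scale_cpu_capacity(cpu);

	/* b) map the highest (clamped) utilization to a required frequency */
	cs = &em->table[em->nr_cap_states - 1];
	freq = map_util_freq(max_util, cs->frequency, scale_cpu);

	/* ... then pick the lowest capacity state able to serve it */
	for (i = 0; i < em->nr_cap_states; i++) {
		cs = &em->table[i];
		if (cs->frequency >= freq)
			break;
	}

	/* a) + c) scale that OPP's energy cost by the domain's busy time */
	return cs->cost * sum_util / scale_cpu;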
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1783f6b4c2e0..802b1f3405f2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2322,7 +2322,6 @@ static inline unsigned long capacity_orig_of(int cpu)
 }
 #endif
 
-#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
 /**
  * enum schedutil_type - CPU utilization type
  * @FREQUENCY_UTIL:	Utilization used to select frequency
@@ -2338,15 +2337,11 @@ enum schedutil_type {
 	ENERGY_UTIL,
 };
 
-unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
-				  unsigned long max, enum schedutil_type type);
-
-static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs)
-{
-	unsigned long max = arch_scale_cpu_capacity(cpu);
-
-	return schedutil_freq_util(cpu, cfs, max, ENERGY_UTIL);
-}
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+
+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+				 unsigned long max, enum schedutil_type type,
+				 struct task_struct *p);
 
 static inline unsigned long cpu_bw_dl(struct rq *rq)
 {
@@ -2375,11 +2370,13 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
 	return READ_ONCE(rq->avg_rt.util_avg);
 }
 #else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
-static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs)
+static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+				 unsigned long max, enum schedutil_type type,
+				 struct task_struct *p)
 {
-	return cfs;
+	return 0;
 }
-#endif
+#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
 
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 static inline unsigned long cpu_util_irq(struct rq *rq)