aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Gautham R Shenoy <ego@in.ibm.com> 2008-12-18 12:56:09 -0500
committer Ingo Molnar <mingo@elte.hu> 2008-12-19 03:21:46 -0500
commit afb8a9b70b86866a60e08b2956ae4e1406390336 (patch)
tree e4f8918230f4a4eb696c32edb888bced183001db
parent 716707b29906e1d8d190defe3d646610b097a861 (diff)
sched: framework for sched_mc/smt_power_savings=N
Impact: extend range of /sys/devices/system/cpu/sched_mc_power_savings

Currently the sched_mc/smt_power_savings variable is a boolean, which either enables or disables topology-based power savings. This patch extends the behaviour of the variable from boolean to multivalued, so that its value determines how aggressively we perform power-savings balancing at the appropriate sched domain, based on topology.

A power-saving tunable with variable levels would let the end user match the required level of power savings vs. performance trade-off to the system configuration and workloads.

This version lets the sched_mc_power_savings global variable take more values (0, 1, 2). Later versions can have a single tunable called sched_power_savings instead of sched_{mc,smt}_power_savings.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--include/linux/sched.h11
-rw-r--r--kernel/sched.c17
2 files changed, 25 insertions, 3 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1210fb0e45ff..a96726658eca 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -763,6 +763,17 @@ enum cpu_idle_type {
763#define SD_SERIALIZE 1024 /* Only a single load balancing instance */ 763#define SD_SERIALIZE 1024 /* Only a single load balancing instance */
764#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ 764#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */
765 765
766enum powersavings_balance_level {
767 POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */
768 POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package
769 * first for long running threads
770 */
771 POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle
772 * cpu package for power savings
773 */
774 MAX_POWERSAVINGS_BALANCE_LEVELS /* Number of levels, not a level itself; sched_power_savings_store() rejects any value >= this */
775};
776
766extern int sched_mc_power_savings, sched_smt_power_savings; 777extern int sched_mc_power_savings, sched_smt_power_savings;
767 778
768static inline int sd_balance_for_mc_power(void) 779static inline int sd_balance_for_mc_power(void)
diff --git a/kernel/sched.c b/kernel/sched.c
index b309027bf9e8..56b285cd5350 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7906,14 +7906,25 @@ int arch_reinit_sched_domains(void)
7906static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) 7906static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
7907{ 7907{
7908 int ret; 7908 int ret;
7909 unsigned int level = 0;
7909 7910
7910 if (buf[0] != '0' && buf[0] != '1') 7911 if (sscanf(buf, "%u", &level) != 1)
7912 return -EINVAL;
7913
7914 /*
7915 * level is unsigned and so can never be negative; don't check for
7916 * level < POWERSAVINGS_BALANCE_NONE, which is 0.
7917 * What happens on a 0- or 1-byte write?
7918 * Do we need to check count as well?
7919 */
7920
7921 if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
7911 return -EINVAL; 7922 return -EINVAL;
7912 7923
7913 if (smt) 7924 if (smt)
7914 sched_smt_power_savings = (buf[0] == '1'); 7925 sched_smt_power_savings = level;
7915 else 7926 else
7916 sched_mc_power_savings = (buf[0] == '1'); 7927 sched_mc_power_savings = level;
7917 7928
7918 ret = arch_reinit_sched_domains(); 7929 ret = arch_reinit_sched_domains();
7919 7930