Diffstat:
 Documentation/ABI/testing/sysfs-devices-system-cpu |  25
 Documentation/scheduler/sched-domains.txt          |   4
 arch/x86/kernel/smpboot.c                          |   3
 drivers/base/cpu.c                                 |   4
 include/linux/cpu.h                                |   2
 include/linux/sched.h                              |  47
 include/linux/topology.h                           |   5
 kernel/sched/core.c                                |  94
 kernel/sched/fair.c                                | 275
 tools/power/cpupower/man/cpupower-set.1            |   9
 tools/power/cpupower/utils/helpers/sysfs.c         |  35
 11 files changed, 5 insertions(+), 498 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index e7be75b96e4b..5dab36448b44 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -9,31 +9,6 @@ Description:
 
 		/sys/devices/system/cpu/cpu#/
 
-What:		/sys/devices/system/cpu/sched_mc_power_savings
-		/sys/devices/system/cpu/sched_smt_power_savings
-Date:		June 2006
-Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
-Description:	Discover and adjust the kernel's multi-core scheduler support.
-
-		Possible values are:
-
-		0 - No power saving load balance (default value)
-		1 - Fill one thread/core/package first for long running threads
-		2 - Also bias task wakeups to semi-idle cpu package for power
-		    savings
-
-		sched_mc_power_savings is dependent upon SCHED_MC, which is
-		itself architecture dependent.
-
-		sched_smt_power_savings is dependent upon SCHED_SMT, which
-		is itself architecture dependent.
-
-		The two files are independent of each other. It is possible
-		that one file may be present without the other.
-
-		Introduced by git commit 5c45bf27.
-
-
 What:		/sys/devices/system/cpu/kernel_max
 		/sys/devices/system/cpu/offline
 		/sys/devices/system/cpu/online
diff --git a/Documentation/scheduler/sched-domains.txt b/Documentation/scheduler/sched-domains.txt
index b7ee379b651b..443f0c76bab4 100644
--- a/Documentation/scheduler/sched-domains.txt
+++ b/Documentation/scheduler/sched-domains.txt
@@ -61,10 +61,6 @@ The implementor should read comments in include/linux/sched.h:
 struct sched_domain fields, SD_FLAG_*, SD_*_INIT to get an idea of
 the specifics and what to tune.
 
-For SMT, the architecture must define CONFIG_SCHED_SMT and provide a
-cpumask_t cpu_sibling_map[NR_CPUS], where cpu_sibling_map[i] is the mask of
-all "i"'s siblings as well as "i" itself.
-
 Architectures may retain the regular override the default SD_*_INIT flags
 while using the generic domain builder in kernel/sched.c if they wish to
 retain the traditional SMT->SMP->NUMA topology (or some subset of that). This
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e84c1bbea339..256c20cc5e96 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -429,8 +429,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 	 * For perf, we return last level cache shared map.
 	 * And for power savings, we return cpu_core_map
 	 */
-	if ((sched_mc_power_savings || sched_smt_power_savings) &&
-	    !(cpu_has(c, X86_FEATURE_AMD_DCM)))
+	if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
 		return cpu_core_mask(cpu);
 	else
 		return cpu_llc_shared_mask(cpu);
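
After this hunk the knob check is gone and only the X86_FEATURE_AMD_DCM special case remains. For reference, a sketch of the resulting function, assembled from the hunk and its context (the declaration of `c` is assumed from the unchanged start of the function, which the hunk does not show):

	const struct cpumask *cpu_coregroup_mask(int cpu)
	{
		struct cpuinfo_x86 *c = &cpu_data(cpu);	/* assumed from pre-patch context */

		/*
		 * For perf, we return last level cache shared map.
		 * And for power savings, we return cpu_core_map
		 */
		if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
			return cpu_core_mask(cpu);
		else
			return cpu_llc_shared_mask(cpu);
	}
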
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index adf937bf4091..63452943abd1 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -330,8 +330,4 @@ void __init cpu_dev_init(void)
 		panic("Failed to register CPU subsystem");
 
 	cpu_dev_register_generic();
-
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	sched_create_sysfs_power_savings_entries(cpu_subsys.dev_root);
-#endif
 }
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index ee28844ae68e..7230bb59a06f 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -36,8 +36,6 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
 extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
 extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
 
-extern int sched_create_sysfs_power_savings_entries(struct device *dev);
-
 #ifdef CONFIG_HOTPLUG_CPU
 extern void unregister_cpu(struct cpu *cpu);
 extern ssize_t arch_cpu_probe(const char *, size_t);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4a559bf0622f..3d644809c9db 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -855,61 +855,14 @@ enum cpu_idle_type {
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
 #define SD_PREFER_LOCAL		0x0040	/* Prefer to keep tasks local to this domain */
 #define SD_SHARE_CPUPOWER	0x0080	/* Domain members share cpu power */
-#define SD_POWERSAVINGS_BALANCE	0x0100	/* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800	/* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
 #define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
 
-enum powersavings_balance_level {
-	POWERSAVINGS_BALANCE_NONE = 0,	/* No power saving load balance */
-	POWERSAVINGS_BALANCE_BASIC,	/* Fill one thread/core/package
-					 * first for long running threads
-					 */
-	POWERSAVINGS_BALANCE_WAKEUP,	/* Also bias task wakeups to semi-idle
-					 * cpu package for power savings
-					 */
-	MAX_POWERSAVINGS_BALANCE_LEVELS
-};
-
-extern int sched_mc_power_savings, sched_smt_power_savings;
-
-static inline int sd_balance_for_mc_power(void)
-{
-	if (sched_smt_power_savings)
-		return SD_POWERSAVINGS_BALANCE;
-
-	if (!sched_mc_power_savings)
-		return SD_PREFER_SIBLING;
-
-	return 0;
-}
-
-static inline int sd_balance_for_package_power(void)
-{
-	if (sched_mc_power_savings | sched_smt_power_savings)
-		return SD_POWERSAVINGS_BALANCE;
-
-	return SD_PREFER_SIBLING;
-}
-
 extern int __weak arch_sd_sibiling_asym_packing(void);
 
-/*
- * Optimise SD flags for power savings:
- * SD_BALANCE_NEWIDLE helps aggressive task consolidation and power savings.
- * Keep default SD flags if sched_{smt,mc}_power_saving=0
- */
-
-static inline int sd_power_saving_flags(void)
-{
-	if (sched_mc_power_savings | sched_smt_power_savings)
-		return SD_BALANCE_NEWIDLE;
-
-	return 0;
-}
-
 struct sched_group_power {
 	atomic_t ref;
 	/*
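
The deleted helpers above are pure functions of the two knobs. A minimal standalone sketch (compilable userspace C) that tabulates what they contributed to the MC-level domain flags; the flag values are copied from the hunk above, except SD_BALANCE_NEWIDLE, whose value 0x0002 is an assumption from kernel headers of this era and is not shown in this diff:

	#include <stdio.h>

	#define SD_BALANCE_NEWIDLE	0x0002	/* assumed; not shown in this diff */
	#define SD_POWERSAVINGS_BALANCE	0x0100	/* from the hunk above */
	#define SD_PREFER_SIBLING	0x1000	/* from the hunk above */

	static int sched_mc_power_savings, sched_smt_power_savings;

	/* Copied from the deleted block above */
	static int sd_balance_for_mc_power(void)
	{
		if (sched_smt_power_savings)
			return SD_POWERSAVINGS_BALANCE;
		if (!sched_mc_power_savings)
			return SD_PREFER_SIBLING;
		return 0;
	}

	static int sd_power_saving_flags(void)
	{
		if (sched_mc_power_savings | sched_smt_power_savings)
			return SD_BALANCE_NEWIDLE;
		return 0;
	}

	int main(void)
	{
		int mc, smt;

		/* Walk all knob levels (0..2, per the deleted enum) */
		for (mc = 0; mc <= 2; mc++) {
			for (smt = 0; smt <= 2; smt++) {
				sched_mc_power_savings = mc;
				sched_smt_power_savings = smt;
				printf("mc=%d smt=%d -> %#06x\n", mc, smt,
				       sd_balance_for_mc_power() |
				       sd_power_saving_flags());
			}
		}
		return 0;
	}

Note that with both knobs at 0 the helpers still yielded SD_PREFER_SIBLING at the MC level, so this patch also drops that default contribution, not just the opt-in power-savings behaviour.
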
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 4f59bf36f0af..09558d1daacd 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -98,7 +98,6 @@ int arch_update_cpu_topology(void);
 				| 0*SD_BALANCE_WAKE		\
 				| 1*SD_WAKE_AFFINE		\
 				| 1*SD_SHARE_CPUPOWER		\
-				| 0*SD_POWERSAVINGS_BALANCE	\
 				| 1*SD_SHARE_PKG_RESOURCES	\
 				| 0*SD_SERIALIZE		\
 				| 0*SD_PREFER_SIBLING		\
@@ -134,8 +133,6 @@ int arch_update_cpu_topology(void);
 				| 0*SD_SHARE_CPUPOWER		\
 				| 1*SD_SHARE_PKG_RESOURCES	\
 				| 0*SD_SERIALIZE		\
-				| sd_balance_for_mc_power()	\
-				| sd_power_saving_flags()	\
 				,				\
 	.last_balance		= jiffies,			\
 	.balance_interval	= 1,				\
@@ -167,8 +164,6 @@ int arch_update_cpu_topology(void);
 				| 0*SD_SHARE_CPUPOWER		\
 				| 0*SD_SHARE_PKG_RESOURCES	\
 				| 0*SD_SERIALIZE		\
-				| sd_balance_for_package_power()\
-				| sd_power_saving_flags()	\
 				,				\
 	.last_balance		= jiffies,			\
 	.balance_interval	= 1,				\
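
These initializers list every flag multiplied by 0 or 1, so deleting a line (as here) removes one flag from the documented set without disturbing the others. A tiny standalone illustration of the idiom, with flag values copied from the include/linux/sched.h hunk above:

	#include <stdio.h>

	#define SD_WAKE_AFFINE		0x0020
	#define SD_SHARE_CPUPOWER	0x0080
	#define SD_SHARE_PKG_RESOURCES	0x0200

	int main(void)
	{
		/* 1*FLAG sets the bit; 0*FLAG documents a deliberately clear bit */
		unsigned int flags = 1*SD_WAKE_AFFINE
				   | 1*SD_SHARE_CPUPOWER
				   | 0*SD_SHARE_PKG_RESOURCES;

		printf("%#06x\n", flags);	/* prints 0x00a0 */
		return 0;
	}
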
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bd314d7cd9f8..24ca677b5457 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5929,8 +5929,6 @@ static const struct cpumask *cpu_cpu_mask(int cpu)
 	return cpumask_of_node(cpu_to_node(cpu));
 }
 
-int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
-
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
@@ -6322,7 +6320,6 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
 					| 0*SD_WAKE_AFFINE
 					| 0*SD_PREFER_LOCAL
 					| 0*SD_SHARE_CPUPOWER
-					| 0*SD_POWERSAVINGS_BALANCE
 					| 0*SD_SHARE_PKG_RESOURCES
 					| 1*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
@@ -6819,97 +6816,6 @@ match2:
 	mutex_unlock(&sched_domains_mutex);
 }
 
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-static void reinit_sched_domains(void)
-{
-	get_online_cpus();
-
-	/* Destroy domains first to force the rebuild */
-	partition_sched_domains(0, NULL, NULL);
-
-	rebuild_sched_domains();
-	put_online_cpus();
-}
-
-static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
-{
-	unsigned int level = 0;
-
-	if (sscanf(buf, "%u", &level) != 1)
-		return -EINVAL;
-
-	/*
-	 * level is always be positive so don't check for
-	 * level < POWERSAVINGS_BALANCE_NONE which is 0
-	 * What happens on 0 or 1 byte write,
-	 * need to check for count as well?
-	 */
-
-	if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
-		return -EINVAL;
-
-	if (smt)
-		sched_smt_power_savings = level;
-	else
-		sched_mc_power_savings = level;
-
-	reinit_sched_domains();
-
-	return count;
-}
-
-#ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct device *dev,
-					   struct device_attribute *attr,
-					   char *buf)
-{
-	return sprintf(buf, "%u\n", sched_mc_power_savings);
-}
-static ssize_t sched_mc_power_savings_store(struct device *dev,
-					    struct device_attribute *attr,
-					    const char *buf, size_t count)
-{
-	return sched_power_savings_store(buf, count, 0);
-}
-static DEVICE_ATTR(sched_mc_power_savings, 0644,
-		   sched_mc_power_savings_show,
-		   sched_mc_power_savings_store);
-#endif
-
-#ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct device *dev,
-					    struct device_attribute *attr,
-					    char *buf)
-{
-	return sprintf(buf, "%u\n", sched_smt_power_savings);
-}
-static ssize_t sched_smt_power_savings_store(struct device *dev,
-					     struct device_attribute *attr,
-					     const char *buf, size_t count)
-{
-	return sched_power_savings_store(buf, count, 1);
-}
-static DEVICE_ATTR(sched_smt_power_savings, 0644,
-		   sched_smt_power_savings_show,
-		   sched_smt_power_savings_store);
-#endif
-
-int __init sched_create_sysfs_power_savings_entries(struct device *dev)
-{
-	int err = 0;
-
-#ifdef CONFIG_SCHED_SMT
-	if (smt_capable())
-		err = device_create_file(dev, &dev_attr_sched_smt_power_savings);
-#endif
-#ifdef CONFIG_SCHED_MC
-	if (!err && mc_capable())
-		err = device_create_file(dev, &dev_attr_sched_mc_power_savings);
-#endif
-	return err;
-}
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-
 /*
  * Update cpusets according to cpu_active mask. If cpusets are
  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0b42f4487329..940e6d17cf96 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2721,7 +2721,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 		 * If power savings logic is enabled for a domain, see if we
 		 * are not overloaded, if so, don't balance wider.
 		 */
-		if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) {
+		if (tmp->flags & (SD_PREFER_LOCAL)) {
 			unsigned long power = 0;
 			unsigned long nr_running = 0;
 			unsigned long capacity;
@@ -2734,9 +2734,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 
 			capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
 
-			if (tmp->flags & SD_POWERSAVINGS_BALANCE)
-				nr_running /= 2;
-
 			if (nr_running < capacity)
 				want_sd = 0;
 		}
@@ -3435,14 +3432,6 @@ struct sd_lb_stats {
 	unsigned int  busiest_group_weight;
 
 	int group_imb; /* Is there imbalance in this sd */
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	int power_savings_balance; /* Is powersave balance needed for this sd */
-	struct sched_group *group_min; /* Least loaded group in sd */
-	struct sched_group *group_leader; /* Group which relieves group_min */
-	unsigned long min_load_per_task; /* load_per_task in group_min */
-	unsigned long leader_nr_running; /* Nr running of group_leader */
-	unsigned long min_nr_running; /* Nr running of group_min */
-#endif
 };
 
 /*
@@ -3486,147 +3475,6 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 	return load_idx;
 }
 
-
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-/**
- * init_sd_power_savings_stats - Initialize power savings statistics for
- * the given sched_domain, during load balancing.
- *
- * @sd: Sched domain whose power-savings statistics are to be initialized.
- * @sds: Variable containing the statistics for sd.
- * @idle: Idle status of the CPU at which we're performing load-balancing.
- */
-static inline void init_sd_power_savings_stats(struct sched_domain *sd,
-	struct sd_lb_stats *sds, enum cpu_idle_type idle)
-{
-	/*
-	 * Busy processors will not participate in power savings
-	 * balance.
-	 */
-	if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
-		sds->power_savings_balance = 0;
-	else {
-		sds->power_savings_balance = 1;
-		sds->min_nr_running = ULONG_MAX;
-		sds->leader_nr_running = 0;
-	}
-}
-
-/**
- * update_sd_power_savings_stats - Update the power saving stats for a
- * sched_domain while performing load balancing.
- *
- * @group: sched_group belonging to the sched_domain under consideration.
- * @sds: Variable containing the statistics of the sched_domain
- * @local_group: Does group contain the CPU for which we're performing
- * load balancing ?
- * @sgs: Variable containing the statistics of the group.
- */
-static inline void update_sd_power_savings_stats(struct sched_group *group,
-	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
-{
-
-	if (!sds->power_savings_balance)
-		return;
-
-	/*
-	 * If the local group is idle or completely loaded
-	 * no need to do power savings balance at this domain
-	 */
-	if (local_group && (sds->this_nr_running >= sgs->group_capacity ||
-				!sds->this_nr_running))
-		sds->power_savings_balance = 0;
-
-	/*
-	 * If a group is already running at full capacity or idle,
-	 * don't include that group in power savings calculations
-	 */
-	if (!sds->power_savings_balance ||
-		sgs->sum_nr_running >= sgs->group_capacity ||
-		!sgs->sum_nr_running)
-		return;
-
-	/*
-	 * Calculate the group which has the least non-idle load.
-	 * This is the group from where we need to pick up the load
-	 * for saving power
-	 */
-	if ((sgs->sum_nr_running < sds->min_nr_running) ||
-	    (sgs->sum_nr_running == sds->min_nr_running &&
-	     group_first_cpu(group) > group_first_cpu(sds->group_min))) {
-		sds->group_min = group;
-		sds->min_nr_running = sgs->sum_nr_running;
-		sds->min_load_per_task = sgs->sum_weighted_load /
-					   sgs->sum_nr_running;
-	}
-
-	/*
-	 * Calculate the group which is almost near its
-	 * capacity but still has some space to pick up some load
-	 * from other group and save more power
-	 */
-	if (sgs->sum_nr_running + 1 > sgs->group_capacity)
-		return;
-
-	if (sgs->sum_nr_running > sds->leader_nr_running ||
-	    (sgs->sum_nr_running == sds->leader_nr_running &&
-	     group_first_cpu(group) < group_first_cpu(sds->group_leader))) {
-		sds->group_leader = group;
-		sds->leader_nr_running = sgs->sum_nr_running;
-	}
-}
-
-/**
- * check_power_save_busiest_group - see if there is potential for some power-savings balance
- * @env: load balance environment
- * @sds: Variable containing the statistics of the sched_domain
- *	under consideration.
- *
- * Description:
- * Check if we have potential to perform some power-savings balance.
- * If yes, set the busiest group to be the least loaded group in the
- * sched_domain, so that it's CPUs can be put to idle.
- *
- * Returns 1 if there is potential to perform power-savings balance.
- * Else returns 0.
- */
-static inline
-int check_power_save_busiest_group(struct lb_env *env, struct sd_lb_stats *sds)
-{
-	if (!sds->power_savings_balance)
-		return 0;
-
-	if (sds->this != sds->group_leader ||
-	    sds->group_leader == sds->group_min)
-		return 0;
-
-	env->imbalance = sds->min_load_per_task;
-	sds->busiest = sds->group_min;
-
-	return 1;
-
-}
-#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-static inline void init_sd_power_savings_stats(struct sched_domain *sd,
-	struct sd_lb_stats *sds, enum cpu_idle_type idle)
-{
-	return;
-}
-
-static inline void update_sd_power_savings_stats(struct sched_group *group,
-	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
-{
-	return;
-}
-
-static inline
-int check_power_save_busiest_group(struct lb_env *env, struct sd_lb_stats *sds)
-{
-	return 0;
-}
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-
-
 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
 {
 	return SCHED_POWER_SCALE;
@@ -3932,7 +3780,6 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 	if (child && child->flags & SD_PREFER_SIBLING)
 		prefer_sibling = 1;
 
-	init_sd_power_savings_stats(env->sd, sds, env->idle);
 	load_idx = get_sd_load_idx(env->sd, env->idle);
 
 	do {
@@ -3981,7 +3828,6 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 			sds->group_imb = sgs.group_imb;
 		}
 
-		update_sd_power_savings_stats(sg, sds, local_group, &sgs);
 		sg = sg->next;
 	} while (sg != env->sd->groups);
 }
@@ -4276,12 +4122,6 @@ force_balance:
 	return sds.busiest;
 
 out_balanced:
-	/*
-	 * There is no obvious imbalance. But check if we can do some balancing
-	 * to save power.
-	 */
-	if (check_power_save_busiest_group(env, &sds))
-		return sds.busiest;
 ret:
 	env->imbalance = 0;
 	return NULL;
@@ -4359,28 +4199,6 @@ static int need_active_balance(struct lb_env *env)
 		 */
 		if ((sd->flags & SD_ASYM_PACKING) && env->src_cpu > env->dst_cpu)
 			return 1;
-
-		/*
-		 * The only task running in a non-idle cpu can be moved to this
-		 * cpu in an attempt to completely freeup the other CPU
-		 * package.
-		 *
-		 * The package power saving logic comes from
-		 * find_busiest_group(). If there are no imbalance, then
-		 * f_b_g() will return NULL. However when sched_mc={1,2} then
-		 * f_b_g() will select a group from which a running task may be
-		 * pulled to this cpu in order to make the other package idle.
-		 * If there is no opportunity to make a package idle and if
-		 * there are no imbalance, then f_b_g() will return NULL and no
-		 * action will be taken in load_balance_newidle().
-		 *
-		 * Under normal task pull operation due to imbalance, there
-		 * will be more than one task in the source run queue and
-		 * move_tasks() will succeed. ld_moved will be true and this
-		 * active balance code will not be triggered.
-		 */
-		if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
-			return 0;
 	}
 
 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
@@ -4700,104 +4518,15 @@ static struct {
 	unsigned long next_balance;	/* in jiffy units */
 } nohz ____cacheline_aligned;
 
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-/**
- * lowest_flag_domain - Return lowest sched_domain containing flag.
- * @cpu:	The cpu whose lowest level of sched domain is to
- *		be returned.
- * @flag:	The flag to check for the lowest sched_domain
- *		for the given cpu.
- *
- * Returns the lowest sched_domain of a cpu which contains the given flag.
- */
-static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
-{
-	struct sched_domain *sd;
-
-	for_each_domain(cpu, sd)
-		if (sd->flags & flag)
-			break;
-
-	return sd;
-}
-
-/**
- * for_each_flag_domain - Iterates over sched_domains containing the flag.
- * @cpu:	The cpu whose domains we're iterating over.
- * @sd:		variable holding the value of the power_savings_sd
- *		for cpu.
- * @flag:	The flag to filter the sched_domains to be iterated.
- *
- * Iterates over all the scheduler domains for a given cpu that has the 'flag'
- * set, starting from the lowest sched_domain to the highest.
- */
-#define for_each_flag_domain(cpu, sd, flag) \
-	for (sd = lowest_flag_domain(cpu, flag); \
-		(sd && (sd->flags & flag)); sd = sd->parent)
-
-/**
- * find_new_ilb - Finds the optimum idle load balancer for nomination.
- * @cpu:	The cpu which is nominating a new idle_load_balancer.
- *
- * Returns:	Returns the id of the idle load balancer if it exists,
- *		Else, returns >= nr_cpu_ids.
- *
- * This algorithm picks the idle load balancer such that it belongs to a
- * semi-idle powersavings sched_domain. The idea is to try and avoid
- * completely idle packages/cores just for the purpose of idle load balancing
- * when there are other idle cpu's which are better suited for that job.
- */
-static int find_new_ilb(int cpu)
+static inline int find_new_ilb(int call_cpu)
 {
 	int ilb = cpumask_first(nohz.idle_cpus_mask);
-	struct sched_group *ilbg;
-	struct sched_domain *sd;
 
-	/*
-	 * Have idle load balancer selection from semi-idle packages only
-	 * when power-aware load balancing is enabled
-	 */
-	if (!(sched_smt_power_savings || sched_mc_power_savings))
-		goto out_done;
-
-	/*
-	 * Optimize for the case when we have no idle CPUs or only one
-	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
-	 */
-	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
-		goto out_done;
-
-	rcu_read_lock();
-	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
-		ilbg = sd->groups;
-
-		do {
-			if (ilbg->group_weight !=
-				atomic_read(&ilbg->sgp->nr_busy_cpus)) {
-				ilb = cpumask_first_and(nohz.idle_cpus_mask,
-							sched_group_cpus(ilbg));
-				goto unlock;
-			}
-
-			ilbg = ilbg->next;
-
-		} while (ilbg != sd->groups);
-	}
-unlock:
-	rcu_read_unlock();
-
-out_done:
 	if (ilb < nr_cpu_ids && idle_cpu(ilb))
 		return ilb;
 
 	return nr_cpu_ids;
 }
-#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
-static inline int find_new_ilb(int call_cpu)
-{
-	return nr_cpu_ids;
-}
-#endif
 
 /*
  * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
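
With the power-aware selection gone, idle-load-balancer nomination reduces to the first-idle-CPU fallback. The surviving function, read off the '+' side of the hunk above:

	static inline int find_new_ilb(int call_cpu)
	{
		/* Nominate the first idle CPU, if any is actually idle */
		int ilb = cpumask_first(nohz.idle_cpus_mask);

		if (ilb < nr_cpu_ids && idle_cpu(ilb))
			return ilb;

		return nr_cpu_ids;
	}
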
diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1
index c4954a9fe4e7..9dbd536518ab 100644
--- a/tools/power/cpupower/man/cpupower-set.1
+++ b/tools/power/cpupower/man/cpupower-set.1
@@ -85,15 +85,6 @@ Possible values are:
 savings
 .RE
 
-sched_mc_power_savings is dependent upon SCHED_MC, which is
-itself architecture dependent.
-
-sched_smt_power_savings is dependent upon SCHED_SMT, which
-is itself architecture dependent.
-
-The two files are independent of each other. It is possible
-that one file may be present without the other.
-
 .SH "SEE ALSO"
 cpupower-info(1), cpupower-monitor(1), powertop(1)
 .PP
diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c
index c6343024a611..96e28c124b5c 100644
--- a/tools/power/cpupower/utils/helpers/sysfs.c
+++ b/tools/power/cpupower/utils/helpers/sysfs.c
@@ -362,22 +362,7 @@ char *sysfs_get_cpuidle_driver(void)
  */
 int sysfs_get_sched(const char *smt_mc)
 {
-	unsigned long value;
-	char linebuf[MAX_LINE_LEN];
-	char *endp;
-	char path[SYSFS_PATH_MAX];
-
-	if (strcmp("mc", smt_mc) && strcmp("smt", smt_mc))
-		return -EINVAL;
-
-	snprintf(path, sizeof(path),
-		PATH_TO_CPU "sched_%s_power_savings", smt_mc);
-	if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
-		return -1;
-	value = strtoul(linebuf, &endp, 0);
-	if (endp == linebuf || errno == ERANGE)
-		return -1;
-	return value;
+	return -ENODEV;
 }
 
 /*
@@ -388,21 +373,5 @@ int sysfs_get_sched(const char *smt_mc)
  */
 int sysfs_set_sched(const char *smt_mc, int val)
 {
-	char linebuf[MAX_LINE_LEN];
-	char path[SYSFS_PATH_MAX];
-	struct stat statbuf;
-
-	if (strcmp("mc", smt_mc) && strcmp("smt", smt_mc))
-		return -EINVAL;
-
-	snprintf(path, sizeof(path),
-		PATH_TO_CPU "sched_%s_power_savings", smt_mc);
-	sprintf(linebuf, "%d", val);
-
-	if (stat(path, &statbuf) != 0)
-		return -ENODEV;
-
-	if (sysfs_write_file(path, linebuf, MAX_LINE_LEN) == 0)
-		return -1;
-	return 0;
+	return -ENODEV;
 }
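
Both helpers now fail unconditionally with -ENODEV instead of touching sysfs. A hypothetical caller sketch (the prototypes are assumed from cpupower's helper headers; the fragment is illustrative only, not part of the patch):

	#include <stdio.h>

	/* assumed prototypes, as declared in cpupower's helpers */
	extern int sysfs_get_sched(const char *smt_mc);
	extern int sysfs_set_sched(const char *smt_mc, int val);

	int main(void)
	{
		/* After this patch both calls return -ENODEV for any argument,
		 * so cpupower's sched_mc/sched_smt code paths report the
		 * feature as unsupported rather than reading or writing
		 * the now-removed sysfs files. */
		if (sysfs_get_sched("mc") < 0)
			fprintf(stderr, "sched_mc_power_savings not supported\n");
		if (sysfs_set_sched("smt", 0) < 0)
			fprintf(stderr, "sched_smt_power_savings not supported\n");
		return 0;
	}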