 Documentation/ABI/testing/sysfs-devices-system-cpu |  25 ---
 Documentation/scheduler/sched-domains.txt          |   4 -
 arch/x86/kernel/smpboot.c                          |   3 +-
 drivers/base/cpu.c                                 |   4 -
 include/linux/cpu.h                                |   2 -
 include/linux/sched.h                              |  47 ------
 include/linux/topology.h                           |   5 -
 kernel/sched/core.c                                |  94 +---------
 kernel/sched/fair.c                                | 275 +--------------------
 tools/power/cpupower/man/cpupower-set.1            |   9 -
 tools/power/cpupower/utils/helpers/sysfs.c         |  35 +---
 11 files changed, 5 insertions(+), 498 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index e7be75b96e4b..5dab36448b44 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -9,31 +9,6 @@ Description:
 
 	/sys/devices/system/cpu/cpu#/
 
-What:		/sys/devices/system/cpu/sched_mc_power_savings
-		/sys/devices/system/cpu/sched_smt_power_savings
-Date:		June 2006
-Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
-Description:	Discover and adjust the kernel's multi-core scheduler support.
-
-		Possible values are:
-
-		0 - No power saving load balance (default value)
-		1 - Fill one thread/core/package first for long running threads
-		2 - Also bias task wakeups to semi-idle cpu package for power
-		    savings
-
-		sched_mc_power_savings is dependent upon SCHED_MC, which is
-		itself architecture dependent.
-
-		sched_smt_power_savings is dependent upon SCHED_SMT, which
-		is itself architecture dependent.
-
-		The two files are independent of each other. It is possible
-		that one file may be present without the other.
-
-		Introduced by git commit 5c45bf27.
-
-
 What:		/sys/devices/system/cpu/kernel_max
 		/sys/devices/system/cpu/offline
 		/sys/devices/system/cpu/online
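Note: the two files removed above were the whole userspace-visible ABI for the
power-savings levels. As a record of what is being retired, a userspace probe
of the old knob looked roughly like this (a minimal sketch; the helper name is
hypothetical, and the fopen() failure branch is the expected outcome on any
kernel carrying this patch):

    #include <stdio.h>

    /* Read the old knob; returns the level (0, 1 or 2) or -1 if the file
     * is absent, which is what kernels with this patch will report. */
    static int read_mc_power_savings(void)
    {
        FILE *f = fopen("/sys/devices/system/cpu/sched_mc_power_savings", "r");
        unsigned int level;
        int ok;

        if (!f)
            return -1;
        ok = (fscanf(f, "%u", &level) == 1);
        fclose(f);
        return ok ? (int)level : -1;
    }

    int main(void)
    {
        printf("sched_mc_power_savings: %d\n", read_mc_power_savings());
        return 0;
    }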
diff --git a/Documentation/scheduler/sched-domains.txt b/Documentation/scheduler/sched-domains.txt
index b7ee379b651b..443f0c76bab4 100644
--- a/Documentation/scheduler/sched-domains.txt
+++ b/Documentation/scheduler/sched-domains.txt
@@ -61,10 +61,6 @@ The implementor should read comments in include/linux/sched.h:
 struct sched_domain fields, SD_FLAG_*, SD_*_INIT to get an idea of
 the specifics and what to tune.
 
-For SMT, the architecture must define CONFIG_SCHED_SMT and provide a
-cpumask_t cpu_sibling_map[NR_CPUS], where cpu_sibling_map[i] is the mask of
-all "i"'s siblings as well as "i" itself.
-
 Architectures may retain the regular override the default SD_*_INIT flags
 while using the generic domain builder in kernel/sched.c if they wish to
 retain the traditional SMT->SMP->NUMA topology (or some subset of that). This
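Note: the deleted paragraph described a per-architecture contract rather than
behavior. What it specified, sketched with kernel-internal types (illustrative
only, not buildable standalone):

    /* Each SMT architecture exported one mask per CPU, holding that CPU
     * and all of its hardware siblings: */
    cpumask_t cpu_sibling_map[NR_CPUS];

    /* e.g. on a 2-way SMT core pairing CPUs 0 and 1:
     *   cpu_sibling_map[0] == cpu_sibling_map[1] == { 0, 1 }
     */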
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e84c1bbea339..256c20cc5e96 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -429,8 +429,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 	 * For perf, we return last level cache shared map.
 	 * And for power savings, we return cpu_core_map
 	 */
-	if ((sched_mc_power_savings || sched_smt_power_savings) &&
-	    !(cpu_has(c, X86_FEATURE_AMD_DCM)))
+	if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
 		return cpu_core_mask(cpu);
 	else
 		return cpu_llc_shared_mask(cpu);
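Note: with the power-savings test gone, every non-DCM x86 part now always gets
the core mask here, so the "And for power savings, we return cpu_core_map"
half of the comment in the context above is stale. The resulting function,
reconstructed as a sketch (the cpuinfo lookup for c is assumed from the
unshown top of cpu_coregroup_mask()):

    const struct cpumask *cpu_coregroup_mask(int cpu)
    {
        struct cpuinfo_x86 *c = &cpu_data(cpu);  /* assumed from context */

        /* Only AMD multi-node (DCM) processors fall back to the
         * last-level-cache mask; everyone else gets the core mask. */
        if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
            return cpu_core_mask(cpu);
        else
            return cpu_llc_shared_mask(cpu);
    }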
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index adf937bf4091..63452943abd1 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -330,8 +330,4 @@ void __init cpu_dev_init(void)
 		panic("Failed to register CPU subsystem");
 
 	cpu_dev_register_generic();
-
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	sched_create_sysfs_power_savings_entries(cpu_subsys.dev_root);
-#endif
 }
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index ee28844ae68e..7230bb59a06f 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -36,8 +36,6 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
 extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
 extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
 
-extern int sched_create_sysfs_power_savings_entries(struct device *dev);
-
 #ifdef CONFIG_HOTPLUG_CPU
 extern void unregister_cpu(struct cpu *cpu);
 extern ssize_t arch_cpu_probe(const char *, size_t);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4a559bf0622f..3d644809c9db 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -855,61 +855,14 @@ enum cpu_idle_type {
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
 #define SD_PREFER_LOCAL		0x0040	/* Prefer to keep tasks local to this domain */
 #define SD_SHARE_CPUPOWER	0x0080	/* Domain members share cpu power */
-#define SD_POWERSAVINGS_BALANCE	0x0100	/* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800	/* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
 #define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
 
-enum powersavings_balance_level {
-	POWERSAVINGS_BALANCE_NONE = 0,	/* No power saving load balance */
-	POWERSAVINGS_BALANCE_BASIC,	/* Fill one thread/core/package
-					 * first for long running threads
-					 */
-	POWERSAVINGS_BALANCE_WAKEUP,	/* Also bias task wakeups to semi-idle
-					 * cpu package for power savings
-					 */
-	MAX_POWERSAVINGS_BALANCE_LEVELS
-};
-
-extern int sched_mc_power_savings, sched_smt_power_savings;
-
-static inline int sd_balance_for_mc_power(void)
-{
-	if (sched_smt_power_savings)
-		return SD_POWERSAVINGS_BALANCE;
-
-	if (!sched_mc_power_savings)
-		return SD_PREFER_SIBLING;
-
-	return 0;
-}
-
-static inline int sd_balance_for_package_power(void)
-{
-	if (sched_mc_power_savings | sched_smt_power_savings)
-		return SD_POWERSAVINGS_BALANCE;
-
-	return SD_PREFER_SIBLING;
-}
-
 extern int __weak arch_sd_sibiling_asym_packing(void);
 
-/*
- * Optimise SD flags for power savings:
- * SD_BALANCE_NEWIDLE helps aggressive task consolidation and power savings.
- * Keep default SD flags if sched_{smt,mc}_power_saving=0
- */
-
-static inline int sd_power_saving_flags(void)
-{
-	if (sched_mc_power_savings | sched_smt_power_savings)
-		return SD_BALANCE_NEWIDLE;
-
-	return 0;
-}
-
 struct sched_group_power {
 	atomic_t ref;
 	/*
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 4f59bf36f0af..09558d1daacd 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -98,7 +98,6 @@ int arch_update_cpu_topology(void);
 				| 0*SD_BALANCE_WAKE			\
 				| 1*SD_WAKE_AFFINE			\
 				| 1*SD_SHARE_CPUPOWER			\
-				| 0*SD_POWERSAVINGS_BALANCE		\
 				| 1*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
 				| 0*SD_PREFER_SIBLING			\
@@ -134,8 +133,6 @@ int arch_update_cpu_topology(void);
 				| 0*SD_SHARE_CPUPOWER			\
 				| 1*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
-				| sd_balance_for_mc_power()		\
-				| sd_power_saving_flags()		\
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
@@ -167,8 +164,6 @@ int arch_update_cpu_topology(void);
 				| 0*SD_SHARE_CPUPOWER			\
 				| 0*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
-				| sd_balance_for_package_power()	\
-				| sd_power_saving_flags()		\
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
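Note: the SD_*_INIT initializers build their flag word with a
multiply-by-zero-or-one idiom, so every known flag stays spelled out and
enabling or disabling one is a single-character edit. A standalone
illustration with hypothetical flag values (not the kernel's SD_* bits):

    #include <stdio.h>

    #define F_A 0x1
    #define F_B 0x2
    #define F_C 0x4

    int main(void)
    {
        /* Multiplying by 1 keeps a flag, by 0 drops it, while the full
         * list of known flags remains visible and greppable. */
        unsigned int flags = 0*F_A
                           | 1*F_B
                           | 0*F_C;

        printf("flags = %#x\n", flags);  /* prints 0x2, i.e. F_B only */
        return 0;
    }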
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bd314d7cd9f8..24ca677b5457 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5929,8 +5929,6 @@ static const struct cpumask *cpu_cpu_mask(int cpu)
 	return cpumask_of_node(cpu_to_node(cpu));
 }
 
-int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
-
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
@@ -6322,7 +6320,6 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
 					| 0*SD_WAKE_AFFINE
 					| 0*SD_PREFER_LOCAL
 					| 0*SD_SHARE_CPUPOWER
-					| 0*SD_POWERSAVINGS_BALANCE
 					| 0*SD_SHARE_PKG_RESOURCES
 					| 1*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
@@ -6819,97 +6816,6 @@ match2:
 	mutex_unlock(&sched_domains_mutex);
 }
 
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-static void reinit_sched_domains(void)
-{
-	get_online_cpus();
-
-	/* Destroy domains first to force the rebuild */
-	partition_sched_domains(0, NULL, NULL);
-
-	rebuild_sched_domains();
-	put_online_cpus();
-}
-
-static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
-{
-	unsigned int level = 0;
-
-	if (sscanf(buf, "%u", &level) != 1)
-		return -EINVAL;
-
-	/*
-	 * level is always be positive so don't check for
-	 * level < POWERSAVINGS_BALANCE_NONE which is 0
-	 * What happens on 0 or 1 byte write,
-	 * need to check for count as well?
-	 */
-
-	if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
-		return -EINVAL;
-
-	if (smt)
-		sched_smt_power_savings = level;
-	else
-		sched_mc_power_savings = level;
-
-	reinit_sched_domains();
-
-	return count;
-}
-
-#ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct device *dev,
-					   struct device_attribute *attr,
-					   char *buf)
-{
-	return sprintf(buf, "%u\n", sched_mc_power_savings);
-}
-static ssize_t sched_mc_power_savings_store(struct device *dev,
-					    struct device_attribute *attr,
-					    const char *buf, size_t count)
-{
-	return sched_power_savings_store(buf, count, 0);
-}
-static DEVICE_ATTR(sched_mc_power_savings, 0644,
-		   sched_mc_power_savings_show,
-		   sched_mc_power_savings_store);
-#endif
-
-#ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct device *dev,
-					    struct device_attribute *attr,
-					    char *buf)
-{
-	return sprintf(buf, "%u\n", sched_smt_power_savings);
-}
-static ssize_t sched_smt_power_savings_store(struct device *dev,
-					     struct device_attribute *attr,
-					     const char *buf, size_t count)
-{
-	return sched_power_savings_store(buf, count, 1);
-}
-static DEVICE_ATTR(sched_smt_power_savings, 0644,
-		   sched_smt_power_savings_show,
-		   sched_smt_power_savings_store);
-#endif
-
-int __init sched_create_sysfs_power_savings_entries(struct device *dev)
-{
-	int err = 0;
-
-#ifdef CONFIG_SCHED_SMT
-	if (smt_capable())
-		err = device_create_file(dev, &dev_attr_sched_smt_power_savings);
-#endif
-#ifdef CONFIG_SCHED_MC
-	if (!err && mc_capable())
-		err = device_create_file(dev, &dev_attr_sched_mc_power_savings);
-#endif
-	return err;
-}
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-
 /*
  * Update cpusets according to cpu_active mask. If cpusets are
 * disabled, cpuset_update_active_cpus() becomes a simple wrapper
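Note: the deleted store path parsed one unsigned integer, rejected anything at
or above MAX_POWERSAVINGS_BALANCE_LEVELS (i.e. anything but 0, 1 or 2), and
then tore down and rebuilt the scheduler domains so the new level took effect.
From userspace the retired knob was driven like this (illustrative sketch
only; the helper name is hypothetical and the file no longer exists once this
patch is applied):

    #include <stdio.h>

    /* Write a level (0, 1 or 2) to the old knob; the removed kernel-side
     * store returned -EINVAL for out-of-range values. */
    static int set_mc_power_savings(unsigned int level)
    {
        FILE *f = fopen("/sys/devices/system/cpu/sched_mc_power_savings", "w");

        if (!f)
            return -1;  /* expected on kernels with this patch */
        fprintf(f, "%u\n", level);
        return fclose(f);
    }

    int main(void)
    {
        return set_mc_power_savings(1) ? 1 : 0;
    }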
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0b42f4487329..940e6d17cf96 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2721,7 +2721,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 		 * If power savings logic is enabled for a domain, see if we
 		 * are not overloaded, if so, don't balance wider.
 		 */
-		if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) {
+		if (tmp->flags & (SD_PREFER_LOCAL)) {
 			unsigned long power = 0;
 			unsigned long nr_running = 0;
 			unsigned long capacity;
@@ -2734,9 +2734,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 
 			capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
 
-			if (tmp->flags & SD_POWERSAVINGS_BALANCE)
-				nr_running /= 2;
-
 			if (nr_running < capacity)
 				want_sd = 0;
 		}
@@ -3435,14 +3432,6 @@ struct sd_lb_stats {
 	unsigned int busiest_group_weight;
 
 	int group_imb; /* Is there imbalance in this sd */
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	int power_savings_balance; /* Is powersave balance needed for this sd */
-	struct sched_group *group_min; /* Least loaded group in sd */
-	struct sched_group *group_leader; /* Group which relieves group_min */
-	unsigned long min_load_per_task; /* load_per_task in group_min */
-	unsigned long leader_nr_running; /* Nr running of group_leader */
-	unsigned long min_nr_running; /* Nr running of group_min */
-#endif
 };
 
 /*
@@ -3486,147 +3475,6 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 	return load_idx;
 }
 
-
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-/**
- * init_sd_power_savings_stats - Initialize power savings statistics for
- * the given sched_domain, during load balancing.
- *
- * @sd: Sched domain whose power-savings statistics are to be initialized.
- * @sds: Variable containing the statistics for sd.
- * @idle: Idle status of the CPU at which we're performing load-balancing.
- */
-static inline void init_sd_power_savings_stats(struct sched_domain *sd,
-	struct sd_lb_stats *sds, enum cpu_idle_type idle)
-{
-	/*
-	 * Busy processors will not participate in power savings
-	 * balance.
-	 */
-	if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
-		sds->power_savings_balance = 0;
-	else {
-		sds->power_savings_balance = 1;
-		sds->min_nr_running = ULONG_MAX;
-		sds->leader_nr_running = 0;
-	}
-}
-
-/**
- * update_sd_power_savings_stats - Update the power saving stats for a
- * sched_domain while performing load balancing.
- *
- * @group: sched_group belonging to the sched_domain under consideration.
- * @sds: Variable containing the statistics of the sched_domain
- * @local_group: Does group contain the CPU for which we're performing
- * load balancing ?
- * @sgs: Variable containing the statistics of the group.
- */
-static inline void update_sd_power_savings_stats(struct sched_group *group,
-	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
-{
-
-	if (!sds->power_savings_balance)
-		return;
-
-	/*
-	 * If the local group is idle or completely loaded
-	 * no need to do power savings balance at this domain
-	 */
-	if (local_group && (sds->this_nr_running >= sgs->group_capacity ||
-				!sds->this_nr_running))
-		sds->power_savings_balance = 0;
-
-	/*
-	 * If a group is already running at full capacity or idle,
-	 * don't include that group in power savings calculations
-	 */
-	if (!sds->power_savings_balance ||
-		sgs->sum_nr_running >= sgs->group_capacity ||
-		!sgs->sum_nr_running)
-		return;
-
-	/*
-	 * Calculate the group which has the least non-idle load.
-	 * This is the group from where we need to pick up the load
-	 * for saving power
-	 */
-	if ((sgs->sum_nr_running < sds->min_nr_running) ||
-	    (sgs->sum_nr_running == sds->min_nr_running &&
-	     group_first_cpu(group) > group_first_cpu(sds->group_min))) {
-		sds->group_min = group;
-		sds->min_nr_running = sgs->sum_nr_running;
-		sds->min_load_per_task = sgs->sum_weighted_load /
-			sgs->sum_nr_running;
-	}
-
-	/*
-	 * Calculate the group which is almost near its
-	 * capacity but still has some space to pick up some load
-	 * from other group and save more power
-	 */
-	if (sgs->sum_nr_running + 1 > sgs->group_capacity)
-		return;
-
-	if (sgs->sum_nr_running > sds->leader_nr_running ||
-	    (sgs->sum_nr_running == sds->leader_nr_running &&
-	     group_first_cpu(group) < group_first_cpu(sds->group_leader))) {
-		sds->group_leader = group;
-		sds->leader_nr_running = sgs->sum_nr_running;
-	}
-}
-
-/**
- * check_power_save_busiest_group - see if there is potential for some power-savings balance
- * @env: load balance environment
- * @sds: Variable containing the statistics of the sched_domain
- *	under consideration.
- *
- * Description:
- * Check if we have potential to perform some power-savings balance.
- * If yes, set the busiest group to be the least loaded group in the
- * sched_domain, so that it's CPUs can be put to idle.
- *
- * Returns 1 if there is potential to perform power-savings balance.
- * Else returns 0.
- */
-static inline
-int check_power_save_busiest_group(struct lb_env *env, struct sd_lb_stats *sds)
-{
-	if (!sds->power_savings_balance)
-		return 0;
-
-	if (sds->this != sds->group_leader ||
-			sds->group_leader == sds->group_min)
-		return 0;
-
-	env->imbalance = sds->min_load_per_task;
-	sds->busiest = sds->group_min;
-
-	return 1;
-
-}
-#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-static inline void init_sd_power_savings_stats(struct sched_domain *sd,
-	struct sd_lb_stats *sds, enum cpu_idle_type idle)
-{
-	return;
-}
-
-static inline void update_sd_power_savings_stats(struct sched_group *group,
-	struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
-{
-	return;
-}
-
-static inline
-int check_power_save_busiest_group(struct lb_env *env, struct sd_lb_stats *sds)
-{
-	return 0;
-}
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
-
-
 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
 {
 	return SCHED_POWER_SCALE;
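Note: the heart of the removed heuristic is in update_sd_power_savings_stats()
above. Among groups that are neither idle nor full, the least-loaded one
becomes group_min (the migration source) and the most-loaded one that still
has a free task slot becomes group_leader (the destination), so that
group_min's package can be emptied and put to sleep. A toy standalone
re-creation of just that selection rule (simplified numbers assumed for
illustration; this is not kernel code):

    #include <stdio.h>

    struct grp { int nr_running, capacity; };

    int main(void)
    {
        /* Four sched groups: running tasks vs. task capacity. */
        struct grp g[] = { {1, 4}, {3, 4}, {4, 4}, {0, 4} };
        int n = sizeof(g) / sizeof(g[0]);
        int min = -1, leader = -1;

        for (int i = 0; i < n; i++) {
            /* Idle and already-full groups are excluded, as above. */
            if (!g[i].nr_running || g[i].nr_running >= g[i].capacity)
                continue;
            /* group_min: lightest non-idle group, the migration source. */
            if (min < 0 || g[i].nr_running < g[min].nr_running)
                min = i;
            /* group_leader: fullest group that still has a free slot. */
            if (g[i].nr_running + 1 <= g[i].capacity &&
                (leader < 0 || g[i].nr_running > g[leader].nr_running))
                leader = i;
        }
        printf("group_min=%d group_leader=%d\n", min, leader); /* 0 and 1 */
        return 0;
    }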
@@ -3932,7 +3780,6 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 	if (child && child->flags & SD_PREFER_SIBLING)
 		prefer_sibling = 1;
 
-	init_sd_power_savings_stats(env->sd, sds, env->idle);
 	load_idx = get_sd_load_idx(env->sd, env->idle);
 
 	do {
@@ -3981,7 +3828,6 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 			sds->group_imb = sgs.group_imb;
 		}
 
-		update_sd_power_savings_stats(sg, sds, local_group, &sgs);
 		sg = sg->next;
 	} while (sg != env->sd->groups);
 }
@@ -4276,12 +4122,6 @@ force_balance:
 	return sds.busiest;
 
 out_balanced:
-	/*
-	 * There is no obvious imbalance. But check if we can do some balancing
-	 * to save power.
-	 */
-	if (check_power_save_busiest_group(env, &sds))
-		return sds.busiest;
 ret:
 	env->imbalance = 0;
 	return NULL;
@@ -4359,28 +4199,6 @@ static int need_active_balance(struct lb_env *env)
 		 */
 		if ((sd->flags & SD_ASYM_PACKING) && env->src_cpu > env->dst_cpu)
 			return 1;
-
-		/*
-		 * The only task running in a non-idle cpu can be moved to this
-		 * cpu in an attempt to completely freeup the other CPU
-		 * package.
-		 *
-		 * The package power saving logic comes from
-		 * find_busiest_group(). If there are no imbalance, then
-		 * f_b_g() will return NULL. However when sched_mc={1,2} then
-		 * f_b_g() will select a group from which a running task may be
-		 * pulled to this cpu in order to make the other package idle.
-		 * If there is no opportunity to make a package idle and if
-		 * there are no imbalance, then f_b_g() will return NULL and no
-		 * action will be taken in load_balance_newidle().
-		 *
-		 * Under normal task pull operation due to imbalance, there
-		 * will be more than one task in the source run queue and
-		 * move_tasks() will succeed.  ld_moved will be true and this
-		 * active balance code will not be triggered.
-		 */
-		if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
-			return 0;
 	}
 
 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
@@ -4700,104 +4518,15 @@ static struct {
 	unsigned long next_balance;	/* in jiffy units */
 } nohz ____cacheline_aligned;
 
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-/**
- * lowest_flag_domain - Return lowest sched_domain containing flag.
- * @cpu:	The cpu whose lowest level of sched domain is to
- *		be returned.
- * @flag:	The flag to check for the lowest sched_domain
- *		for the given cpu.
- *
- * Returns the lowest sched_domain of a cpu which contains the given flag.
- */
-static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
-{
-	struct sched_domain *sd;
-
-	for_each_domain(cpu, sd)
-		if (sd->flags & flag)
-			break;
-
-	return sd;
-}
-
-/**
- * for_each_flag_domain - Iterates over sched_domains containing the flag.
- * @cpu:	The cpu whose domains we're iterating over.
- * @sd:		variable holding the value of the power_savings_sd
- *		for cpu.
- * @flag:	The flag to filter the sched_domains to be iterated.
- *
- * Iterates over all the scheduler domains for a given cpu that has the 'flag'
- * set, starting from the lowest sched_domain to the highest.
- */
-#define for_each_flag_domain(cpu, sd, flag) \
-	for (sd = lowest_flag_domain(cpu, flag); \
-		(sd && (sd->flags & flag)); sd = sd->parent)
-
-/**
- * find_new_ilb - Finds the optimum idle load balancer for nomination.
- * @cpu:	The cpu which is nominating a new idle_load_balancer.
- *
- * Returns:	Returns the id of the idle load balancer if it exists,
- *		Else, returns >= nr_cpu_ids.
- *
- * This algorithm picks the idle load balancer such that it belongs to a
- * semi-idle powersavings sched_domain. The idea is to try and avoid
- * completely idle packages/cores just for the purpose of idle load balancing
- * when there are other idle cpu's which are better suited for that job.
- */
-static int find_new_ilb(int cpu)
+static inline int find_new_ilb(int call_cpu)
 {
 	int ilb = cpumask_first(nohz.idle_cpus_mask);
-	struct sched_group *ilbg;
-	struct sched_domain *sd;
 
-	/*
-	 * Have idle load balancer selection from semi-idle packages only
-	 * when power-aware load balancing is enabled
-	 */
-	if (!(sched_smt_power_savings || sched_mc_power_savings))
-		goto out_done;
-
-	/*
-	 * Optimize for the case when we have no idle CPUs or only one
-	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
-	 */
-	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
-		goto out_done;
-
-	rcu_read_lock();
-	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
-		ilbg = sd->groups;
-
-		do {
-			if (ilbg->group_weight !=
-				atomic_read(&ilbg->sgp->nr_busy_cpus)) {
-				ilb = cpumask_first_and(nohz.idle_cpus_mask,
-							sched_group_cpus(ilbg));
-				goto unlock;
-			}
-
-			ilbg = ilbg->next;
-
-		} while (ilbg != sd->groups);
-	}
-unlock:
-	rcu_read_unlock();
-
-out_done:
 	if (ilb < nr_cpu_ids && idle_cpu(ilb))
 		return ilb;
 
 	return nr_cpu_ids;
 }
-#else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
-static inline int find_new_ilb(int call_cpu)
-{
-	return nr_cpu_ids;
-}
-#endif
 
 /*
  * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
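Note: after this hunk, nominating an idle load balancer no longer walks the
domain hierarchy at all; it simply takes the first CPU in
nohz.idle_cpus_mask, provided that CPU is still idle. A toy userspace
analogue of the surviving shape (a plain bitmask stands in for the kernel's
cpumask; for illustration only):

    #include <stdio.h>

    #define NR_CPUS 8

    /* Scan an "idle CPUs" bitmap and nominate the first idle CPU, or
     * report NR_CPUS when there is none; this mirrors the shape of the
     * simplified find_new_ilb() above. */
    static int toy_find_new_ilb(unsigned int idle_mask)
    {
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            if (idle_mask & (1u << cpu))
                return cpu;
        return NR_CPUS;
    }

    int main(void)
    {
        printf("ilb = %d\n", toy_find_new_ilb(0x0c)); /* CPUs 2,3 idle -> 2 */
        return 0;
    }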
diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1
index c4954a9fe4e7..9dbd536518ab 100644
--- a/tools/power/cpupower/man/cpupower-set.1
+++ b/tools/power/cpupower/man/cpupower-set.1
@@ -85,15 +85,6 @@ Possible values are:
 savings
 .RE
 
-sched_mc_power_savings is dependent upon SCHED_MC, which is
-itself architecture dependent.
-
-sched_smt_power_savings is dependent upon SCHED_SMT, which
-is itself architecture dependent.
-
-The two files are independent of each other. It is possible
-that one file may be present without the other.
-
 .SH "SEE ALSO"
 cpupower-info(1), cpupower-monitor(1), powertop(1)
 .PP
diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c
index c6343024a611..96e28c124b5c 100644
--- a/tools/power/cpupower/utils/helpers/sysfs.c
+++ b/tools/power/cpupower/utils/helpers/sysfs.c
@@ -362,22 +362,7 @@ char *sysfs_get_cpuidle_driver(void)
  */
 int sysfs_get_sched(const char *smt_mc)
 {
-	unsigned long value;
-	char linebuf[MAX_LINE_LEN];
-	char *endp;
-	char path[SYSFS_PATH_MAX];
-
-	if (strcmp("mc", smt_mc) && strcmp("smt", smt_mc))
-		return -EINVAL;
-
-	snprintf(path, sizeof(path),
-		PATH_TO_CPU "sched_%s_power_savings", smt_mc);
-	if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
-		return -1;
-	value = strtoul(linebuf, &endp, 0);
-	if (endp == linebuf || errno == ERANGE)
-		return -1;
-	return value;
+	return -ENODEV;
 }
 
 /*
@@ -388,21 +373,5 @@ int sysfs_get_sched(const char *smt_mc)
  */
 int sysfs_set_sched(const char *smt_mc, int val)
 {
-	char linebuf[MAX_LINE_LEN];
-	char path[SYSFS_PATH_MAX];
-	struct stat statbuf;
-
-	if (strcmp("mc", smt_mc) && strcmp("smt", smt_mc))
-		return -EINVAL;
-
-	snprintf(path, sizeof(path),
-		PATH_TO_CPU "sched_%s_power_savings", smt_mc);
-	sprintf(linebuf, "%d", val);
-
-	if (stat(path, &statbuf) != 0)
-		return -ENODEV;
-
-	if (sysfs_write_file(path, linebuf, MAX_LINE_LEN) == 0)
-		return -1;
-	return 0;
+	return -ENODEV;
 }
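Note: both cpupower helpers are now unconditional -ENODEV stubs, so existing
call sites keep compiling and simply report the feature as unavailable. A
hypothetical caller, linked against cpupower's helpers, would see (sketch):

    #include <stdio.h>

    extern int sysfs_get_sched(const char *smt_mc);
    extern int sysfs_set_sched(const char *smt_mc, int val);

    int main(void)
    {
        /* Both stubs return -ENODEV regardless of the "mc"/"smt" argument. */
        if (sysfs_get_sched("mc") < 0)
            printf("sched_mc_power_savings is not available\n");
        if (sysfs_set_sched("smt", 0) < 0)
            printf("sched_smt_power_savings is not available\n");
        return 0;
    }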
