diff options
-rw-r--r-- | Documentation/ABI/testing/sysfs-devices-system-cpu | 11 | ||||
-rw-r--r-- | Documentation/cpu-freq/boost.txt | 93 | ||||
-rw-r--r-- | drivers/cpufreq/acpi-cpufreq.c | 177 |
3 files changed, 281 insertions, 0 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 5dab36448b44..6943133afcb8 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu | |||
@@ -176,3 +176,14 @@ Description: Disable L3 cache indices | |||
176 | All AMD processors with L3 caches provide this functionality. | 176 | All AMD processors with L3 caches provide this functionality. |
177 | For details, see BKDGs at | 177 | For details, see BKDGs at |
178 | http://developer.amd.com/documentation/guides/Pages/default.aspx | 178 | http://developer.amd.com/documentation/guides/Pages/default.aspx |
179 | |||
180 | |||
181 | What: /sys/devices/system/cpu/cpufreq/boost | ||
182 | Date: August 2012 | ||
183 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
184 | Description: Processor frequency boosting control | ||
185 | |||
186 | This switch controls the boost setting for the whole system. | ||
187 | Boosting allows the CPU and the firmware to run at a frequency | ||
188 | beyond its nominal limit. | ||
189 | More details can be found in Documentation/cpu-freq/boost.txt | ||
diff --git a/Documentation/cpu-freq/boost.txt b/Documentation/cpu-freq/boost.txt new file mode 100644 index 000000000000..9b4edfcf486f --- /dev/null +++ b/Documentation/cpu-freq/boost.txt | |||
@@ -0,0 +1,93 @@ | |||
1 | Processor boosting control | ||
2 | |||
3 | - information for users - | ||
4 | |||
5 | Quick guide for the impatient: | ||
6 | -------------------- | ||
7 | /sys/devices/system/cpu/cpufreq/boost | ||
8 | controls the boost setting for the whole system. You can read and write | ||
9 | that file with either "0" (boosting disabled) or "1" (boosting allowed). | ||
10 | Reading or writing 1 does not mean that the system is boosting at this | ||
11 | very moment, but only that the CPU _may_ raise the frequency at its | ||
12 | discretion. | ||
13 | -------------------- | ||
14 | |||
15 | Introduction | ||
16 | ------------- | ||
17 | Some CPUs support a functionality to raise the operating frequency of | ||
18 | some cores in a multi-core package if certain conditions apply, mostly | ||
19 | if the whole chip is not fully utilized and below its intended thermal | ||
20 | budget. This is done without operating system control by a combination | ||
21 | of hardware and firmware. | ||
22 | On Intel CPUs this is called "Turbo Boost", AMD calls it "Turbo-Core", | ||
23 | in technical documentation "Core performance boost". In Linux we use | ||
24 | the term "boost" for convenience. | ||
25 | |||
26 | Rationale for disable switch | ||
27 | ---------------------------- | ||
28 | |||
29 | Though the idea is to just give better performance without any user | ||
30 | intervention, sometimes the need arises to disable this functionality. | ||
31 | Most systems offer a switch in the (BIOS) firmware to disable the | ||
32 | functionality altogether, but a more fine-grained and dynamic control would | ||
33 | be desirable: | ||
34 | 1. While running benchmarks, reproducible results are important. Since | ||
35 | the boosting functionality depends on the load of the whole package, | ||
36 | single thread performance can vary. By explicitly disabling the boost | ||
37 | functionality at least for the benchmark's run-time the system will run | ||
38 | at a fixed frequency and results are reproducible again. | ||
39 | 2. To examine the impact of the boosting functionality it is helpful | ||
40 | to do tests with and without boosting. | ||
41 | 3. Boosting means overclocking the processor, though under controlled | ||
42 | conditions. By raising the frequency and the voltage the processor | ||
43 | will consume more power than without the boosting, which may be | ||
44 | undesirable for instance for mobile users. Disabling boosting may | ||
45 | save power here, though this depends on the workload. | ||
46 | |||
47 | |||
48 | User controlled switch | ||
49 | ---------------------- | ||
50 | |||
51 | To allow the user to toggle the boosting functionality, the acpi-cpufreq | ||
52 | driver exports a sysfs knob to disable it. There is a file: | ||
53 | /sys/devices/system/cpu/cpufreq/boost | ||
54 | which can either read "0" (boosting disabled) or "1" (boosting enabled). | ||
55 | Reading the file is always supported, even if the processor does not | ||
56 | support boosting. In this case the file will be read-only and always | ||
57 | reads as "0". Explicitly changing the permissions and writing to that | ||
58 | file anyway will return EINVAL. | ||
59 | |||
60 | On supported CPUs one can write either a "0" or a "1" into this file. | ||
61 | This will either disable the boost functionality on all cores in the | ||
62 | whole system (0) or will allow the hardware to boost at will (1). | ||
63 | |||
64 | Writing a "1" does not explicitly boost the system, but just allows the | ||
65 | CPU (and the firmware) to boost at their discretion. Some implementations | ||
66 | take external factors like the chip's temperature into account, so | ||
67 | boosting once does not necessarily mean that it will occur every time | ||
68 | even using the exact same software setup. | ||
69 | |||
70 | |||
71 | AMD legacy cpb switch | ||
72 | --------------------- | ||
73 | The AMD powernow-k8 driver used to support a very similar switch to | ||
74 | disable or enable the "Core Performance Boost" feature of some AMD CPUs. | ||
75 | This switch was instantiated in each CPU's cpufreq directory | ||
76 | (/sys/devices/system/cpu[0-9]*/cpufreq) and was called "cpb". | ||
77 | Though the per CPU existence hints at a more fine grained control, the | ||
78 | actual implementation only supported system-global switch semantics, | ||
79 | which was simply reflected into each CPU's file. Writing a 0 or 1 into it | ||
80 | would pull the other CPUs to the same state. | ||
81 | For compatibility reasons this file and its behavior are still supported | ||
82 | on AMD CPUs, though it is now protected by a config switch | ||
83 | (X86_ACPI_CPUFREQ_CPB). On Intel CPUs this file will never be created, | ||
84 | even with the config option set. | ||
85 | This functionality is considered legacy and will be removed in some future | ||
86 | kernel version. | ||
87 | |||
88 | More fine grained boosting control | ||
89 | ---------------------------------- | ||
90 | |||
91 | Technically it is possible to switch the boosting functionality at least | ||
92 | on a per package basis, for some CPUs even per core. Currently the driver | ||
93 | does not support it, but this may be implemented in the future. | ||
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 70e717305c29..dffa7af1db71 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c | |||
@@ -63,6 +63,8 @@ enum { | |||
63 | #define INTEL_MSR_RANGE (0xffff) | 63 | #define INTEL_MSR_RANGE (0xffff) |
64 | #define AMD_MSR_RANGE (0x7) | 64 | #define AMD_MSR_RANGE (0x7) |
65 | 65 | ||
66 | #define MSR_K7_HWCR_CPB_DIS (1ULL << 25) | ||
67 | |||
66 | struct acpi_cpufreq_data { | 68 | struct acpi_cpufreq_data { |
67 | struct acpi_processor_performance *acpi_data; | 69 | struct acpi_processor_performance *acpi_data; |
68 | struct cpufreq_frequency_table *freq_table; | 70 | struct cpufreq_frequency_table *freq_table; |
@@ -78,6 +80,96 @@ static struct acpi_processor_performance __percpu *acpi_perf_data; | |||
78 | static struct cpufreq_driver acpi_cpufreq_driver; | 80 | static struct cpufreq_driver acpi_cpufreq_driver; |
79 | 81 | ||
80 | static unsigned int acpi_pstate_strict; | 82 | static unsigned int acpi_pstate_strict; |
83 | static bool boost_enabled, boost_supported; | ||
84 | static struct msr __percpu *msrs; | ||
85 | |||
86 | static bool boost_state(unsigned int cpu) | ||
87 | { | ||
88 | u32 lo, hi; | ||
89 | u64 msr; | ||
90 | |||
91 | switch (boot_cpu_data.x86_vendor) { | ||
92 | case X86_VENDOR_INTEL: | ||
93 | rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi); | ||
94 | msr = lo | ((u64)hi << 32); | ||
95 | return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); | ||
96 | case X86_VENDOR_AMD: | ||
97 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
98 | msr = lo | ((u64)hi << 32); | ||
99 | return !(msr & MSR_K7_HWCR_CPB_DIS); | ||
100 | } | ||
101 | return false; | ||
102 | } | ||
103 | |||
104 | static void boost_set_msrs(bool enable, const struct cpumask *cpumask) | ||
105 | { | ||
106 | u32 cpu; | ||
107 | u32 msr_addr; | ||
108 | u64 msr_mask; | ||
109 | |||
110 | switch (boot_cpu_data.x86_vendor) { | ||
111 | case X86_VENDOR_INTEL: | ||
112 | msr_addr = MSR_IA32_MISC_ENABLE; | ||
113 | msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE; | ||
114 | break; | ||
115 | case X86_VENDOR_AMD: | ||
116 | msr_addr = MSR_K7_HWCR; | ||
117 | msr_mask = MSR_K7_HWCR_CPB_DIS; | ||
118 | break; | ||
119 | default: | ||
120 | return; | ||
121 | } | ||
122 | |||
123 | rdmsr_on_cpus(cpumask, msr_addr, msrs); | ||
124 | |||
125 | for_each_cpu(cpu, cpumask) { | ||
126 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
127 | if (enable) | ||
128 | reg->q &= ~msr_mask; | ||
129 | else | ||
130 | reg->q |= msr_mask; | ||
131 | } | ||
132 | |||
133 | wrmsr_on_cpus(cpumask, msr_addr, msrs); | ||
134 | } | ||
135 | |||
136 | static ssize_t store_global_boost(struct kobject *kobj, struct attribute *attr, | ||
137 | const char *buf, size_t count) | ||
138 | { | ||
139 | int ret; | ||
140 | unsigned long val = 0; | ||
141 | |||
142 | if (!boost_supported) | ||
143 | return -EINVAL; | ||
144 | |||
145 | ret = kstrtoul(buf, 10, &val); | ||
146 | if (ret || (val > 1)) | ||
147 | return -EINVAL; | ||
148 | |||
149 | if ((val && boost_enabled) || (!val && !boost_enabled)) | ||
150 | return count; | ||
151 | |||
152 | get_online_cpus(); | ||
153 | |||
154 | boost_set_msrs(val, cpu_online_mask); | ||
155 | |||
156 | put_online_cpus(); | ||
157 | |||
158 | boost_enabled = val; | ||
159 | pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis"); | ||
160 | |||
161 | return count; | ||
162 | } | ||
163 | |||
164 | static ssize_t show_global_boost(struct kobject *kobj, | ||
165 | struct attribute *attr, char *buf) | ||
166 | { | ||
167 | return sprintf(buf, "%u\n", boost_enabled); | ||
168 | } | ||
169 | |||
170 | static struct global_attr global_boost = __ATTR(boost, 0644, | ||
171 | show_global_boost, | ||
172 | store_global_boost); | ||
81 | 173 | ||
82 | static int check_est_cpu(unsigned int cpuid) | 174 | static int check_est_cpu(unsigned int cpuid) |
83 | { | 175 | { |
@@ -448,6 +540,44 @@ static void free_acpi_perf_data(void) | |||
448 | free_percpu(acpi_perf_data); | 540 | free_percpu(acpi_perf_data); |
449 | } | 541 | } |
450 | 542 | ||
543 | static int boost_notify(struct notifier_block *nb, unsigned long action, | ||
544 | void *hcpu) | ||
545 | { | ||
546 | unsigned cpu = (long)hcpu; | ||
547 | const struct cpumask *cpumask; | ||
548 | |||
549 | cpumask = get_cpu_mask(cpu); | ||
550 | |||
551 | /* | ||
552 | * Clear the boost-disable bit on the CPU_DOWN path so that | ||
553 | * this cpu cannot block the remaining ones from boosting. On | ||
554 | * the CPU_UP path we simply keep the boost-disable flag in | ||
555 | * sync with the current global state. | ||
556 | */ | ||
557 | |||
558 | switch (action) { | ||
559 | case CPU_UP_PREPARE: | ||
560 | case CPU_UP_PREPARE_FROZEN: | ||
561 | boost_set_msrs(boost_enabled, cpumask); | ||
562 | break; | ||
563 | |||
564 | case CPU_DOWN_PREPARE: | ||
565 | case CPU_DOWN_PREPARE_FROZEN: | ||
566 | boost_set_msrs(1, cpumask); | ||
567 | break; | ||
568 | |||
569 | default: | ||
570 | break; | ||
571 | } | ||
572 | |||
573 | return NOTIFY_OK; | ||
574 | } | ||
575 | |||
576 | |||
577 | static struct notifier_block boost_nb = { | ||
578 | .notifier_call = boost_notify, | ||
579 | }; | ||
580 | |||
451 | /* | 581 | /* |
452 | * acpi_cpufreq_early_init - initialize ACPI P-States library | 582 | * acpi_cpufreq_early_init - initialize ACPI P-States library |
453 | * | 583 | * |
@@ -774,6 +904,49 @@ static struct cpufreq_driver acpi_cpufreq_driver = { | |||
774 | .attr = acpi_cpufreq_attr, | 904 | .attr = acpi_cpufreq_attr, |
775 | }; | 905 | }; |
776 | 906 | ||
907 | static void __init acpi_cpufreq_boost_init(void) | ||
908 | { | ||
909 | if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) { | ||
910 | msrs = msrs_alloc(); | ||
911 | |||
912 | if (!msrs) | ||
913 | return; | ||
914 | |||
915 | boost_supported = true; | ||
916 | boost_enabled = boost_state(0); | ||
917 | |||
918 | get_online_cpus(); | ||
919 | |||
920 | /* Force all MSRs to the same value */ | ||
921 | boost_set_msrs(boost_enabled, cpu_online_mask); | ||
922 | |||
923 | register_cpu_notifier(&boost_nb); | ||
924 | |||
925 | put_online_cpus(); | ||
926 | } else | ||
927 | global_boost.attr.mode = 0444; | ||
928 | |||
929 | /* We create the boost file in any case, though for systems without | ||
930 | * hardware support it will be read-only and hardwired to return 0. | ||
931 | */ | ||
932 | if (sysfs_create_file(cpufreq_global_kobject, &(global_boost.attr))) | ||
933 | pr_warn(PFX "could not register global boost sysfs file\n"); | ||
934 | else | ||
935 | pr_debug("registered global boost sysfs file\n"); | ||
936 | } | ||
937 | |||
938 | static void __exit acpi_cpufreq_boost_exit(void) | ||
939 | { | ||
940 | sysfs_remove_file(cpufreq_global_kobject, &(global_boost.attr)); | ||
941 | |||
942 | if (msrs) { | ||
943 | unregister_cpu_notifier(&boost_nb); | ||
944 | |||
945 | msrs_free(msrs); | ||
946 | msrs = NULL; | ||
947 | } | ||
948 | } | ||
949 | |||
777 | static int __init acpi_cpufreq_init(void) | 950 | static int __init acpi_cpufreq_init(void) |
778 | { | 951 | { |
779 | int ret; | 952 | int ret; |
@@ -790,6 +963,8 @@ static int __init acpi_cpufreq_init(void) | |||
790 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); | 963 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); |
791 | if (ret) | 964 | if (ret) |
792 | free_acpi_perf_data(); | 965 | free_acpi_perf_data(); |
966 | else | ||
967 | acpi_cpufreq_boost_init(); | ||
793 | 968 | ||
794 | return ret; | 969 | return ret; |
795 | } | 970 | } |
@@ -798,6 +973,8 @@ static void __exit acpi_cpufreq_exit(void) | |||
798 | { | 973 | { |
799 | pr_debug("acpi_cpufreq_exit\n"); | 974 | pr_debug("acpi_cpufreq_exit\n"); |
800 | 975 | ||
976 | acpi_cpufreq_boost_exit(); | ||
977 | |||
801 | cpufreq_unregister_driver(&acpi_cpufreq_driver); | 978 | cpufreq_unregister_driver(&acpi_cpufreq_driver); |
802 | 979 | ||
803 | free_acpi_perf_data(); | 980 | free_acpi_perf_data(); |