aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDirk Brandewie <dirk.j.brandewie@intel.com>2014-11-06 12:40:47 -0500
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2014-11-11 18:04:38 -0500
commit2f86dc4cddcb21290ca099e1dce2a53533c86e0b (patch)
tree4f713a464f066752b8f190fafe1d23bc37118d59
parent77873887729aaddec5cd27203a6ce8c4987733e4 (diff)
intel_pstate: Add support for HWP
Add support of Hardware Managed Performance States (HWP) described in Volume 3 section 14.4 of the SDM. With HWP enabled intel_pstate will no longer be responsible for selecting P states for the processor. intel_pstate will continue to register to the cpufreq core as the scaling driver for CPUs implementing HWP. In HWP mode intel_pstate provides three functions reporting frequency to the cpufreq core, support for the set_policy() interface from the core and maintaining the intel_pstate sysfs interface in /sys/devices/system/cpu/intel_pstate. User preferences expressed via the set_policy() interface or the sysfs interface are forwarded to the CPU via the HWP MSR interface. Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-rw-r--r--Documentation/cpu-freq/intel-pstate.txt37
-rw-r--r--Documentation/kernel-parameters.txt3
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h41
-rw-r--r--drivers/cpufreq/intel_pstate.c100
4 files changed, 167 insertions, 14 deletions
diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt
index a69ffe1d54d5..765d7fc0e692 100644
--- a/Documentation/cpu-freq/intel-pstate.txt
+++ b/Documentation/cpu-freq/intel-pstate.txt
@@ -1,17 +1,28 @@
1Intel P-state driver 1Intel P-state driver
2-------------------- 2--------------------
3 3
4This driver implements a scaling driver with an internal governor for 4This driver provides an interface to control the P state selection for
5Intel Core processors. The driver follows the same model as the 5SandyBridge+ Intel processors. The driver can operate two different
6Transmeta scaling driver (longrun.c) and implements the setpolicy() 6modes based on the processor model legacy and Hardware P state (HWP)
7instead of target(). Scaling drivers that implement setpolicy() are 7mode.
8assumed to implement internal governors by the cpufreq core. All the 8
9logic for selecting the current P state is contained within the 9In legacy mode the driver implements a scaling driver with an internal
10driver; no external governor is used by the cpufreq core. 10governor for Intel Core processors. The driver follows the same model
11 11as the Transmeta scaling driver (longrun.c) and implements the
12Intel SandyBridge+ processors are supported. 12setpolicy() instead of target(). Scaling drivers that implement
13 13setpolicy() are assumed to implement internal governors by the cpufreq
14New sysfs files for controlling P state selection have been added to 14core. All the logic for selecting the current P state is contained
15within the driver; no external governor is used by the cpufreq core.
16
17In HWP mode P state selection is implemented in the processor
18itself. The driver provides the interfaces between the cpufreq core and
19the processor to control P state selection based on user preferences
20and reporting frequency to the cpufreq core. In this mode the
21internal governor code is disabled.
22
23In addition to the interfaces provided by the cpufreq core for
24controlling frequency the driver provides sysfs files for
25controlling P state selection. These files have been added to
15/sys/devices/system/cpu/intel_pstate/ 26/sys/devices/system/cpu/intel_pstate/
16 27
17 max_perf_pct: limits the maximum P state that will be requested by 28 max_perf_pct: limits the maximum P state that will be requested by
@@ -33,7 +44,9 @@ frequency is fiction for Intel Core processors. Even if the scaling
33driver selects a single P state the actual frequency the processor 44driver selects a single P state the actual frequency the processor
34will run at is selected by the processor itself. 45will run at is selected by the processor itself.
35 46
36New debugfs files have also been added to /sys/kernel/debug/pstate_snb/ 47For legacy mode debugfs files have also been added to allow tuning of
48the internal governor algorithm. These files are located at
49/sys/kernel/debug/pstate_snb/ These files are NOT present in HWP mode.
37 50
38 deadband 51 deadband
39 d_gain_pct 52 d_gain_pct
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4c81a860cc2b..907a0f119bee 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1446,6 +1446,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1446 disable 1446 disable
1447 Do not enable intel_pstate as the default 1447 Do not enable intel_pstate as the default
1448 scaling driver for the supported processors 1448 scaling driver for the supported processors
1449 no_hwp
1450 Do not enable hardware P state control (HWP)
1451 if available.
1449 1452
1450 intremap= [X86-64, Intel-IOMMU] 1453 intremap= [X86-64, Intel-IOMMU]
1451 on enable Interrupt Remapping (default) 1454 on enable Interrupt Remapping (default)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index e21331ce368f..62838e54947d 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -152,6 +152,45 @@
152#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 152#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
153#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 153#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669
154 154
155/* Hardware P state interface */
156#define MSR_PPERF 0x0000064e
157#define MSR_PERF_LIMIT_REASONS 0x0000064f
158#define MSR_PM_ENABLE 0x00000770
159#define MSR_HWP_CAPABILITIES 0x00000771
160#define MSR_HWP_REQUEST_PKG 0x00000772
161#define MSR_HWP_INTERRUPT 0x00000773
162#define MSR_HWP_REQUEST 0x00000774
163#define MSR_HWP_STATUS 0x00000777
164
165/* CPUID.6.EAX */
166#define HWP_BASE_BIT (1<<7)
167#define HWP_NOTIFICATIONS_BIT (1<<8)
168#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
169#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
170#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
171
172/* IA32_HWP_CAPABILITIES */
173#define HWP_HIGHEST_PERF(x) (x & 0xff)
174#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8)
175#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16)
176#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24)
177
178/* IA32_HWP_REQUEST */
179#define HWP_MIN_PERF(x) (x & 0xff)
180#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
181#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
182#define HWP_ENERGY_PERF_PREFERENCE(x) ((x & 0xff) << 24)
183#define HWP_ACTIVITY_WINDOW(x) ((x & 0xff3) << 32)
184#define HWP_PACKAGE_CONTROL(x) ((x & 0x1) << 42)
185
186/* IA32_HWP_STATUS */
187#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
188#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)
189
190/* IA32_HWP_INTERRUPT */
191#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
192#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)
193
155#define MSR_AMD64_MC0_MASK 0xc0010044 194#define MSR_AMD64_MC0_MASK 0xc0010044
156 195
157#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) 196#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
@@ -345,6 +384,8 @@
345 384
346#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 385#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
347 386
387#define MSR_MISC_PWR_MGMT 0x000001aa
388
348#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 389#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
349#define ENERGY_PERF_BIAS_PERFORMANCE 0 390#define ENERGY_PERF_BIAS_PERFORMANCE 0
350#define ENERGY_PERF_BIAS_NORMAL 6 391#define ENERGY_PERF_BIAS_NORMAL 6
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 27bb6d3877ed..ba35db092239 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -137,6 +137,7 @@ struct cpu_defaults {
137 137
138static struct pstate_adjust_policy pid_params; 138static struct pstate_adjust_policy pid_params;
139static struct pstate_funcs pstate_funcs; 139static struct pstate_funcs pstate_funcs;
140static int hwp_active;
140 141
141struct perf_limits { 142struct perf_limits {
142 int no_turbo; 143 int no_turbo;
@@ -244,6 +245,34 @@ static inline void update_turbo_state(void)
244 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); 245 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
245} 246}
246 247
248#define PCT_TO_HWP(x) (x * 255 / 100)
249static void intel_pstate_hwp_set(void)
250{
251 int min, max, cpu;
252 u64 value, freq;
253
254 get_online_cpus();
255
256 for_each_online_cpu(cpu) {
257 rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
258 min = PCT_TO_HWP(limits.min_perf_pct);
259 value &= ~HWP_MIN_PERF(~0L);
260 value |= HWP_MIN_PERF(min);
261
262 max = PCT_TO_HWP(limits.max_perf_pct);
263 if (limits.no_turbo) {
264 rdmsrl( MSR_HWP_CAPABILITIES, freq);
265 max = HWP_GUARANTEED_PERF(freq);
266 }
267
268 value &= ~HWP_MAX_PERF(~0L);
269 value |= HWP_MAX_PERF(max);
270 wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
271 }
272
273 put_online_cpus();
274}
275
247/************************** debugfs begin ************************/ 276/************************** debugfs begin ************************/
248static int pid_param_set(void *data, u64 val) 277static int pid_param_set(void *data, u64 val)
249{ 278{
@@ -279,6 +308,8 @@ static void __init intel_pstate_debug_expose_params(void)
279 struct dentry *debugfs_parent; 308 struct dentry *debugfs_parent;
280 int i = 0; 309 int i = 0;
281 310
311 if (hwp_active)
312 return;
282 debugfs_parent = debugfs_create_dir("pstate_snb", NULL); 313 debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
283 if (IS_ERR_OR_NULL(debugfs_parent)) 314 if (IS_ERR_OR_NULL(debugfs_parent))
284 return; 315 return;
@@ -329,8 +360,12 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
329 pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); 360 pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
330 return -EPERM; 361 return -EPERM;
331 } 362 }
363
332 limits.no_turbo = clamp_t(int, input, 0, 1); 364 limits.no_turbo = clamp_t(int, input, 0, 1);
333 365
366 if (hwp_active)
367 intel_pstate_hwp_set();
368
334 return count; 369 return count;
335} 370}
336 371
@@ -348,6 +383,8 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
348 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); 383 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
349 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); 384 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
350 385
386 if (hwp_active)
387 intel_pstate_hwp_set();
351 return count; 388 return count;
352} 389}
353 390
@@ -363,6 +400,8 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
363 limits.min_perf_pct = clamp_t(int, input, 0 , 100); 400 limits.min_perf_pct = clamp_t(int, input, 0 , 100);
364 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); 401 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
365 402
403 if (hwp_active)
404 intel_pstate_hwp_set();
366 return count; 405 return count;
367} 406}
368 407
@@ -395,8 +434,16 @@ static void __init intel_pstate_sysfs_expose_params(void)
395 rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group); 434 rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
396 BUG_ON(rc); 435 BUG_ON(rc);
397} 436}
398
399/************************** sysfs end ************************/ 437/************************** sysfs end ************************/
438
439static void intel_pstate_hwp_enable(void)
440{
441 hwp_active++;
442 pr_info("intel_pstate HWP enabled\n");
443
444 wrmsrl( MSR_PM_ENABLE, 0x1);
445}
446
400static int byt_get_min_pstate(void) 447static int byt_get_min_pstate(void)
401{ 448{
402 u64 value; 449 u64 value;
@@ -648,6 +695,14 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
648 cpu->prev_mperf = mperf; 695 cpu->prev_mperf = mperf;
649} 696}
650 697
698static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
699{
700 int delay;
701
702 delay = msecs_to_jiffies(50);
703 mod_timer_pinned(&cpu->timer, jiffies + delay);
704}
705
651static inline void intel_pstate_set_sample_time(struct cpudata *cpu) 706static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
652{ 707{
653 int delay; 708 int delay;
@@ -694,6 +749,14 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
694 intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl); 749 intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl);
695} 750}
696 751
752static void intel_hwp_timer_func(unsigned long __data)
753{
754 struct cpudata *cpu = (struct cpudata *) __data;
755
756 intel_pstate_sample(cpu);
757 intel_hwp_set_sample_time(cpu);
758}
759
697static void intel_pstate_timer_func(unsigned long __data) 760static void intel_pstate_timer_func(unsigned long __data)
698{ 761{
699 struct cpudata *cpu = (struct cpudata *) __data; 762 struct cpudata *cpu = (struct cpudata *) __data;
@@ -737,6 +800,11 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
737}; 800};
738MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); 801MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
739 802
803static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
804 ICPU(0x56, core_params),
805 {}
806};
807
740static int intel_pstate_init_cpu(unsigned int cpunum) 808static int intel_pstate_init_cpu(unsigned int cpunum)
741{ 809{
742 struct cpudata *cpu; 810 struct cpudata *cpu;
@@ -753,9 +821,14 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
753 intel_pstate_get_cpu_pstates(cpu); 821 intel_pstate_get_cpu_pstates(cpu);
754 822
755 init_timer_deferrable(&cpu->timer); 823 init_timer_deferrable(&cpu->timer);
756 cpu->timer.function = intel_pstate_timer_func;
757 cpu->timer.data = (unsigned long)cpu; 824 cpu->timer.data = (unsigned long)cpu;
758 cpu->timer.expires = jiffies + HZ/100; 825 cpu->timer.expires = jiffies + HZ/100;
826
827 if (!hwp_active)
828 cpu->timer.function = intel_pstate_timer_func;
829 else
830 cpu->timer.function = intel_hwp_timer_func;
831
759 intel_pstate_busy_pid_reset(cpu); 832 intel_pstate_busy_pid_reset(cpu);
760 intel_pstate_sample(cpu); 833 intel_pstate_sample(cpu);
761 834
@@ -792,6 +865,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
792 limits.no_turbo = 0; 865 limits.no_turbo = 0;
793 return 0; 866 return 0;
794 } 867 }
868
795 limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; 869 limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
796 limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); 870 limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100);
797 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); 871 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
@@ -801,6 +875,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
801 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); 875 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
802 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); 876 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
803 877
878 if (hwp_active)
879 intel_pstate_hwp_set();
880
804 return 0; 881 return 0;
805} 882}
806 883
@@ -823,6 +900,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
823 pr_info("intel_pstate CPU %d exiting\n", cpu_num); 900 pr_info("intel_pstate CPU %d exiting\n", cpu_num);
824 901
825 del_timer_sync(&all_cpu_data[cpu_num]->timer); 902 del_timer_sync(&all_cpu_data[cpu_num]->timer);
903 if (hwp_active)
904 return;
905
826 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); 906 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
827} 907}
828 908
@@ -866,6 +946,7 @@ static struct cpufreq_driver intel_pstate_driver = {
866}; 946};
867 947
868static int __initdata no_load; 948static int __initdata no_load;
949static int __initdata no_hwp;
869 950
870static int intel_pstate_msrs_not_valid(void) 951static int intel_pstate_msrs_not_valid(void)
871{ 952{
@@ -959,6 +1040,15 @@ static bool intel_pstate_platform_pwr_mgmt_exists(void)
959{ 1040{
960 struct acpi_table_header hdr; 1041 struct acpi_table_header hdr;
961 struct hw_vendor_info *v_info; 1042 struct hw_vendor_info *v_info;
1043 const struct x86_cpu_id *id;
1044 u64 misc_pwr;
1045
1046 id = x86_match_cpu(intel_pstate_cpu_oob_ids);
1047 if (id) {
1048 rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
1049 if ( misc_pwr & (1 << 8))
1050 return true;
1051 }
962 1052
963 if (acpi_disabled || 1053 if (acpi_disabled ||
964 ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr))) 1054 ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
@@ -982,6 +1072,7 @@ static int __init intel_pstate_init(void)
982 int cpu, rc = 0; 1072 int cpu, rc = 0;
983 const struct x86_cpu_id *id; 1073 const struct x86_cpu_id *id;
984 struct cpu_defaults *cpu_info; 1074 struct cpu_defaults *cpu_info;
1075 struct cpuinfo_x86 *c = &boot_cpu_data;
985 1076
986 if (no_load) 1077 if (no_load)
987 return -ENODEV; 1078 return -ENODEV;
@@ -1011,6 +1102,9 @@ static int __init intel_pstate_init(void)
1011 if (!all_cpu_data) 1102 if (!all_cpu_data)
1012 return -ENOMEM; 1103 return -ENOMEM;
1013 1104
1105 if (cpu_has(c,X86_FEATURE_HWP) && !no_hwp)
1106 intel_pstate_hwp_enable();
1107
1014 rc = cpufreq_register_driver(&intel_pstate_driver); 1108 rc = cpufreq_register_driver(&intel_pstate_driver);
1015 if (rc) 1109 if (rc)
1016 goto out; 1110 goto out;
@@ -1041,6 +1135,8 @@ static int __init intel_pstate_setup(char *str)
1041 1135
1042 if (!strcmp(str, "disable")) 1136 if (!strcmp(str, "disable"))
1043 no_load = 1; 1137 no_load = 1;
1138 if (!strcmp(str, "no_hwp"))
1139 no_hwp = 1;
1044 return 0; 1140 return 0;
1045} 1141}
1046early_param("intel_pstate", intel_pstate_setup); 1142early_param("intel_pstate", intel_pstate_setup);