author	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>	2016-10-25 16:20:40 -0400
committer	Rafael J. Wysocki <rafael.j.wysocki@intel.com>	2016-11-01 01:04:06 -0400
commit	eae48f046ffa117afb782cd9b3ae5469df0042e2 (patch)
tree	0049f483ae63479e8ba6c757b68c73130674c05e /drivers/cpufreq
parent	ae8b8d8f86a03c19c5ecfd848609b2e9438f1cf2 (diff)
cpufreq: intel_pstate: Per CPU P-State limits
Intel P-State offers two interfaces for setting performance limits:

- Intel P-State sysfs
	/sys/devices/system/cpu/intel_pstate/max_perf_pct
	/sys/devices/system/cpu/intel_pstate/min_perf_pct
- cpufreq
	/sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
	/sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq

In the current implementation both of the above methods change the limits for every CPU in the system. Moreover, limits placed via the cpufreq policy interface are also reflected in the Intel P-State sysfs through modified max_perf_pct and min_perf_pct values on sysfs reads. This allows the percentage of reduced/increased performance to be checked irrespective of the method used to set the limit.

Some newer generations of processors can have limits placed on individual CPU cores. Using the cpufreq interface it is possible to set limits on each CPU, but the current code applies the last requested limits to all CPUs, so the per-core limit feature of these CPUs cannot be used.

This change adds the capability to set P-State limits for each CPU, with some limitations. In that case, what should reads of max_perf_pct and min_perf_pct return? They could report the most restrictive limit placed on any CPU, or the maximum possible performance of any CPU on which no limit is placed. Either choice will be a problem for someone, so the consensus is that both global sysfs controls cannot be present when the user wants per-core limits:

- By default the per-core-control feature is not enabled, so no one will notice any difference.
- It is enabled with the kernel command line option intel_pstate=per_cpu_perf_limits.
- When per-core controls are enabled, the following attributes are not available for either reading or writing:
	/sys/devices/system/cpu/intel_pstate/max_perf_pct
	/sys/devices/system/cpu/intel_pstate/min_perf_pct
- The user can change limits using
	/sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
	/sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq
	/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
- The user can still observe the turbo percentage and the number of P-States from
	/sys/devices/system/cpu/intel_pstate/turbo_pct
	/sys/devices/system/cpu/intel_pstate/num_pstates
- The user can read and write the system-wide turbo status via
	/sys/devices/system/cpu/intel_pstate/no_turbo

While making this change, BUG_ON() calls are replaced with WARN_ON(), as these are not fatal errors for the system.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
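As an illustration of the user-facing interface described above, here is a minimal user-space sketch that caps one CPU through the cpufreq sysfs path, which becomes a true per-CPU limit once the kernel is booted with intel_pstate=per_cpu_perf_limits. The CPU number and frequency below are arbitrary example values, not part of the patch.

/*
 * Illustrative only: cap CPU 2 at 2.0 GHz through the cpufreq sysfs
 * interface, the per-CPU path that this patch makes meaningful when
 * intel_pstate=per_cpu_perf_limits is on the kernel command line.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *path =
		"/sys/devices/system/cpu/cpu2/cpufreq/scaling_max_freq";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}
	/* cpufreq sysfs takes the limit in kHz */
	fprintf(f, "%u\n", 2000000);
	fclose(f);
	return EXIT_SUCCESS;
}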
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--	drivers/cpufreq/intel_pstate.c	236
1 file changed, 156 insertions(+), 80 deletions(-)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d7a9195a8351..b6e9b49bf151 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -177,6 +177,48 @@ struct _pid {
 };
 
 /**
+ * struct perf_limits - Store user and policy limits
+ * @no_turbo:		User requested turbo state from intel_pstate sysfs
+ * @turbo_disabled:	Platform turbo status either from msr
+ *			MSR_IA32_MISC_ENABLE or when maximum available pstate
+ *			matches the maximum turbo pstate
+ * @max_perf_pct:	Effective maximum performance limit in percentage, this
+ *			is minimum of either limits enforced by cpufreq policy
+ *			or limits from user set limits via intel_pstate sysfs
+ * @min_perf_pct:	Effective minimum performance limit in percentage, this
+ *			is maximum of either limits enforced by cpufreq policy
+ *			or limits from user set limits via intel_pstate sysfs
+ * @max_perf:		This is a scaled value between 0 to 255 for max_perf_pct
+ *			This value is used to limit max pstate
+ * @min_perf:		This is a scaled value between 0 to 255 for min_perf_pct
+ *			This value is used to limit min pstate
+ * @max_policy_pct:	The maximum performance in percentage enforced by
+ *			cpufreq setpolicy interface
+ * @max_sysfs_pct:	The maximum performance in percentage enforced by
+ *			intel pstate sysfs interface, unused when per cpu
+ *			controls are enforced
+ * @min_policy_pct:	The minimum performance in percentage enforced by
+ *			cpufreq setpolicy interface
+ * @min_sysfs_pct:	The minimum performance in percentage enforced by
+ *			intel pstate sysfs interface, unused when per cpu
+ *			controls are enforced
+ *
+ * Storage for user and policy defined limits.
+ */
+struct perf_limits {
+	int no_turbo;
+	int turbo_disabled;
+	int max_perf_pct;
+	int min_perf_pct;
+	int32_t max_perf;
+	int32_t min_perf;
+	int max_policy_pct;
+	int max_sysfs_pct;
+	int min_policy_pct;
+	int min_sysfs_pct;
+};
+
+/**
  * struct cpudata -	Per CPU instance data storage
  * @cpu:		CPU number for this instance data
  * @policy:		CPUFreq policy value
@@ -194,6 +236,9 @@ struct _pid {
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *			current sample
  * @sample:		Storage for storing last Sample data
+ * @perf_limits:	Pointer to perf_limit unique to this CPU
+ *			Not all field in the structure are applicable
+ *			when per cpu controls are enforced
  * @acpi_perf_data:	Stores ACPI perf information read from _PSS
  * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
  *
@@ -217,6 +262,7 @@ struct cpudata {
 	u64	prev_tsc;
 	u64	prev_cummulative_iowait;
 	struct sample sample;
+	struct perf_limits *perf_limits;
 #ifdef CONFIG_ACPI
 	struct acpi_processor_performance acpi_perf_data;
 	bool valid_pss_table;
@@ -289,51 +335,12 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
 static struct pstate_adjust_policy pid_params __read_mostly;
 static struct pstate_funcs pstate_funcs __read_mostly;
 static int hwp_active __read_mostly;
+static bool per_cpu_limits __read_mostly;
 
 #ifdef CONFIG_ACPI
 static bool acpi_ppc;
 #endif
 
-/**
- * struct perf_limits - Store user and policy limits
- * @no_turbo:		User requested turbo state from intel_pstate sysfs
- * @turbo_disabled:	Platform turbo status either from msr
- *			MSR_IA32_MISC_ENABLE or when maximum available pstate
- *			matches the maximum turbo pstate
- * @max_perf_pct:	Effective maximum performance limit in percentage, this
- *			is minimum of either limits enforced by cpufreq policy
- *			or limits from user set limits via intel_pstate sysfs
- * @min_perf_pct:	Effective minimum performance limit in percentage, this
- *			is maximum of either limits enforced by cpufreq policy
- *			or limits from user set limits via intel_pstate sysfs
- * @max_perf:		This is a scaled value between 0 to 255 for max_perf_pct
- *			This value is used to limit max pstate
- * @min_perf:		This is a scaled value between 0 to 255 for min_perf_pct
- *			This value is used to limit min pstate
- * @max_policy_pct:	The maximum performance in percentage enforced by
- *			cpufreq setpolicy interface
- * @max_sysfs_pct:	The maximum performance in percentage enforced by
- *			intel pstate sysfs interface
- * @min_policy_pct:	The minimum performance in percentage enforced by
- *			cpufreq setpolicy interface
- * @min_sysfs_pct:	The minimum performance in percentage enforced by
- *			intel pstate sysfs interface
- *
- * Storage for user and policy defined limits.
- */
-struct perf_limits {
-	int no_turbo;
-	int turbo_disabled;
-	int max_perf_pct;
-	int min_perf_pct;
-	int32_t max_perf;
-	int32_t min_perf;
-	int max_policy_pct;
-	int max_sysfs_pct;
-	int min_policy_pct;
-	int min_sysfs_pct;
-};
-
 static struct perf_limits performance_limits = {
 	.no_turbo = 0,
 	.turbo_disabled = 0,
@@ -560,21 +567,30 @@ static inline void update_turbo_state(void)
 static void intel_pstate_hwp_set(const struct cpumask *cpumask)
 {
 	int min, hw_min, max, hw_max, cpu, range, adj_range;
+	struct perf_limits *perf_limits = limits;
 	u64 value, cap;
 
 	for_each_cpu(cpu, cpumask) {
+		int max_perf_pct, min_perf_pct;
+
+		if (per_cpu_limits)
+			perf_limits = all_cpu_data[cpu]->perf_limits;
+
 		rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
 		hw_min = HWP_LOWEST_PERF(cap);
 		hw_max = HWP_HIGHEST_PERF(cap);
 		range = hw_max - hw_min;
 
+		max_perf_pct = perf_limits->max_perf_pct;
+		min_perf_pct = perf_limits->min_perf_pct;
+
 		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
-		adj_range = limits->min_perf_pct * range / 100;
+		adj_range = min_perf_pct * range / 100;
 		min = hw_min + adj_range;
 		value &= ~HWP_MIN_PERF(~0L);
 		value |= HWP_MIN_PERF(min);
 
-		adj_range = limits->max_perf_pct * range / 100;
+		adj_range = max_perf_pct * range / 100;
 		max = hw_min + adj_range;
 		if (limits->no_turbo) {
 			hw_max = HWP_GUARANTEED_PERF(cap);
@@ -787,8 +803,6 @@ define_one_global_ro(num_pstates);
 
 static struct attribute *intel_pstate_attributes[] = {
 	&no_turbo.attr,
-	&max_perf_pct.attr,
-	&min_perf_pct.attr,
 	&turbo_pct.attr,
 	&num_pstates.attr,
 	NULL
@@ -805,9 +819,26 @@ static void __init intel_pstate_sysfs_expose_params(void)
 
 	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
 						&cpu_subsys.dev_root->kobj);
-	BUG_ON(!intel_pstate_kobject);
+	if (WARN_ON(!intel_pstate_kobject))
+		return;
+
 	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
-	BUG_ON(rc);
+	if (WARN_ON(rc))
+		return;
+
+	/*
+	 * If per cpu limits are enforced there are no global limits, so
+	 * return without creating max/min_perf_pct attributes
+	 */
+	if (per_cpu_limits)
+		return;
+
+	rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
+	WARN_ON(rc);
+
+	rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
+	WARN_ON(rc);
+
 }
 /************************** sysfs end ************************/
 
@@ -1124,20 +1155,24 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	int max_perf = cpu->pstate.turbo_pstate;
 	int max_perf_adj;
 	int min_perf;
+	struct perf_limits *perf_limits = limits;
 
 	if (limits->no_turbo || limits->turbo_disabled)
 		max_perf = cpu->pstate.max_pstate;
 
+	if (per_cpu_limits)
+		perf_limits = cpu->perf_limits;
+
 	/*
 	 * performance can be limited by user through sysfs, by cpufreq
 	 * policy, or by cpu specific default values determined through
 	 * experimentation.
 	 */
-	max_perf_adj = fp_toint(max_perf * limits->max_perf);
+	max_perf_adj = fp_toint(max_perf * perf_limits->max_perf);
 	*max = clamp_t(int, max_perf_adj,
 			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
 
-	min_perf = fp_toint(max_perf * limits->min_perf);
+	min_perf = fp_toint(max_perf * perf_limits->min_perf);
 	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
 
@@ -1421,11 +1456,23 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
 
-	if (!all_cpu_data[cpunum])
-		all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
-					       GFP_KERNEL);
-	if (!all_cpu_data[cpunum])
-		return -ENOMEM;
+	cpu = all_cpu_data[cpunum];
+
+	if (!cpu) {
+		unsigned int size = sizeof(struct cpudata);
+
+		if (per_cpu_limits)
+			size += sizeof(struct perf_limits);
+
+		cpu = kzalloc(size, GFP_KERNEL);
+		if (!cpu)
+			return -ENOMEM;
+
+		all_cpu_data[cpunum] = cpu;
+		if (per_cpu_limits)
+			cpu->perf_limits = (struct perf_limits *)(cpu + 1);
+
+	}
 
 	cpu = all_cpu_data[cpunum];
 
@@ -1493,9 +1540,40 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits)
 	limits->min_sysfs_pct = 0;
 }
 
+static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
+					    struct perf_limits *limits)
+{
+	limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
+	limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0, 100);
+	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
+					      policy->cpuinfo.max_freq);
+	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
+
+	/* Normalize user input to [min_policy_pct, max_policy_pct] */
+	limits->min_perf_pct = max(limits->min_policy_pct,
+				   limits->min_sysfs_pct);
+	limits->min_perf_pct = min(limits->max_policy_pct,
+				   limits->min_perf_pct);
+	limits->max_perf_pct = min(limits->max_policy_pct,
+				   limits->max_sysfs_pct);
+	limits->max_perf_pct = max(limits->min_policy_pct,
+				   limits->max_perf_pct);
+
+	/* Make sure min_perf_pct <= max_perf_pct */
+	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
+
+	limits->min_perf = div_fp(limits->min_perf_pct, 100);
+	limits->max_perf = div_fp(limits->max_perf_pct, 100);
+	limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
+
+	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
+		 limits->max_perf_pct, limits->min_perf_pct);
+}
+
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu;
+	struct perf_limits *perf_limits = NULL;
 
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
@@ -1513,41 +1591,29 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		policy->max = policy->cpuinfo.max_freq;
 	}
 
-	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
-		limits = &performance_limits;
+	if (per_cpu_limits)
+		perf_limits = cpu->perf_limits;
+
+	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		if (!perf_limits) {
+			limits = &performance_limits;
+			perf_limits = limits;
+		}
 		if (policy->max >= policy->cpuinfo.max_freq) {
 			pr_debug("set performance\n");
-			intel_pstate_set_performance_limits(limits);
+			intel_pstate_set_performance_limits(perf_limits);
 			goto out;
 		}
 	} else {
 		pr_debug("set powersave\n");
-		limits = &powersave_limits;
-	}
-
-	limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
-	limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
-	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
-					      policy->cpuinfo.max_freq);
-	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
-
-	/* Normalize user input to [min_policy_pct, max_policy_pct] */
-	limits->min_perf_pct = max(limits->min_policy_pct,
-				   limits->min_sysfs_pct);
-	limits->min_perf_pct = min(limits->max_policy_pct,
-				   limits->min_perf_pct);
-	limits->max_perf_pct = min(limits->max_policy_pct,
-				   limits->max_sysfs_pct);
-	limits->max_perf_pct = max(limits->min_policy_pct,
-				   limits->max_perf_pct);
-
-	/* Make sure min_perf_pct <= max_perf_pct */
-	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
+		if (!perf_limits) {
+			limits = &powersave_limits;
+			perf_limits = limits;
+		}
 
-	limits->min_perf = div_fp(limits->min_perf_pct, 100);
-	limits->max_perf = div_fp(limits->max_perf_pct, 100);
-	limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
+	}
 
+	intel_pstate_update_perf_limits(policy, perf_limits);
  out:
 	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		/*
@@ -1607,6 +1673,14 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 	else
 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
 
+	/*
+	 * We need sane value in the cpu->perf_limits, so inherit from global
+	 * perf_limits limits, which are seeded with values based on the
+	 * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up.
+	 */
+	if (per_cpu_limits)
+		memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));
+
 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
 
@@ -1888,6 +1962,8 @@ static int __init intel_pstate_setup(char *str)
 		force_load = 1;
 	if (!strcmp(str, "hwp_only"))
 		hwp_only = 1;
+	if (!strcmp(str, "per_cpu_perf_limits"))
+		per_cpu_limits = true;
 
 #ifdef CONFIG_ACPI
 	if (!strcmp(str, "support_acpi_ppc"))
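
For readers who want a feel for the arithmetic that the new intel_pstate_update_perf_limits() helper performs, below is a stand-alone sketch that mimics its percentage clamping and fixed-point scaling with made-up policy values. div_fp(), DIV_ROUND_UP() and the clamp helpers are re-implemented locally as rough approximations of the kernel internals, and the final round_up(max_perf, FRAC_BITS) step is omitted for brevity; none of this code is part of the patch.

/*
 * Stand-alone approximation of the limit normalization done by
 * intel_pstate_update_perf_limits().  The policy numbers are invented
 * example inputs; the helpers are local stand-ins for kernel macros.
 */
#include <stdio.h>
#include <stdint.h>

#define FRAC_BITS 8

static int32_t div_fp(int32_t x, int32_t y)
{
	return (int32_t)(((int64_t)x << FRAC_BITS) / y);
}

static int clamp_pct(int v)
{
	return v < 0 ? 0 : (v > 100 ? 100 : v);
}

int main(void)
{
	/* Example policy: CPU max 3.5 GHz, user asks for 1.2 - 2.8 GHz */
	unsigned int cpuinfo_max_freq = 3500000;	/* kHz */
	unsigned int policy_min = 1200000;
	unsigned int policy_max = 2800000;

	int min_policy_pct = clamp_pct(policy_min * 100 / cpuinfo_max_freq);
	/* DIV_ROUND_UP equivalent for the maximum percentage */
	int max_policy_pct = clamp_pct((policy_max * 100 + cpuinfo_max_freq - 1) /
				       cpuinfo_max_freq);

	/* With no sysfs limits set, the effective limits follow the policy */
	int min_perf_pct = min_policy_pct;
	int max_perf_pct = max_policy_pct;

	/* Scale to the fixed-point fractions used when picking P-states */
	int32_t min_perf = div_fp(min_perf_pct, 100);
	int32_t max_perf = div_fp(max_perf_pct, 100);

	printf("min %d%% (0x%x), max %d%% (0x%x)\n",
	       min_perf_pct, min_perf, max_perf_pct, max_perf);
	return 0;
}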