aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2017-03-22 18:58:57 -0400
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2017-03-23 22:04:31 -0400
commitc5a2ee7dde893e0a06044e75c16711f08d5c011d (patch)
tree363d163e5233a4ed226a2febeceb5c42cc883963
parent553953453b4b64fbccba31691257d006cee36613 (diff)
cpufreq: intel_pstate: Active mode P-state limits rework
The coordination of P-state limits used by intel_pstate in the active mode (ie. by default) is problematic, because it synchronizes all of the limits (ie. the global ones and the per-policy ones) so as to use one common pair of P-state limits (min and max) across all CPUs in the system. The drawbacks of that are as follows: - If P-states are coordinated in hardware, it is not necessary to coordinate them in software on top of that, so in that case all of the above activity is in vain. - If P-states are not coordinated in hardware, then the processor is actually capable of setting different P-states for different CPUs and coordinating them at the software level simply doesn't allow that capability to be utilized. - The coordination works in such a way that setting a per-policy limit (eg. scaling_max_freq) for one CPU causes the common effective limit to change (and it will affect all of the other CPUs too), but subsequent reads from the corresponding sysfs attributes for the other CPUs will return stale values (which is confusing). - Reads from the global P-state limit attributes, min_perf_pct and max_perf_pct, return the effective common values and not the last values set through these attributes. However, the last values set through these attributes become hard limits that cannot be exceeded by writes to scaling_min_freq and scaling_max_freq, respectively, and they are not exposed, so essentially users have to remember what they are. All of that is painful enough to warrant a change of the management of P-state limits in the active mode. To that end, redesign the active mode P-state limits management in intel_pstate in accordance with the following rules: (1) All CPUs are affected by the global limits (that is, none of them can be requested to run faster than the global max and none of them can be requested to run slower than the global min). 
(2) Each individual CPU is affected by its own per-policy limits (that is, it cannot be requested to run faster than its own per-policy max and it cannot be requested to run slower than its own per-policy min). (3) The global and per-policy limits can be set independently. Also, the global maximum and minimum P-state limits will be always expressed as percentages of the maximum supported turbo P-state. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-rw-r--r--drivers/cpufreq/intel_pstate.c185
1 file changed, 85 insertions, 100 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2ef02fd568a6..c0afa78624a1 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -187,44 +187,35 @@ struct _pid {
187 187
188/** 188/**
189 * struct perf_limits - Store user and policy limits 189 * struct perf_limits - Store user and policy limits
190 * @no_turbo: User requested turbo state from intel_pstate sysfs
191 * @turbo_disabled: Platform turbo status either from msr
192 * MSR_IA32_MISC_ENABLE or when maximum available pstate
193 * matches the maximum turbo pstate
194 * @max_perf_pct: Effective maximum performance limit in percentage, this
195 * is minimum of either limits enforced by cpufreq policy
196 * or limits from user set limits via intel_pstate sysfs
197 * @min_perf_pct: Effective minimum performance limit in percentage, this
198 * is maximum of either limits enforced by cpufreq policy
199 * or limits from user set limits via intel_pstate sysfs
200 * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct 190 * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct
201 * This value is used to limit max pstate 191 * This value is used to limit max pstate
202 * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct 192 * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct
203 * This value is used to limit min pstate 193 * This value is used to limit min pstate
204 * @max_policy_pct: The maximum performance in percentage enforced by
205 * cpufreq setpolicy interface
206 * @max_sysfs_pct: The maximum performance in percentage enforced by
207 * intel pstate sysfs interface, unused when per cpu
208 * controls are enforced
209 * @min_policy_pct: The minimum performance in percentage enforced by
210 * cpufreq setpolicy interface
211 * @min_sysfs_pct: The minimum performance in percentage enforced by
212 * intel pstate sysfs interface, unused when per cpu
213 * controls are enforced
214 * 194 *
215 * Storage for user and policy defined limits. 195 * Storage for policy defined limits.
216 */ 196 */
217struct perf_limits { 197struct perf_limits {
218 int no_turbo;
219 int turbo_disabled;
220 int max_perf_pct;
221 int min_perf_pct;
222 int32_t max_perf; 198 int32_t max_perf;
223 int32_t min_perf; 199 int32_t min_perf;
224 int max_policy_pct; 200};
225 int max_sysfs_pct; 201
226 int min_policy_pct; 202/**
227 int min_sysfs_pct; 203 * struct global_params - Global parameters, mostly tunable via sysfs.
204 * @no_turbo: Whether or not to use turbo P-states.
 205 * @turbo_disabled: Whether or not turbo P-states are available at all,
206 * based on the MSR_IA32_MISC_ENABLE value and whether or
207 * not the maximum reported turbo P-state is different from
208 * the maximum reported non-turbo one.
209 * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo
210 * P-state capacity.
211 * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo
212 * P-state capacity.
213 */
214struct global_params {
215 bool no_turbo;
216 bool turbo_disabled;
217 int max_perf_pct;
218 int min_perf_pct;
228}; 219};
229 220
230/** 221/**
@@ -245,9 +236,7 @@ struct perf_limits {
245 * @prev_cummulative_iowait: IO Wait time difference from last and 236 * @prev_cummulative_iowait: IO Wait time difference from last and
246 * current sample 237 * current sample
247 * @sample: Storage for storing last Sample data 238 * @sample: Storage for storing last Sample data
248 * @perf_limits: Pointer to perf_limit unique to this CPU 239 * @perf_limits: Capacity limits unique to this CPU
249 * Not all field in the structure are applicable
250 * when per cpu controls are enforced
251 * @acpi_perf_data: Stores ACPI perf information read from _PSS 240 * @acpi_perf_data: Stores ACPI perf information read from _PSS
252 * @valid_pss_table: Set to true for valid ACPI _PSS entries found 241 * @valid_pss_table: Set to true for valid ACPI _PSS entries found
253 * @epp_powersave: Last saved HWP energy performance preference 242 * @epp_powersave: Last saved HWP energy performance preference
@@ -279,7 +268,7 @@ struct cpudata {
279 u64 prev_tsc; 268 u64 prev_tsc;
280 u64 prev_cummulative_iowait; 269 u64 prev_cummulative_iowait;
281 struct sample sample; 270 struct sample sample;
282 struct perf_limits *perf_limits; 271 struct perf_limits perf_limits;
283#ifdef CONFIG_ACPI 272#ifdef CONFIG_ACPI
284 struct acpi_processor_performance acpi_perf_data; 273 struct acpi_processor_performance acpi_perf_data;
285 bool valid_pss_table; 274 bool valid_pss_table;
@@ -364,16 +353,7 @@ static bool driver_registered __read_mostly;
364static bool acpi_ppc; 353static bool acpi_ppc;
365#endif 354#endif
366 355
367static struct perf_limits global; 356static struct global_params global;
368
369static void intel_pstate_init_limits(struct perf_limits *limits)
370{
371 memset(limits, 0, sizeof(*limits));
372 limits->max_perf_pct = 100;
373 limits->max_perf = int_ext_tofp(1);
374 limits->max_policy_pct = 100;
375 limits->max_sysfs_pct = 100;
376}
377 357
378static DEFINE_MUTEX(intel_pstate_driver_lock); 358static DEFINE_MUTEX(intel_pstate_driver_lock);
379static DEFINE_MUTEX(intel_pstate_limits_lock); 359static DEFINE_MUTEX(intel_pstate_limits_lock);
@@ -621,6 +601,14 @@ static inline void update_turbo_state(void)
621 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); 601 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
622} 602}
623 603
604static int min_perf_pct_min(void)
605{
606 struct cpudata *cpu = all_cpu_data[0];
607
608 return DIV_ROUND_UP(cpu->pstate.min_pstate * 100,
609 cpu->pstate.turbo_pstate);
610}
611
624static s16 intel_pstate_get_epb(struct cpudata *cpu_data) 612static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
625{ 613{
626 u64 epb; 614 u64 epb;
@@ -841,16 +829,13 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
841static void intel_pstate_hwp_set(struct cpufreq_policy *policy) 829static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
842{ 830{
843 int min, hw_min, max, hw_max, cpu; 831 int min, hw_min, max, hw_max, cpu;
844 struct perf_limits *perf_limits = &global;
845 u64 value, cap; 832 u64 value, cap;
846 833
847 for_each_cpu(cpu, policy->cpus) { 834 for_each_cpu(cpu, policy->cpus) {
848 struct cpudata *cpu_data = all_cpu_data[cpu]; 835 struct cpudata *cpu_data = all_cpu_data[cpu];
836 struct perf_limits *perf_limits = &cpu_data->perf_limits;
849 s16 epp; 837 s16 epp;
850 838
851 if (per_cpu_limits)
852 perf_limits = all_cpu_data[cpu]->perf_limits;
853
854 rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); 839 rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
855 hw_min = HWP_LOWEST_PERF(cap); 840 hw_min = HWP_LOWEST_PERF(cap);
856 if (global.no_turbo) 841 if (global.no_turbo)
@@ -1163,6 +1148,15 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
1163 1148
1164 global.no_turbo = clamp_t(int, input, 0, 1); 1149 global.no_turbo = clamp_t(int, input, 0, 1);
1165 1150
1151 if (global.no_turbo) {
1152 struct cpudata *cpu = all_cpu_data[0];
1153 int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
1154
1155 /* Squash the global minimum into the permitted range. */
1156 if (global.min_perf_pct > pct)
1157 global.min_perf_pct = pct;
1158 }
1159
1166 mutex_unlock(&intel_pstate_limits_lock); 1160 mutex_unlock(&intel_pstate_limits_lock);
1167 1161
1168 intel_pstate_update_policies(); 1162 intel_pstate_update_policies();
@@ -1191,11 +1185,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
1191 1185
1192 mutex_lock(&intel_pstate_limits_lock); 1186 mutex_lock(&intel_pstate_limits_lock);
1193 1187
1194 global.max_sysfs_pct = clamp_t(int, input, 0 , 100); 1188 global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);
1195 global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
1196 global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
1197 global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
1198 global.max_perf = percent_ext_fp(global.max_perf_pct);
1199 1189
1200 mutex_unlock(&intel_pstate_limits_lock); 1190 mutex_unlock(&intel_pstate_limits_lock);
1201 1191
@@ -1225,11 +1215,8 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
1225 1215
1226 mutex_lock(&intel_pstate_limits_lock); 1216 mutex_lock(&intel_pstate_limits_lock);
1227 1217
1228 global.min_sysfs_pct = clamp_t(int, input, 0 , 100); 1218 global.min_perf_pct = clamp_t(int, input,
1229 global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct); 1219 min_perf_pct_min(), global.max_perf_pct);
1230 global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
1231 global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
1232 global.min_perf = percent_ext_fp(global.min_perf_pct);
1233 1220
1234 mutex_unlock(&intel_pstate_limits_lock); 1221 mutex_unlock(&intel_pstate_limits_lock);
1235 1222
@@ -1650,14 +1637,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
1650 int max_perf = cpu->pstate.turbo_pstate; 1637 int max_perf = cpu->pstate.turbo_pstate;
1651 int max_perf_adj; 1638 int max_perf_adj;
1652 int min_perf; 1639 int min_perf;
1653 struct perf_limits *perf_limits = &global; 1640 struct perf_limits *perf_limits = &cpu->perf_limits;
1654 1641
1655 if (global.no_turbo || global.turbo_disabled) 1642 if (global.no_turbo || global.turbo_disabled)
1656 max_perf = cpu->pstate.max_pstate; 1643 max_perf = cpu->pstate.max_pstate;
1657 1644
1658 if (per_cpu_limits)
1659 perf_limits = cpu->perf_limits;
1660
1661 /* 1645 /*
1662 * performance can be limited by user through sysfs, by cpufreq 1646 * performance can be limited by user through sysfs, by cpufreq
1663 * policy, or by cpu specific default values determined through 1647 * policy, or by cpu specific default values determined through
@@ -1968,18 +1952,11 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
1968 cpu = all_cpu_data[cpunum]; 1952 cpu = all_cpu_data[cpunum];
1969 1953
1970 if (!cpu) { 1954 if (!cpu) {
1971 unsigned int size = sizeof(struct cpudata); 1955 cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
1972
1973 if (per_cpu_limits)
1974 size += sizeof(struct perf_limits);
1975
1976 cpu = kzalloc(size, GFP_KERNEL);
1977 if (!cpu) 1956 if (!cpu)
1978 return -ENOMEM; 1957 return -ENOMEM;
1979 1958
1980 all_cpu_data[cpunum] = cpu; 1959 all_cpu_data[cpunum] = cpu;
1981 if (per_cpu_limits)
1982 cpu->perf_limits = (struct perf_limits *)(cpu + 1);
1983 1960
1984 cpu->epp_default = -EINVAL; 1961 cpu->epp_default = -EINVAL;
1985 cpu->epp_powersave = -EINVAL; 1962 cpu->epp_powersave = -EINVAL;
@@ -2045,8 +2022,9 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
2045} 2022}
2046 2023
2047static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, 2024static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
2048 struct perf_limits *limits) 2025 struct cpudata *cpu)
2049{ 2026{
2027 struct perf_limits *limits = &cpu->perf_limits;
2050 int32_t max_policy_perf, min_policy_perf; 2028 int32_t max_policy_perf, min_policy_perf;
2051 2029
2052 max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq); 2030 max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
@@ -2061,29 +2039,45 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
2061 } 2039 }
2062 2040
2063 /* Normalize user input to [min_perf, max_perf] */ 2041 /* Normalize user input to [min_perf, max_perf] */
2064 limits->min_perf = max(min_policy_perf, 2042 if (per_cpu_limits) {
2065 percent_ext_fp(limits->min_sysfs_pct)); 2043 limits->min_perf = min_policy_perf;
2066 limits->min_perf = min(limits->min_perf, max_policy_perf); 2044 limits->max_perf = max_policy_perf;
2067 limits->max_perf = min(max_policy_perf, 2045 } else {
2068 percent_ext_fp(limits->max_sysfs_pct)); 2046 int32_t global_min, global_max;
2069 limits->max_perf = max(min_policy_perf, limits->max_perf); 2047
2048 /* Global limits are in percent of the maximum turbo P-state. */
2049 global_max = percent_ext_fp(global.max_perf_pct);
2050 global_min = percent_ext_fp(global.min_perf_pct);
2051 if (policy->cpuinfo.max_freq != cpu->pstate.turbo_freq) {
2052 int32_t turbo_factor;
2053
2054 turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate,
2055 cpu->pstate.max_pstate);
2056 global_min = mul_ext_fp(global_min, turbo_factor);
2057 global_max = mul_ext_fp(global_max, turbo_factor);
2058 }
2059 global_min = clamp_t(int32_t, global_min, 0, global_max);
2070 2060
2071 /* Make sure min_perf <= max_perf */ 2061 limits->min_perf = max(min_policy_perf, global_min);
2072 limits->min_perf = min(limits->min_perf, limits->max_perf); 2062 limits->min_perf = min(limits->min_perf, max_policy_perf);
2063 limits->max_perf = min(max_policy_perf, global_max);
2064 limits->max_perf = max(min_policy_perf, limits->max_perf);
2065
2066 /* Make sure min_perf <= max_perf */
2067 limits->min_perf = min(limits->min_perf, limits->max_perf);
2068 }
2073 2069
2074 limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS); 2070 limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
2075 limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS); 2071 limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
2076 limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
2077 limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
2078 2072
2079 pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, 2073 pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
2080 limits->max_perf_pct, limits->min_perf_pct); 2074 fp_ext_toint(limits->max_perf * 100),
2075 fp_ext_toint(limits->min_perf * 100));
2081} 2076}
2082 2077
2083static int intel_pstate_set_policy(struct cpufreq_policy *policy) 2078static int intel_pstate_set_policy(struct cpufreq_policy *policy)
2084{ 2079{
2085 struct cpudata *cpu; 2080 struct cpudata *cpu;
2086 struct perf_limits *perf_limits = &global;
2087 2081
2088 if (!policy->cpuinfo.max_freq) 2082 if (!policy->cpuinfo.max_freq)
2089 return -ENODEV; 2083 return -ENODEV;
@@ -2101,12 +2095,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
2101 policy->max = policy->cpuinfo.max_freq; 2095 policy->max = policy->cpuinfo.max_freq;
2102 } 2096 }
2103 2097
2104 if (per_cpu_limits)
2105 perf_limits = cpu->perf_limits;
2106
2107 mutex_lock(&intel_pstate_limits_lock); 2098 mutex_lock(&intel_pstate_limits_lock);
2108 2099
2109 intel_pstate_update_perf_limits(policy, perf_limits); 2100 intel_pstate_update_perf_limits(policy, cpu);
2110 2101
2111 if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { 2102 if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
2112 /* 2103 /*
@@ -2142,17 +2133,6 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
2142 policy->policy != CPUFREQ_POLICY_PERFORMANCE) 2133 policy->policy != CPUFREQ_POLICY_PERFORMANCE)
2143 return -EINVAL; 2134 return -EINVAL;
2144 2135
2145 /* When per-CPU limits are used, sysfs limits are not used */
2146 if (!per_cpu_limits) {
2147 unsigned int max_freq, min_freq;
2148
2149 max_freq = policy->cpuinfo.max_freq *
2150 global.max_sysfs_pct / 100;
2151 min_freq = policy->cpuinfo.max_freq *
2152 global.min_sysfs_pct / 100;
2153 cpufreq_verify_within_limits(policy, min_freq, max_freq);
2154 }
2155
2156 return 0; 2136 return 0;
2157} 2137}
2158 2138
@@ -2192,8 +2172,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
2192 2172
2193 cpu = all_cpu_data[policy->cpu]; 2173 cpu = all_cpu_data[policy->cpu];
2194 2174
2195 if (per_cpu_limits) 2175 cpu->perf_limits.max_perf = int_ext_tofp(1);
2196 intel_pstate_init_limits(cpu->perf_limits); 2176 cpu->perf_limits.min_perf = 0;
2197 2177
2198 policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; 2178 policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
2199 policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; 2179 policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
@@ -2252,6 +2232,8 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
2252 2232
2253 cpufreq_verify_within_cpu_limits(policy); 2233 cpufreq_verify_within_cpu_limits(policy);
2254 2234
2235 intel_pstate_update_perf_limits(policy, cpu);
2236
2255 return 0; 2237 return 0;
2256} 2238}
2257 2239
@@ -2354,7 +2336,8 @@ static int intel_pstate_register_driver(void)
2354{ 2336{
2355 int ret; 2337 int ret;
2356 2338
2357 intel_pstate_init_limits(&global); 2339 memset(&global, 0, sizeof(global));
2340 global.max_perf_pct = 100;
2358 2341
2359 ret = cpufreq_register_driver(intel_pstate_driver); 2342 ret = cpufreq_register_driver(intel_pstate_driver);
2360 if (ret) { 2343 if (ret) {
@@ -2362,6 +2345,8 @@ static int intel_pstate_register_driver(void)
2362 return ret; 2345 return ret;
2363 } 2346 }
2364 2347
2348 global.min_perf_pct = min_perf_pct_min();
2349
2365 mutex_lock(&intel_pstate_limits_lock); 2350 mutex_lock(&intel_pstate_limits_lock);
2366 driver_registered = true; 2351 driver_registered = true;
2367 mutex_unlock(&intel_pstate_limits_lock); 2352 mutex_unlock(&intel_pstate_limits_lock);