Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r--  drivers/cpufreq/intel_pstate.c | 206
1 file changed, 189 insertions(+), 17 deletions(-)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 4b644526fd59..8b5a415ee14a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -64,6 +64,25 @@ static inline int ceiling_fp(int32_t x)
         return ret;
 }
 
+/**
+ * struct sample -      Store performance sample
+ * @core_pct_busy:      Ratio of APERF/MPERF in percent, which is actual
+ *                      performance during last sample period
+ * @busy_scaled:        Scaled busy value which is used to calculate next
+ *                      P state. This can be different than core_pct_busy
+ *                      to account for cpu idle period
+ * @aperf:              Difference of actual performance frequency clock count
+ *                      read from APERF MSR between last and current sample
+ * @mperf:              Difference of maximum performance frequency clock count
+ *                      read from MPERF MSR between last and current sample
+ * @tsc:                Difference of time stamp counter between last and
+ *                      current sample
+ * @freq:               Effective frequency calculated from APERF/MPERF
+ * @time:               Current time from scheduler
+ *
+ * This structure is used in the cpudata structure to store performance sample
+ * data for choosing next P State.
+ */
 struct sample {
         int32_t core_pct_busy;
         int32_t busy_scaled;
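The @aperf and @mperf members above hold the deltas the driver turns into a busy percentage. A minimal standalone sketch of that ratio, using plain integer math (the driver performs the same calculation in fixed point):

#include <stdint.h>

/* Sketch only: busy percentage from APERF/MPERF deltas. APERF counts actual
 * cycles, MPERF counts cycles at the guaranteed (non-turbo) frequency, so
 * their ratio reflects delivered performance over the sample period. */
static inline uint32_t core_busy_percent(uint64_t aperf_delta, uint64_t mperf_delta)
{
        if (mperf_delta == 0)
                return 0;       /* no reference cycles elapsed */
        return (uint32_t)((aperf_delta * 100) / mperf_delta);
}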
@@ -74,6 +93,20 @@ struct sample {
         u64 time;
 };
 
+/**
+ * struct pstate_data - Store P state data
+ * @current_pstate:     Current requested P state
+ * @min_pstate:         Min P state possible for this platform
+ * @max_pstate:         Max P state possible for this platform
+ * @max_pstate_physical:This is physical Max P state for a processor
+ *                      This can be higher than the max_pstate which can
+ *                      be limited by platform thermal design power limits
+ * @scaling:            Scaling factor to convert frequency to cpufreq
+ *                      frequency units
+ * @turbo_pstate:       Max Turbo P state possible for this platform
+ *
+ * Stores the per cpu model P state limits and current P state.
+ */
 struct pstate_data {
         int current_pstate;
         int min_pstate;
@@ -83,6 +116,19 @@ struct pstate_data {
         int turbo_pstate;
 };
 
+/**
+ * struct vid_data -    Stores voltage information data
+ * @min:                VID data for this platform corresponding to
+ *                      the lowest P state
+ * @max:                VID data corresponding to the highest P State.
+ * @turbo:              VID data for turbo P state
+ * @ratio:              Ratio of (vid max - vid min) /
+ *                      (max P state - Min P State)
+ *
+ * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling)
+ * This data is used in Atom platforms, where in addition to target P state,
+ * the voltage data needs to be specified to select next P State.
+ */
 struct vid_data {
         int min;
         int max;
@@ -90,6 +136,18 @@ struct vid_data {
         int32_t ratio;
 };
 
+/**
+ * struct _pid -        Stores PID data
+ * @setpoint:           Target set point for busyness or performance
+ * @integral:           Storage for accumulated error values
+ * @p_gain:             PID proportional gain
+ * @i_gain:             PID integral gain
+ * @d_gain:             PID derivative gain
+ * @deadband:           PID deadband
+ * @last_err:           Last error storage for integral part of PID calculation
+ *
+ * Stores PID coefficients and last error for PID controller.
+ */
 struct _pid {
         int setpoint;
         int32_t integral;
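The fields above map onto a textbook PID step. A rough, illustrative sketch of how such a controller turns the measured busy value into an adjustment (this is not the driver's pid_calc(); the driver works in fixed point and also applies the deadband):

#include <stdint.h>

/* Illustrative PID step over plain integers. */
struct pid_sketch {
        int32_t setpoint;               /* target busy value */
        int32_t integral;               /* accumulated error */
        int32_t last_err;               /* previous error, for the D term */
        int32_t p_gain, i_gain, d_gain;
};

static int32_t pid_step(struct pid_sketch *pid, int32_t busy)
{
        int32_t err = pid->setpoint - busy;
        int32_t pterm, iterm, dterm;

        pid->integral += err;
        pterm = pid->p_gain * err;
        iterm = pid->i_gain * pid->integral;
        dterm = pid->d_gain * (err - pid->last_err);
        pid->last_err = err;

        /* The sign of the result tells the caller whether to raise or
         * lower the P state. */
        return pterm + iterm + dterm;
}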
@@ -100,6 +158,23 @@ struct _pid {
         int32_t last_err;
 };
 
+/**
+ * struct cpudata -     Per CPU instance data storage
+ * @cpu:                CPU number for this instance data
+ * @update_util:        CPUFreq utility callback information
+ * @pstate:             Stores P state limits for this CPU
+ * @vid:                Stores VID limits for this CPU
+ * @pid:                Stores PID parameters for this CPU
+ * @last_sample_time:   Last Sample time
+ * @prev_aperf:         Last APERF value read from APERF MSR
+ * @prev_mperf:         Last MPERF value read from MPERF MSR
+ * @prev_tsc:           Last timestamp counter (TSC) value
+ * @prev_cummulative_iowait: IO Wait time difference from last and
+ *                      current sample
+ * @sample:             Storage for storing last Sample data
+ *
+ * This structure stores per CPU instance data for all CPUs.
+ */
 struct cpudata {
         int cpu;
 
@@ -118,6 +193,19 @@ struct cpudata {
 };
 
 static struct cpudata **all_cpu_data;
+
+/**
+ * struct pstate_adjust_policy - Stores static PID configuration data
+ * @sample_rate_ms:     PID calculation sample rate in ms
+ * @sample_rate_ns:     Sample rate calculation in ns
+ * @deadband:           PID deadband
+ * @setpoint:           PID Setpoint
+ * @p_gain_pct:         PID proportional gain
+ * @i_gain_pct:         PID integral gain
+ * @d_gain_pct:         PID derivative gain
+ *
+ * Stores per CPU model static PID configuration data.
+ */
 struct pstate_adjust_policy {
         int sample_rate_ms;
         s64 sample_rate_ns;
@@ -128,6 +216,20 @@ struct pstate_adjust_policy {
         int i_gain_pct;
 };
 
+/**
+ * struct pstate_funcs - Per CPU model specific callbacks
+ * @get_max:            Callback to get maximum non turbo effective P state
+ * @get_max_physical:   Callback to get maximum non turbo physical P state
+ * @get_min:            Callback to get minimum P state
+ * @get_turbo:          Callback to get turbo P state
+ * @get_scaling:        Callback to get frequency scaling factor
+ * @get_val:            Callback to convert P state to actual MSR write value
+ * @get_vid:            Callback to get VID data for Atom platforms
+ * @get_target_pstate:  Callback to a function to calculate next P state to use
+ *
+ * Core and Atom CPU models have different way to get P State limits. This
+ * structure is used to store those callbacks.
+ */
 struct pstate_funcs {
         int (*get_max)(void);
         int (*get_max_physical)(void);
@@ -139,6 +241,11 @@ struct pstate_funcs {
         int32_t (*get_target_pstate)(struct cpudata *);
 };
 
+/**
+ * struct cpu_defaults - Per CPU model default config data
+ * @pid_policy: PID config data
+ * @funcs:              Callback function data
+ */
 struct cpu_defaults {
         struct pstate_adjust_policy pid_policy;
         struct pstate_funcs funcs;
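For illustration, a per-model entry ties these pieces together: a pstate_adjust_policy carrying that model's PID tuning and a pstate_funcs block pointing at its callbacks. A hypothetical sketch follows; the numbers are placeholders rather than the driver's actual tuning, and the callbacks would be the core_*()/atom_*() helpers defined elsewhere in this file.

/* Hypothetical per-model defaults; values and the commented callback
 * assignments are placeholders for illustration only. */
static struct cpu_defaults example_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
                .setpoint = 97,
                .p_gain_pct = 20,
                .i_gain_pct = 0,
                .d_gain_pct = 0,
        },
        /* .funcs = { .get_max = ..., .get_min = ..., .get_target_pstate = ..., }, */
};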
@@ -151,6 +258,34 @@ static struct pstate_adjust_policy pid_params;
 static struct pstate_funcs pstate_funcs;
 static int hwp_active;
 
+
+/**
+ * struct perf_limits - Store user and policy limits
+ * @no_turbo:           User requested turbo state from intel_pstate sysfs
+ * @turbo_disabled:     Platform turbo status either from msr
+ *                      MSR_IA32_MISC_ENABLE or when maximum available pstate
+ *                      matches the maximum turbo pstate
+ * @max_perf_pct:       Effective maximum performance limit in percentage, this
+ *                      is minimum of either limits enforced by cpufreq policy
+ *                      or limits from user set limits via intel_pstate sysfs
+ * @min_perf_pct:       Effective minimum performance limit in percentage, this
+ *                      is maximum of either limits enforced by cpufreq policy
+ *                      or limits from user set limits via intel_pstate sysfs
+ * @max_perf:           This is a scaled value between 0 to 255 for max_perf_pct
+ *                      This value is used to limit max pstate
+ * @min_perf:           This is a scaled value between 0 to 255 for min_perf_pct
+ *                      This value is used to limit min pstate
+ * @max_policy_pct:     The maximum performance in percentage enforced by
+ *                      cpufreq setpolicy interface
+ * @max_sysfs_pct:      The maximum performance in percentage enforced by
+ *                      intel pstate sysfs interface
+ * @min_policy_pct:     The minimum performance in percentage enforced by
+ *                      cpufreq setpolicy interface
+ * @min_sysfs_pct:      The minimum performance in percentage enforced by
+ *                      intel pstate sysfs interface
+ *
+ * Storage for user and policy defined limits.
+ */
 struct perf_limits {
         int no_turbo;
         int turbo_disabled;
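The scaled @max_perf and @min_perf values are what actually clamps the chosen P state: the fraction is applied to the highest available P state and the request is bounded by the result. A minimal sketch of that clamping, assuming the 0-255 (1/256 fixed-point) encoding described above:

#include <stdint.h>

/* Sketch: bound a requested P state by the scaled limits. min_perf/max_perf
 * are fractions of the maximum available P state in units of 1/256. */
static int clamp_pstate(int requested, int max_avail_pstate,
                        int32_t min_perf, int32_t max_perf)
{
        int max_p = (max_avail_pstate * max_perf) >> 8;
        int min_p = (max_avail_pstate * min_perf) >> 8;

        if (requested > max_p)
                requested = max_p;
        if (requested < min_p)
                requested = min_p;
        return requested;
}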
@@ -910,7 +1045,14 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
         cpu->prev_aperf = aperf;
         cpu->prev_mperf = mperf;
         cpu->prev_tsc = tsc;
-        return true;
+        /*
+         * First time this function is invoked in a given cycle, all of the
+         * previous sample data fields are equal to zero or stale and they must
+         * be populated with meaningful numbers for things to work, so assume
+         * that sample.time will always be reset before setting the utilization
+         * update hook and make the caller skip the sample then.
+         */
+        return !!cpu->last_sample_time;
 }
 
 static inline int32_t get_avg_frequency(struct cpudata *cpu)
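The boolean return value lets the caller skip acting on the first, meaningless sample taken after the update hook is (re)set. A hedged sketch of the consumer side, using names from elsewhere in this driver (the exact call site is not part of this diff):

/* Sketch: only drive the PID/P-state adjustment when the sample is valid,
 * i.e. not the first one taken after the utilization hook was installed. */
static void sample_and_adjust(struct cpudata *cpu, u64 time, s64 delta_ns)
{
        if (delta_ns >= pid_params.sample_rate_ns) {
                bool sample_taken = intel_pstate_sample(cpu, time);

                if (sample_taken)
                        intel_pstate_adjust_busy_pstate(cpu);
        }
}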
@@ -984,8 +1126,7 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
          * enough period of time to adjust our busyness.
          */
         duration_ns = cpu->sample.time - cpu->last_sample_time;
-        if ((s64)duration_ns > pid_params.sample_rate_ns * 3
-            && cpu->last_sample_time > 0) {
+        if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
                 sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
                                       int_tofp(duration_ns));
                 core_busy = mul_fp(core_busy, sample_ratio);
@@ -1100,10 +1241,8 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
         intel_pstate_get_cpu_pstates(cpu);
 
         intel_pstate_busy_pid_reset(cpu);
-        intel_pstate_sample(cpu, 0);
 
         cpu->update_util.func = intel_pstate_update_util;
-        cpufreq_set_update_util_data(cpunum, &cpu->update_util);
 
         pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
 
@@ -1122,22 +1261,54 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
         return get_avg_frequency(cpu);
 }
 
+static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
+{
+        struct cpudata *cpu = all_cpu_data[cpu_num];
+
+        /* Prevent intel_pstate_update_util() from using stale data. */
+        cpu->sample.time = 0;
+        cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
+}
+
+static void intel_pstate_clear_update_util_hook(unsigned int cpu)
+{
+        cpufreq_set_update_util_data(cpu, NULL);
+        synchronize_sched();
+}
+
+static void intel_pstate_set_performance_limits(struct perf_limits *limits)
+{
+        limits->no_turbo = 0;
+        limits->turbo_disabled = 0;
+        limits->max_perf_pct = 100;
+        limits->max_perf = int_tofp(1);
+        limits->min_perf_pct = 100;
+        limits->min_perf = int_tofp(1);
+        limits->max_policy_pct = 100;
+        limits->max_sysfs_pct = 100;
+        limits->min_policy_pct = 0;
+        limits->min_sysfs_pct = 0;
+}
+
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
         if (!policy->cpuinfo.max_freq)
                 return -ENODEV;
 
-        if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
-            policy->max >= policy->cpuinfo.max_freq) {
-                pr_debug("intel_pstate: set performance\n");
+        intel_pstate_clear_update_util_hook(policy->cpu);
+
+        if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
                 limits = &performance_limits;
-                if (hwp_active)
-                        intel_pstate_hwp_set(policy->cpus);
-                return 0;
+                if (policy->max >= policy->cpuinfo.max_freq) {
+                        pr_debug("intel_pstate: set performance\n");
+                        intel_pstate_set_performance_limits(limits);
+                        goto out;
+                }
+        } else {
+                pr_debug("intel_pstate: set powersave\n");
+                limits = &powersave_limits;
         }
 
-        pr_debug("intel_pstate: set powersave\n");
-        limits = &powersave_limits;
         limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
         limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
         limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
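The new helpers make the ordering in intel_pstate_set_policy() explicit: clear the hook (and wait for in-flight callbacks via synchronize_sched()) before touching the limits, then re-arm it with sample.time reset to zero so the first sample after the change is discarded. A hypothetical outline of that pattern, with update_limits() standing in for the limit recomputation in the middle of the function:

/* Hypothetical outline of the pattern used above; update_limits() is a
 * placeholder for the percentage/limit recomputation in set_policy(). */
static int set_policy_outline(struct cpufreq_policy *policy)
{
        /* Stop the scheduler callback and wait for any running instance. */
        intel_pstate_clear_update_util_hook(policy->cpu);

        update_limits(policy);          /* safe: no concurrent update_util() */

        /* Re-arm the callback; sample.time == 0 makes the first sample a no-op. */
        intel_pstate_set_update_util_hook(policy->cpu);
        return 0;
}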
@@ -1187,8 +1361,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 
         pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
 
-        cpufreq_set_update_util_data(cpu_num, NULL);
-        synchronize_sched();
+        intel_pstate_clear_update_util_hook(cpu_num);
 
         if (hwp_active)
                 return;
@@ -1455,8 +1628,7 @@ out:
         get_online_cpus();
         for_each_online_cpu(cpu) {
                 if (all_cpu_data[cpu]) {
-                        cpufreq_set_update_util_data(cpu, NULL);
-                        synchronize_sched();
+                        intel_pstate_clear_update_util_hook(cpu);
                         kfree(all_cpu_data[cpu]);
                 }
         }