commit ab271bc95b40960799c084b0e94a33c4272ec0bf
tree c60bb7015f75267fc72808e6dccecbda02e486e6
parent 08a10002bed151f6df201715adb80c1c5e7fe7ca
parent 57ccaf33845491ac7ee41796511cec8dcd49777e
author Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2017-09-03 18:05:42 -0400
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2017-09-03 18:05:42 -0400
Merge branch 'intel_pstate'
* intel_pstate:
cpufreq: intel_pstate: Shorten a couple of long names
cpufreq: intel_pstate: Simplify intel_pstate_adjust_pstate()
cpufreq: intel_pstate: Improve IO performance with per-core P-states
cpufreq: intel_pstate: Drop INTEL_PSTATE_HWP_SAMPLING_INTERVAL
cpufreq: intel_pstate: Drop ->update_util from pstate_funcs
cpufreq: intel_pstate: Do not use PID-based P-state selection
 Documentation/admin-guide/pm/intel_pstate.rst |  61 +-
 drivers/cpufreq/intel_pstate.c                | 320 +--------
 2 files changed, 28 insertions(+), 353 deletions(-)
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index 1d6249825efc..d2b6fda3d67b 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -167,35 +167,17 @@ is set.
 ``powersave``
 .............
 
-Without HWP, this P-state selection algorithm generally depends on the
-processor model and/or the system profile setting in the ACPI tables and there
-are two variants of it.
-
-One of them is used with processors from the Atom line and (regardless of the
-processor model) on platforms with the system profile in the ACPI tables set to
-"mobile" (laptops mostly), "tablet", "appliance PC", "desktop", or
-"workstation". It is also used with processors supporting the HWP feature if
-that feature has not been enabled (that is, with the ``intel_pstate=no_hwp``
-argument in the kernel command line). It is similar to the algorithm
+Without HWP, this P-state selection algorithm is similar to the algorithm
 implemented by the generic ``schedutil`` scaling governor except that the
 utilization metric used by it is based on numbers coming from feedback
-registers of the CPU. It generally selects P-states proportional to the
-current CPU utilization, so it is referred to as the "proportional" algorithm.
+registers of the CPU. It generally selects P-states proportional to the
+current CPU utilization.
 
-The second variant of the ``powersave`` P-state selection algorithm, used in all
-of the other cases (generally, on processors from the Core line, so it is
-referred to as the "Core" algorithm), is based on the values read from the APERF
-and MPERF feedback registers and the previously requested target P-state.
-It does not really take CPU utilization into account explicitly, but as a rule
-it causes the CPU P-state to ramp up very quickly in response to increased
-utilization which is generally desirable in server environments.
-
-Regardless of the variant, this algorithm is run by the driver's utilization
-update callback for the given CPU when it is invoked by the CPU scheduler, but
-not more often than every 10 ms (that can be tweaked via ``debugfs`` in `this
-particular case <Tuning Interface in debugfs_>`_). Like in the ``performance``
-case, the hardware configuration is not touched if the new P-state turns out to
-be the same as the current one.
+This algorithm is run by the driver's utilization update callback for the
+given CPU when it is invoked by the CPU scheduler, but not more often than
+every 10 ms. Like in the ``performance`` case, the hardware configuration
+is not touched if the new P-state turns out to be the same as the current
+one.
 
 This is the default P-state selection algorithm if the
 :c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
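
[Editor's note] The hunk above leaves a single ``powersave`` algorithm. The
sketch below distills what that surviving "proportional" selection does. It is
a standalone userspace illustration modeled on the driver's
get_target_pstate(); the 25% headroom factor and the iowait floor are
assumptions drawn from that function (which this diff only renames later), so
treat this as a sketch, not driver source.

#include <stdint.h>

/*
 * Simplified sketch of "proportional" P-state selection. Fixed-point
 * arithmetic, policy-limit clamping and the averaging against
 * get_avg_pstate() are all omitted.
 */
static int target_pstate_sketch(uint64_t mperf_delta, uint64_t tsc_delta,
                                int turbo_pstate, int min_pstate,
                                double iowait_floor)
{
        /*
         * MPERF counts only unhalted (C0) cycles while the TSC counts all
         * cycles, so their ratio over the sampling window is the CPU's
         * utilization.
         */
        double busy_frac = (double)mperf_delta / (double)tsc_delta;

        /*
         * A recent SCHED_CPUFREQ_IOWAIT flag floors the utilization so
         * I/O-bound workloads ramp up instead of idling at low speed.
         */
        if (busy_frac < iowait_floor)
                busy_frac = iowait_floor;

        /* Scale the highest P-state, plus ~25% headroom, by utilization. */
        int target = (int)((turbo_pstate + (turbo_pstate >> 2)) * busy_frac);

        return target < min_pstate ? min_pstate : target;
}

For example, with mperf_delta = 6,000,000 and tsc_delta = 24,000,000 over a
window (25% busy), no boost, and turbo_pstate = 32, the sketch requests
0.25 * 40 = P-state 10.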
@@ -720,34 +702,7 @@ P-state is called, the ``ftrace`` filter can be set to to
  gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
  <idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
 
-Tuning Interface in ``debugfs``
--------------------------------
-
-The ``powersave`` algorithm provided by ``intel_pstate`` for `the Core line of
-processors in the active mode <powersave_>`_ is based on a `PID controller`_
-whose parameters were chosen to address a number of different use cases at the
-same time. However, it still is possible to fine-tune it to a specific workload
-and the ``debugfs`` interface under ``/sys/kernel/debug/pstate_snb/`` is
-provided for this purpose. [Note that the ``pstate_snb`` directory will be
-present only if the specific P-state selection algorithm matching the interface
-in it actually is in use.]
-
-The following files present in that directory can be used to modify the PID
-controller parameters at run time:
-
-| ``deadband``
-| ``d_gain_pct``
-| ``i_gain_pct``
-| ``p_gain_pct``
-| ``sample_rate_ms``
-| ``setpoint``
-
-Note, however, that achieving desirable results this way generally requires
-expert-level understanding of the power vs performance tradeoff, so extra care
-is recommended when attempting to do that.
-
 
 .. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
 .. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
 .. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
-.. _PID controller: https://en.wikipedia.org/wiki/PID_controller
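
[Editor's note] Since the tuning-interface section disappears here, it is
worth recording the defaults those files exposed. The values mirror the
pid_params initializer removed from the driver later in this patch; the array
itself is illustrative, not kernel code.

/*
 * Defaults behind /sys/kernel/debug/pstate_snb/ prior to this removal,
 * mirroring the driver's pid_params initializer (see the hunk below).
 */
static const struct { const char *file; int value; } pid_defaults[] = {
        { "sample_rate_ms", 10 },
        { "deadband",        0 },
        { "setpoint",       97 },
        { "p_gain_pct",     20 },
        { "i_gain_pct",      0 },
        { "d_gain_pct",      0 },
};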
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 0c50637e6bda..8f95265d5f52 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -37,8 +37,7 @@
 #include <asm/cpufeature.h>
 #include <asm/intel-family.h>
 
-#define INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC)
-#define INTEL_PSTATE_HWP_SAMPLING_INTERVAL (50 * NSEC_PER_MSEC)
+#define INTEL_PSTATE_SAMPLING_INTERVAL (10 * NSEC_PER_MSEC)
 
 #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000
 #define INTEL_CPUFREQ_TRANSITION_DELAY 500
@@ -173,28 +172,6 @@ struct vid_data {
 };
 
 /**
- * struct _pid - Stores PID data
- * @setpoint: Target set point for busyness or performance
- * @integral: Storage for accumulated error values
- * @p_gain: PID proportional gain
- * @i_gain: PID integral gain
- * @d_gain: PID derivative gain
- * @deadband: PID deadband
- * @last_err: Last error storage for integral part of PID calculation
- *
- * Stores PID coefficients and last error for PID controller.
- */
-struct _pid {
-        int setpoint;
-        int32_t integral;
-        int32_t p_gain;
-        int32_t i_gain;
-        int32_t d_gain;
-        int deadband;
-        int32_t last_err;
-};
-
-/**
  * struct global_params - Global parameters, mostly tunable via sysfs.
  * @no_turbo: Whether or not to use turbo P-states.
  * @turbo_disabled: Whethet or not turbo P-states are available at all,
@@ -223,7 +200,6 @@ struct global_params {
  * @last_update: Time of the last update.
  * @pstate: Stores P state limits for this CPU
  * @vid: Stores VID limits for this CPU
- * @pid: Stores PID parameters for this CPU
  * @last_sample_time: Last Sample time
  * @aperf_mperf_shift: Number of clock cycles after aperf, merf is incremented
  *                     This shift is a multiplier to mperf delta to
@@ -258,7 +234,6 @@ struct cpudata {
 
         struct pstate_data pstate;
         struct vid_data vid;
-        struct _pid pid;
 
         u64 last_update;
         u64 last_sample_time;
@@ -284,28 +259,6 @@ struct cpudata {
 static struct cpudata **all_cpu_data;
 
 /**
- * struct pstate_adjust_policy - Stores static PID configuration data
- * @sample_rate_ms: PID calculation sample rate in ms
- * @sample_rate_ns: Sample rate calculation in ns
- * @deadband: PID deadband
- * @setpoint: PID Setpoint
- * @p_gain_pct: PID proportional gain
- * @i_gain_pct: PID integral gain
- * @d_gain_pct: PID derivative gain
- *
- * Stores per CPU model static PID configuration data.
- */
-struct pstate_adjust_policy {
-        int sample_rate_ms;
-        s64 sample_rate_ns;
-        int deadband;
-        int setpoint;
-        int p_gain_pct;
-        int d_gain_pct;
-        int i_gain_pct;
-};
-
-/**
  * struct pstate_funcs - Per CPU model specific callbacks
  * @get_max: Callback to get maximum non turbo effective P state
  * @get_max_physical: Callback to get maximum non turbo physical P state
@@ -314,7 +267,6 @@ struct pstate_adjust_policy {
  * @get_scaling: Callback to get frequency scaling factor
  * @get_val: Callback to convert P state to actual MSR write value
  * @get_vid: Callback to get VID data for Atom platforms
- * @update_util: Active mode utilization update callback.
  *
  * Core and Atom CPU models have different way to get P State limits. This
  * structure is used to store those callbacks.
@@ -328,20 +280,9 @@ struct pstate_funcs {
         int (*get_aperf_mperf_shift)(void);
         u64 (*get_val)(struct cpudata*, int pstate);
         void (*get_vid)(struct cpudata *);
-        void (*update_util)(struct update_util_data *data, u64 time,
-                            unsigned int flags);
 };
 
 static struct pstate_funcs pstate_funcs __read_mostly;
-static struct pstate_adjust_policy pid_params __read_mostly = {
-        .sample_rate_ms = 10,
-        .sample_rate_ns = 10 * NSEC_PER_MSEC,
-        .deadband = 0,
-        .setpoint = 97,
-        .p_gain_pct = 20,
-        .d_gain_pct = 0,
-        .i_gain_pct = 0,
-};
 
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
@@ -509,56 +450,6 @@ static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 }
 #endif
 
-static signed int pid_calc(struct _pid *pid, int32_t busy)
-{
-        signed int result;
-        int32_t pterm, dterm, fp_error;
-        int32_t integral_limit;
-
-        fp_error = pid->setpoint - busy;
-
-        if (abs(fp_error) <= pid->deadband)
-                return 0;
-
-        pterm = mul_fp(pid->p_gain, fp_error);
-
-        pid->integral += fp_error;
-
-        /*
-         * We limit the integral here so that it will never
-         * get higher than 30. This prevents it from becoming
-         * too large an input over long periods of time and allows
-         * it to get factored out sooner.
-         *
-         * The value of 30 was chosen through experimentation.
-         */
-        integral_limit = int_tofp(30);
-        if (pid->integral > integral_limit)
-                pid->integral = integral_limit;
-        if (pid->integral < -integral_limit)
-                pid->integral = -integral_limit;
-
-        dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
-        pid->last_err = fp_error;
-
-        result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
-        result = result + (1 << (FRAC_BITS-1));
-        return (signed int)fp_toint(result);
-}
-
-static inline void intel_pstate_pid_reset(struct cpudata *cpu)
-{
-        struct _pid *pid = &cpu->pid;
-
-        pid->p_gain = percent_fp(pid_params.p_gain_pct);
-        pid->d_gain = percent_fp(pid_params.d_gain_pct);
-        pid->i_gain = percent_fp(pid_params.i_gain_pct);
-        pid->setpoint = int_tofp(pid_params.setpoint);
-        pid->last_err = pid->setpoint - int_tofp(100);
-        pid->deadband = int_tofp(pid_params.deadband);
-        pid->integral = 0;
-}
-
 static inline void update_turbo_state(void)
 {
         u64 misc_en;
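
[Editor's note] The deleted pid_calc() is written in the driver's fixed-point
arithmetic, whose helpers fall outside this diff. A minimal sketch of those
helpers, assuming FRAC_BITS is 8 (its value in intel_pstate.c at this point):

#include <stdint.h>

#define FRAC_BITS 8                             /* assumed driver value */

#define int_tofp(x) ((int32_t)(x) << FRAC_BITS) /* 97 -> 24832 */
#define fp_toint(x) ((x) >> FRAC_BITS)

/* Fixed-point multiply: shift out the extra fraction after the product. */
static inline int32_t mul_fp(int32_t x, int32_t y)
{
        return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
}

/* percent_fp(20) yields 0.20 in fixed point, i.e. 51/256. */
static inline int32_t percent_fp(int pct)
{
        return int_tofp(pct) / 100;
}

With the removed defaults (setpoint 97, p_gain_pct 20, zero integral and
derivative gains), a fully busy CPU gives fp_error = int_tofp(97 - 100); the
P-term mul_fp(percent_fp(20), int_tofp(-3)) is about -0.6, and after the
rounding step pid_calc() returns -1, so the caller's
current_pstate - pid_calc(...) nudges the requested P-state one step up.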
@@ -911,82 +802,6 @@ static void intel_pstate_update_policies(void)
                 cpufreq_update_policy(cpu);
 }
 
-/************************** debugfs begin ************************/
-static int pid_param_set(void *data, u64 val)
-{
-        unsigned int cpu;
-
-        *(u32 *)data = val;
-        pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
-        for_each_possible_cpu(cpu)
-                if (all_cpu_data[cpu])
-                        intel_pstate_pid_reset(all_cpu_data[cpu]);
-
-        return 0;
-}
-
-static int pid_param_get(void *data, u64 *val)
-{
-        *val = *(u32 *)data;
-        return 0;
-}
-DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
-
-static struct dentry *debugfs_parent;
-
-struct pid_param {
-        char *name;
-        void *value;
-        struct dentry *dentry;
-};
-
-static struct pid_param pid_files[] = {
-        {"sample_rate_ms", &pid_params.sample_rate_ms, },
-        {"d_gain_pct", &pid_params.d_gain_pct, },
-        {"i_gain_pct", &pid_params.i_gain_pct, },
-        {"deadband", &pid_params.deadband, },
-        {"setpoint", &pid_params.setpoint, },
-        {"p_gain_pct", &pid_params.p_gain_pct, },
-        {NULL, NULL, }
-};
-
-static void intel_pstate_debug_expose_params(void)
-{
-        int i;
-
-        debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
-        if (IS_ERR_OR_NULL(debugfs_parent))
-                return;
-
-        for (i = 0; pid_files[i].name; i++) {
-                struct dentry *dentry;
-
-                dentry = debugfs_create_file(pid_files[i].name, 0660,
-                                             debugfs_parent, pid_files[i].value,
-                                             &fops_pid_param);
-                if (!IS_ERR(dentry))
-                        pid_files[i].dentry = dentry;
-        }
-}
-
-static void intel_pstate_debug_hide_params(void)
-{
-        int i;
-
-        if (IS_ERR_OR_NULL(debugfs_parent))
-                return;
-
-        for (i = 0; pid_files[i].name; i++) {
-                debugfs_remove(pid_files[i].dentry);
-                pid_files[i].dentry = NULL;
-        }
-
-        debugfs_remove(debugfs_parent);
-        debugfs_parent = NULL;
-}
-
-/************************** debugfs end ************************/
-
 /************************** sysfs begin ************************/
 #define show_one(file_name, object) \
         static ssize_t show_##file_name \
@@ -1622,7 +1437,7 @@ static inline int32_t get_avg_pstate(struct cpudata *cpu)
                           cpu->sample.core_avg_perf);
 }
 
-static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
+static inline int32_t get_target_pstate(struct cpudata *cpu)
 {
         struct sample *sample = &cpu->sample;
         int32_t busy_frac, boost;
@@ -1660,44 +1475,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
         return target;
 }
 
-static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
-{
-        int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
-        u64 duration_ns;
-
-        /*
-         * perf_scaled is the ratio of the average P-state during the last
-         * sampling period to the P-state requested last time (in percent).
-         *
-         * That measures the system's response to the previous P-state
-         * selection.
-         */
-        max_pstate = cpu->pstate.max_pstate_physical;
-        current_pstate = cpu->pstate.current_pstate;
-        perf_scaled = mul_ext_fp(cpu->sample.core_avg_perf,
-                                 div_fp(100 * max_pstate, current_pstate));
-
-        /*
-         * Since our utilization update callback will not run unless we are
-         * in C0, check if the actual elapsed time is significantly greater (3x)
-         * than our sample interval. If it is, then we were idle for a long
-         * enough period of time to adjust our performance metric.
-         */
-        duration_ns = cpu->sample.time - cpu->last_sample_time;
-        if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
-                sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns);
-                perf_scaled = mul_fp(perf_scaled, sample_ratio);
-        } else {
-                sample_ratio = div_fp(100 * (cpu->sample.mperf << cpu->aperf_mperf_shift),
-                                      cpu->sample.tsc);
-                if (sample_ratio < int_tofp(1))
-                        perf_scaled = 0;
-        }
-
-        cpu->sample.busy_scaled = perf_scaled;
-        return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
-}
-
 static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
 {
         int max_pstate = intel_pstate_get_base_pstate(cpu);
@@ -1717,13 +1494,15 @@ static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
         wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
 }
 
-static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate)
+static void intel_pstate_adjust_pstate(struct cpudata *cpu)
 {
         int from = cpu->pstate.current_pstate;
         struct sample *sample;
+        int target_pstate;
 
         update_turbo_state();
 
+        target_pstate = get_target_pstate(cpu);
         target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
         trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
         intel_pstate_update_pstate(cpu, target_pstate);
@@ -1740,27 +1519,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate)
                         fp_toint(cpu->iowait_boost * 100));
 }
 
-static void intel_pstate_update_util_pid(struct update_util_data *data,
-                                         u64 time, unsigned int flags)
-{
-        struct cpudata *cpu = container_of(data, struct cpudata, update_util);
-        u64 delta_ns = time - cpu->sample.time;
-
-        /* Don't allow remote callbacks */
-        if (smp_processor_id() != cpu->cpu)
-                return;
-
-        if ((s64)delta_ns < pid_params.sample_rate_ns)
-                return;
-
-        if (intel_pstate_sample(cpu, time)) {
-                int target_pstate;
-
-                target_pstate = get_target_pstate_use_performance(cpu);
-                intel_pstate_adjust_pstate(cpu, target_pstate);
-        }
-}
-
 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
                                      unsigned int flags)
 {
@@ -1773,6 +1531,15 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 
         if (flags & SCHED_CPUFREQ_IOWAIT) {
                 cpu->iowait_boost = int_tofp(1);
+                cpu->last_update = time;
+                /*
+                 * The last time the busy was 100% so P-state was max anyway
+                 * so avoid overhead of computation.
+                 */
+                if (fp_toint(cpu->sample.busy_scaled) == 100)
+                        return;
+
+                goto set_pstate;
         } else if (cpu->iowait_boost) {
                 /* Clear iowait_boost if the CPU may have been idle. */
                 delta_ns = time - cpu->last_update;
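
[Editor's note] The fast path added above reuses the most recent sample when
a task just finished waiting on I/O. What the hunk leaves implicit is how the
boost fades: in this version the selection path halves cpu->iowait_boost on
every sample (that halving lives in get_target_pstate(), an assumption stated
here since it is not shown in this diff). A toy model of the decay:

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS 8     /* assumed, matching the driver's fixed point */

int main(void)
{
        /* SCHED_CPUFREQ_IOWAIT sets the boost to fixed-point 1.0 ... */
        int32_t iowait_boost = 1 << FRAC_BITS;  /* int_tofp(1) */
        int sample = 0;

        /* ... and each subsequent sample halves it until it is gone. */
        while (iowait_boost) {
                printf("sample %d: utilization floor %d/256\n",
                       sample++, iowait_boost);
                iowait_boost >>= 1;
        }
        return 0;
}

At the 10 ms sampling interval the floor is gone after roughly 90 ms without
further I/O waits.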
@@ -1781,15 +1548,12 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
         }
         cpu->last_update = time;
         delta_ns = time - cpu->sample.time;
-        if ((s64)delta_ns < INTEL_PSTATE_DEFAULT_SAMPLING_INTERVAL)
+        if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
                 return;
 
-        if (intel_pstate_sample(cpu, time)) {
-                int target_pstate;
-
-                target_pstate = get_target_pstate_use_cpu_load(cpu);
-                intel_pstate_adjust_pstate(cpu, target_pstate);
-        }
+set_pstate:
+        if (intel_pstate_sample(cpu, time))
+                intel_pstate_adjust_pstate(cpu);
 }
 
 static struct pstate_funcs core_funcs = {
@@ -1799,7 +1563,6 @@ static struct pstate_funcs core_funcs = {
         .get_turbo = core_get_turbo_pstate,
         .get_scaling = core_get_scaling,
         .get_val = core_get_val,
-        .update_util = intel_pstate_update_util_pid,
 };
 
 static const struct pstate_funcs silvermont_funcs = {
@@ -1810,7 +1573,6 @@ static const struct pstate_funcs silvermont_funcs = {
         .get_val = atom_get_val,
         .get_scaling = silvermont_get_scaling,
         .get_vid = atom_get_vid,
-        .update_util = intel_pstate_update_util,
 };
 
 static const struct pstate_funcs airmont_funcs = {
@@ -1821,7 +1583,6 @@ static const struct pstate_funcs airmont_funcs = {
         .get_val = atom_get_val,
         .get_scaling = airmont_get_scaling,
         .get_vid = atom_get_vid,
-        .update_util = intel_pstate_update_util,
 };
 
 static const struct pstate_funcs knl_funcs = {
@@ -1832,7 +1593,6 @@ static const struct pstate_funcs knl_funcs = {
         .get_aperf_mperf_shift = knl_get_aperf_mperf_shift,
         .get_scaling = core_get_scaling,
         .get_val = core_get_val,
-        .update_util = intel_pstate_update_util_pid,
 };
 
 static const struct pstate_funcs bxt_funcs = {
@@ -1842,7 +1602,6 @@ static const struct pstate_funcs bxt_funcs = {
         .get_turbo = core_get_turbo_pstate,
         .get_scaling = core_get_scaling,
         .get_val = core_get_val,
-        .update_util = intel_pstate_update_util,
 };
 
 #define ICPU(model, policy) \
@@ -1886,8 +1645,6 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
         {}
 };
 
-static bool pid_in_use(void);
-
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
         struct cpudata *cpu;
@@ -1918,8 +1675,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
                         intel_pstate_disable_ee(cpunum);
 
                 intel_pstate_hwp_enable(cpu);
-        } else if (pid_in_use()) {
-                intel_pstate_pid_reset(cpu);
         }
 
         intel_pstate_get_cpu_pstates(cpu);
@@ -1942,7 +1697,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
         /* Prevent intel_pstate_update_util() from using stale data. */
         cpu->sample.time = 0;
         cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-                                     pstate_funcs.update_util);
+                                     intel_pstate_update_util);
         cpu->update_util_set = true;
 }
 
@@ -2267,12 +2022,6 @@ static struct cpufreq_driver intel_cpufreq = {
 
 static struct cpufreq_driver *default_driver = &intel_pstate;
 
-static bool pid_in_use(void)
-{
-        return intel_pstate_driver == &intel_pstate &&
-                pstate_funcs.update_util == intel_pstate_update_util_pid;
-}
-
 static void intel_pstate_driver_cleanup(void)
 {
         unsigned int cpu;
@@ -2307,9 +2056,6 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
 
         global.min_perf_pct = min_perf_pct_min();
 
-        if (pid_in_use())
-                intel_pstate_debug_expose_params();
-
         return 0;
 }
 
@@ -2318,9 +2064,6 @@ static int intel_pstate_unregister_driver(void)
         if (hwp_active)
                 return -EBUSY;
 
-        if (pid_in_use())
-                intel_pstate_debug_hide_params();
-
         cpufreq_unregister_driver(intel_pstate_driver);
         intel_pstate_driver_cleanup();
 
@@ -2388,24 +2131,6 @@ static int __init intel_pstate_msrs_not_valid(void)
         return 0;
 }
 
-#ifdef CONFIG_ACPI
-static void intel_pstate_use_acpi_profile(void)
-{
-        switch (acpi_gbl_FADT.preferred_profile) {
-        case PM_MOBILE:
-        case PM_TABLET:
-        case PM_APPLIANCE_PC:
-        case PM_DESKTOP:
-        case PM_WORKSTATION:
-                pstate_funcs.update_util = intel_pstate_update_util;
-        }
-}
-#else
-static void intel_pstate_use_acpi_profile(void)
-{
-}
-#endif
-
 static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
 {
         pstate_funcs.get_max = funcs->get_max;
@@ -2415,10 +2140,7 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
         pstate_funcs.get_scaling = funcs->get_scaling;
         pstate_funcs.get_val = funcs->get_val;
         pstate_funcs.get_vid = funcs->get_vid;
-        pstate_funcs.update_util = funcs->update_util;
         pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift;
-
-        intel_pstate_use_acpi_profile();
 }
 
 #ifdef CONFIG_ACPI
@@ -2562,9 +2284,7 @@ static int __init intel_pstate_init(void)
 
         if (x86_match_cpu(hwp_support_ids)) {
                 copy_cpu_funcs(&core_funcs);
-                if (no_hwp) {
-                        pstate_funcs.update_util = intel_pstate_update_util;
-                } else {
+                if (!no_hwp) {
                         hwp_active++;
                         intel_pstate.attr = hwp_cpufreq_attrs;
                         goto hwp_cpu_matched;