diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 9 | ||||
-rw-r--r-- | MAINTAINERS | 6 | ||||
-rw-r--r-- | drivers/base/power/opp.c | 74 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 38 | ||||
-rw-r--r-- | tools/power/cpupower/utils/cpuidle-info.c | 8 |
5 files changed, 97 insertions, 38 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index eacb2e0397ae..10b8cc1bda8d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1446,6 +1446,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1446 | disable | 1446 | disable |
1447 | Do not enable intel_pstate as the default | 1447 | Do not enable intel_pstate as the default |
1448 | scaling driver for the supported processors | 1448 | scaling driver for the supported processors |
1449 | force | ||
1450 | Enable intel_pstate on systems that prohibit it by default | ||
1451 | in favor of acpi-cpufreq. Forcing the intel_pstate driver | ||
1452 | instead of acpi-cpufreq may disable platform features, such | ||
1453 | as thermal controls and power capping, that rely on ACPI | ||
1454 | P-States information being indicated to OSPM and therefore | ||
1455 | should be used with caution. This option does not work with | ||
1456 | processors that aren't supported by the intel_pstate driver | ||
1457 | or on platforms that use pcc-cpufreq instead of acpi-cpufreq. | ||
1449 | no_hwp | 1458 | no_hwp |
1450 | Do not enable hardware P state control (HWP) | 1459 | Do not enable hardware P state control (HWP) |
1451 | if available. | 1460 | if available. |
diff --git a/MAINTAINERS b/MAINTAINERS index c8927bc7748e..1c63cfecc6bf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -4869,6 +4869,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git | |||
4869 | S: Supported | 4869 | S: Supported |
4870 | F: drivers/idle/intel_idle.c | 4870 | F: drivers/idle/intel_idle.c |
4871 | 4871 | ||
4872 | INTEL PSTATE DRIVER | ||
4873 | M: Kristen Carlson Accardi <kristen@linux.intel.com> | ||
4874 | L: linux-pm@vger.kernel.org | ||
4875 | S: Supported | ||
4876 | F: drivers/cpufreq/intel_pstate.c | ||
4877 | |||
4872 | INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) | 4878 | INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) |
4873 | M: Maik Broemme <mbroemme@plusserver.de> | 4879 | M: Maik Broemme <mbroemme@plusserver.de> |
4874 | L: linux-fbdev@vger.kernel.org | 4880 | L: linux-fbdev@vger.kernel.org |
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 1bbef8e838e7..d24dd614a0bd 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c | |||
@@ -84,7 +84,11 @@ struct dev_pm_opp { | |||
84 | * | 84 | * |
85 | * This is an internal data structure maintaining the link to opps attached to | 85 | * This is an internal data structure maintaining the link to opps attached to |
86 | * a device. This structure is not meant to be shared to users as it is | 86 | * a device. This structure is not meant to be shared to users as it is |
87 | * meant for book keeping and private to OPP library | 87 | * meant for book keeping and private to OPP library. |
88 | * | ||
89 | * Because the opp structures can be used from both rcu and srcu readers, we | ||
90 | * need to wait for the grace period of both of them before freeing any | ||
91 | * resources. And so we have used kfree_rcu() from within call_srcu() handlers. | ||
88 | */ | 92 | */ |
89 | struct device_opp { | 93 | struct device_opp { |
90 | struct list_head node; | 94 | struct list_head node; |
@@ -382,12 +386,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, | |||
382 | } | 386 | } |
383 | EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); | 387 | EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); |
384 | 388 | ||
389 | static struct device_opp *add_device_opp(struct device *dev) | ||
390 | { | ||
391 | struct device_opp *dev_opp; | ||
392 | |||
393 | /* | ||
394 | * Allocate a new device OPP table. In the infrequent case where a new | ||
395 | * device is needed to be added, we pay this penalty. | ||
396 | */ | ||
397 | dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL); | ||
398 | if (!dev_opp) | ||
399 | return NULL; | ||
400 | |||
401 | dev_opp->dev = dev; | ||
402 | srcu_init_notifier_head(&dev_opp->srcu_head); | ||
403 | INIT_LIST_HEAD(&dev_opp->opp_list); | ||
404 | |||
405 | /* Secure the device list modification */ | ||
406 | list_add_rcu(&dev_opp->node, &dev_opp_list); | ||
407 | return dev_opp; | ||
408 | } | ||
409 | |||
385 | static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq, | 410 | static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq, |
386 | unsigned long u_volt, bool dynamic) | 411 | unsigned long u_volt, bool dynamic) |
387 | { | 412 | { |
388 | struct device_opp *dev_opp = NULL; | 413 | struct device_opp *dev_opp = NULL; |
389 | struct dev_pm_opp *opp, *new_opp; | 414 | struct dev_pm_opp *opp, *new_opp; |
390 | struct list_head *head; | 415 | struct list_head *head; |
416 | int ret; | ||
391 | 417 | ||
392 | /* allocate new OPP node */ | 418 | /* allocate new OPP node */ |
393 | new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL); | 419 | new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL); |
@@ -408,27 +434,12 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq, | |||
408 | /* Check for existing list for 'dev' */ | 434 | /* Check for existing list for 'dev' */ |
409 | dev_opp = find_device_opp(dev); | 435 | dev_opp = find_device_opp(dev); |
410 | if (IS_ERR(dev_opp)) { | 436 | if (IS_ERR(dev_opp)) { |
411 | /* | 437 | dev_opp = add_device_opp(dev); |
412 | * Allocate a new device OPP table. In the infrequent case | ||
413 | * where a new device is needed to be added, we pay this | ||
414 | * penalty. | ||
415 | */ | ||
416 | dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL); | ||
417 | if (!dev_opp) { | 438 | if (!dev_opp) { |
418 | mutex_unlock(&dev_opp_list_lock); | 439 | ret = -ENOMEM; |
419 | kfree(new_opp); | 440 | goto free_opp; |
420 | dev_warn(dev, | ||
421 | "%s: Unable to create device OPP structure\n", | ||
422 | __func__); | ||
423 | return -ENOMEM; | ||
424 | } | 441 | } |
425 | 442 | ||
426 | dev_opp->dev = dev; | ||
427 | srcu_init_notifier_head(&dev_opp->srcu_head); | ||
428 | INIT_LIST_HEAD(&dev_opp->opp_list); | ||
429 | |||
430 | /* Secure the device list modification */ | ||
431 | list_add_rcu(&dev_opp->node, &dev_opp_list); | ||
432 | head = &dev_opp->opp_list; | 443 | head = &dev_opp->opp_list; |
433 | goto list_add; | 444 | goto list_add; |
434 | } | 445 | } |
@@ -447,15 +458,13 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq, | |||
447 | 458 | ||
448 | /* Duplicate OPPs ? */ | 459 | /* Duplicate OPPs ? */ |
449 | if (new_opp->rate == opp->rate) { | 460 | if (new_opp->rate == opp->rate) { |
450 | int ret = opp->available && new_opp->u_volt == opp->u_volt ? | 461 | ret = opp->available && new_opp->u_volt == opp->u_volt ? |
451 | 0 : -EEXIST; | 462 | 0 : -EEXIST; |
452 | 463 | ||
453 | dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n", | 464 | dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n", |
454 | __func__, opp->rate, opp->u_volt, opp->available, | 465 | __func__, opp->rate, opp->u_volt, opp->available, |
455 | new_opp->rate, new_opp->u_volt, new_opp->available); | 466 | new_opp->rate, new_opp->u_volt, new_opp->available); |
456 | mutex_unlock(&dev_opp_list_lock); | 467 | goto free_opp; |
457 | kfree(new_opp); | ||
458 | return ret; | ||
459 | } | 468 | } |
460 | 469 | ||
461 | list_add: | 470 | list_add: |
@@ -469,6 +478,11 @@ list_add: | |||
469 | */ | 478 | */ |
470 | srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp); | 479 | srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp); |
471 | return 0; | 480 | return 0; |
481 | |||
482 | free_opp: | ||
483 | mutex_unlock(&dev_opp_list_lock); | ||
484 | kfree(new_opp); | ||
485 | return ret; | ||
472 | } | 486 | } |
473 | 487 | ||
474 | /** | 488 | /** |
@@ -511,10 +525,11 @@ static void kfree_device_rcu(struct rcu_head *head) | |||
511 | { | 525 | { |
512 | struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head); | 526 | struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head); |
513 | 527 | ||
514 | kfree(device_opp); | 528 | kfree_rcu(device_opp, rcu_head); |
515 | } | 529 | } |
516 | 530 | ||
517 | void __dev_pm_opp_remove(struct device_opp *dev_opp, struct dev_pm_opp *opp) | 531 | static void __dev_pm_opp_remove(struct device_opp *dev_opp, |
532 | struct dev_pm_opp *opp) | ||
518 | { | 533 | { |
519 | /* | 534 | /* |
520 | * Notify the changes in the availability of the operable | 535 | * Notify the changes in the availability of the operable |
@@ -592,7 +607,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_remove); | |||
592 | static int opp_set_availability(struct device *dev, unsigned long freq, | 607 | static int opp_set_availability(struct device *dev, unsigned long freq, |
593 | bool availability_req) | 608 | bool availability_req) |
594 | { | 609 | { |
595 | struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV); | 610 | struct device_opp *dev_opp; |
596 | struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV); | 611 | struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV); |
597 | int r = 0; | 612 | int r = 0; |
598 | 613 | ||
@@ -606,12 +621,7 @@ static int opp_set_availability(struct device *dev, unsigned long freq, | |||
606 | mutex_lock(&dev_opp_list_lock); | 621 | mutex_lock(&dev_opp_list_lock); |
607 | 622 | ||
608 | /* Find the device_opp */ | 623 | /* Find the device_opp */ |
609 | list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) { | 624 | dev_opp = find_device_opp(dev); |
610 | if (dev == tmp_dev_opp->dev) { | ||
611 | dev_opp = tmp_dev_opp; | ||
612 | break; | ||
613 | } | ||
614 | } | ||
615 | if (IS_ERR(dev_opp)) { | 625 | if (IS_ERR(dev_opp)) { |
616 | r = PTR_ERR(dev_opp); | 626 | r = PTR_ERR(dev_opp); |
617 | dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r); | 627 | dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r); |
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 1405b393c93d..742eefba12c2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c | |||
@@ -199,7 +199,14 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) | |||
199 | 199 | ||
200 | pid->integral += fp_error; | 200 | pid->integral += fp_error; |
201 | 201 | ||
202 | /* limit the integral term */ | 202 | /* |
203 | * We limit the integral here so that it will never | ||
204 | * get higher than 30. This prevents it from becoming | ||
205 | * too large an input over long periods of time and allows | ||
206 | * it to get factored out sooner. | ||
207 | * | ||
208 | * The value of 30 was chosen through experimentation. | ||
209 | */ | ||
203 | integral_limit = int_tofp(30); | 210 | integral_limit = int_tofp(30); |
204 | if (pid->integral > integral_limit) | 211 | if (pid->integral > integral_limit) |
205 | pid->integral = integral_limit; | 212 | pid->integral = integral_limit; |
@@ -616,6 +623,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) | |||
616 | if (limits.no_turbo || limits.turbo_disabled) | 623 | if (limits.no_turbo || limits.turbo_disabled) |
617 | max_perf = cpu->pstate.max_pstate; | 624 | max_perf = cpu->pstate.max_pstate; |
618 | 625 | ||
626 | /* | ||
627 | * performance can be limited by user through sysfs, by cpufreq | ||
628 | * policy, or by cpu specific default values determined through | ||
629 | * experimentation. | ||
630 | */ | ||
619 | max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); | 631 | max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); |
620 | *max = clamp_t(int, max_perf_adj, | 632 | *max = clamp_t(int, max_perf_adj, |
621 | cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); | 633 | cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); |
@@ -717,11 +729,29 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) | |||
717 | u32 duration_us; | 729 | u32 duration_us; |
718 | u32 sample_time; | 730 | u32 sample_time; |
719 | 731 | ||
732 | /* | ||
733 | * core_busy is the ratio of actual performance to max | ||
734 | * max_pstate is the max non turbo pstate available | ||
735 | * current_pstate was the pstate that was requested during | ||
736 | * the last sample period. | ||
737 | * | ||
738 | * We normalize core_busy, which was our actual percent | ||
739 | * performance to what we requested during the last sample | ||
740 | * period. The result will be a percentage of busy at a | ||
741 | * specified pstate. | ||
742 | */ | ||
720 | core_busy = cpu->sample.core_pct_busy; | 743 | core_busy = cpu->sample.core_pct_busy; |
721 | max_pstate = int_tofp(cpu->pstate.max_pstate); | 744 | max_pstate = int_tofp(cpu->pstate.max_pstate); |
722 | current_pstate = int_tofp(cpu->pstate.current_pstate); | 745 | current_pstate = int_tofp(cpu->pstate.current_pstate); |
723 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); | 746 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); |
724 | 747 | ||
748 | /* | ||
749 | * Since we have a deferred timer, it will not fire unless | ||
750 | * we are in C0. So, determine if the actual elapsed time | ||
751 | * is significantly greater (3x) than our sample interval. If it | ||
752 | * is, then we were idle for a long enough period of time | ||
753 | * to adjust our busyness. | ||
754 | */ | ||
725 | sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; | 755 | sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; |
726 | duration_us = (u32) ktime_us_delta(cpu->sample.time, | 756 | duration_us = (u32) ktime_us_delta(cpu->sample.time, |
727 | cpu->last_sample_time); | 757 | cpu->last_sample_time); |
@@ -948,6 +978,7 @@ static struct cpufreq_driver intel_pstate_driver = { | |||
948 | 978 | ||
949 | static int __initdata no_load; | 979 | static int __initdata no_load; |
950 | static int __initdata no_hwp; | 980 | static int __initdata no_hwp; |
981 | static unsigned int force_load; | ||
951 | 982 | ||
952 | static int intel_pstate_msrs_not_valid(void) | 983 | static int intel_pstate_msrs_not_valid(void) |
953 | { | 984 | { |
@@ -1094,7 +1125,8 @@ static bool intel_pstate_platform_pwr_mgmt_exists(void) | |||
1094 | case PSS: | 1125 | case PSS: |
1095 | return intel_pstate_no_acpi_pss(); | 1126 | return intel_pstate_no_acpi_pss(); |
1096 | case PPC: | 1127 | case PPC: |
1097 | return intel_pstate_has_acpi_ppc(); | 1128 | return intel_pstate_has_acpi_ppc() && |
1129 | (!force_load); | ||
1098 | } | 1130 | } |
1099 | } | 1131 | } |
1100 | 1132 | ||
@@ -1175,6 +1207,8 @@ static int __init intel_pstate_setup(char *str) | |||
1175 | no_load = 1; | 1207 | no_load = 1; |
1176 | if (!strcmp(str, "no_hwp")) | 1208 | if (!strcmp(str, "no_hwp")) |
1177 | no_hwp = 1; | 1209 | no_hwp = 1; |
1210 | if (!strcmp(str, "force")) | ||
1211 | force_load = 1; | ||
1178 | return 0; | 1212 | return 0; |
1179 | } | 1213 | } |
1180 | early_param("intel_pstate", intel_pstate_setup); | 1214 | early_param("intel_pstate", intel_pstate_setup); |
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 458d69b444ad..75e66de7e7a7 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c | |||
@@ -22,13 +22,13 @@ | |||
22 | 22 | ||
23 | static void cpuidle_cpu_output(unsigned int cpu, int verbose) | 23 | static void cpuidle_cpu_output(unsigned int cpu, int verbose) |
24 | { | 24 | { |
25 | int idlestates, idlestate; | 25 | unsigned int idlestates, idlestate; |
26 | char *tmp; | 26 | char *tmp; |
27 | 27 | ||
28 | printf(_ ("Analyzing CPU %d:\n"), cpu); | 28 | printf(_ ("Analyzing CPU %d:\n"), cpu); |
29 | 29 | ||
30 | idlestates = sysfs_get_idlestate_count(cpu); | 30 | idlestates = sysfs_get_idlestate_count(cpu); |
31 | if (idlestates < 1) { | 31 | if (idlestates == 0) { |
32 | printf(_("CPU %u: No idle states\n"), cpu); | 32 | printf(_("CPU %u: No idle states\n"), cpu); |
33 | return; | 33 | return; |
34 | } | 34 | } |
@@ -100,10 +100,10 @@ static void cpuidle_general_output(void) | |||
100 | static void proc_cpuidle_cpu_output(unsigned int cpu) | 100 | static void proc_cpuidle_cpu_output(unsigned int cpu) |
101 | { | 101 | { |
102 | long max_allowed_cstate = 2000000000; | 102 | long max_allowed_cstate = 2000000000; |
103 | int cstate, cstates; | 103 | unsigned int cstate, cstates; |
104 | 104 | ||
105 | cstates = sysfs_get_idlestate_count(cpu); | 105 | cstates = sysfs_get_idlestate_count(cpu); |
106 | if (cstates < 1) { | 106 | if (cstates == 0) { |
107 | printf(_("CPU %u: No C-states info\n"), cpu); | 107 | printf(_("CPU %u: No C-states info\n"), cpu); |
108 | return; | 108 | return; |
109 | } | 109 | } |