about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/kernel-parameters.txt | 9
-rw-r--r-- MAINTAINERS | 6
-rw-r--r-- drivers/base/power/opp.c | 74
-rw-r--r-- drivers/cpufreq/intel_pstate.c | 38
-rw-r--r-- tools/power/cpupower/utils/cpuidle-info.c | 8
5 files changed, 97 insertions, 38 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index eacb2e0397ae..10b8cc1bda8d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1446,6 +1446,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1446 disable 1446 disable
1447 Do not enable intel_pstate as the default 1447 Do not enable intel_pstate as the default
1448 scaling driver for the supported processors 1448 scaling driver for the supported processors
1449 force
1450 Enable intel_pstate on systems that prohibit it by default
1451 in favor of acpi-cpufreq. Forcing the intel_pstate driver
1452 instead of acpi-cpufreq may disable platform features, such
1453 as thermal controls and power capping, that rely on ACPI
1454 P-States information being indicated to OSPM and therefore
1455 should be used with caution. This option does not work with
1456 processors that aren't supported by the intel_pstate driver
1457 or on platforms that use pcc-cpufreq instead of acpi-cpufreq.
1449 no_hwp 1458 no_hwp
1450 Do not enable hardware P state control (HWP) 1459 Do not enable hardware P state control (HWP)
1451 if available. 1460 if available.
diff --git a/MAINTAINERS b/MAINTAINERS
index c8927bc7748e..1c63cfecc6bf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4869,6 +4869,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git
4869S: Supported 4869S: Supported
4870F: drivers/idle/intel_idle.c 4870F: drivers/idle/intel_idle.c
4871 4871
4872INTEL PSTATE DRIVER
4873M: Kristen Carlson Accardi <kristen@linux.intel.com>
4874L: linux-pm@vger.kernel.org
4875S: Supported
4876F: drivers/cpufreq/intel_pstate.c
4877
4872INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) 4878INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
4873M: Maik Broemme <mbroemme@plusserver.de> 4879M: Maik Broemme <mbroemme@plusserver.de>
4874L: linux-fbdev@vger.kernel.org 4880L: linux-fbdev@vger.kernel.org
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 1bbef8e838e7..d24dd614a0bd 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -84,7 +84,11 @@ struct dev_pm_opp {
84 * 84 *
85 * This is an internal data structure maintaining the link to opps attached to 85 * This is an internal data structure maintaining the link to opps attached to
86 * a device. This structure is not meant to be shared to users as it is 86 * a device. This structure is not meant to be shared to users as it is
87 * meant for book keeping and private to OPP library 87 * meant for book keeping and private to OPP library.
88 *
89 * Because the opp structures can be used from both rcu and srcu readers, we
90 * need to wait for the grace period of both of them before freeing any
91 * resources. And so we have used kfree_rcu() from within call_srcu() handlers.
88 */ 92 */
89struct device_opp { 93struct device_opp {
90 struct list_head node; 94 struct list_head node;
@@ -382,12 +386,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
382} 386}
383EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); 387EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
384 388
389static struct device_opp *add_device_opp(struct device *dev)
390{
391 struct device_opp *dev_opp;
392
393 /*
394 * Allocate a new device OPP table. In the infrequent case where a new
395 * device is needed to be added, we pay this penalty.
396 */
397 dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL);
398 if (!dev_opp)
399 return NULL;
400
401 dev_opp->dev = dev;
402 srcu_init_notifier_head(&dev_opp->srcu_head);
403 INIT_LIST_HEAD(&dev_opp->opp_list);
404
405 /* Secure the device list modification */
406 list_add_rcu(&dev_opp->node, &dev_opp_list);
407 return dev_opp;
408}
409
385static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq, 410static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
386 unsigned long u_volt, bool dynamic) 411 unsigned long u_volt, bool dynamic)
387{ 412{
388 struct device_opp *dev_opp = NULL; 413 struct device_opp *dev_opp = NULL;
389 struct dev_pm_opp *opp, *new_opp; 414 struct dev_pm_opp *opp, *new_opp;
390 struct list_head *head; 415 struct list_head *head;
416 int ret;
391 417
392 /* allocate new OPP node */ 418 /* allocate new OPP node */
393 new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL); 419 new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL);
@@ -408,27 +434,12 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
408 /* Check for existing list for 'dev' */ 434 /* Check for existing list for 'dev' */
409 dev_opp = find_device_opp(dev); 435 dev_opp = find_device_opp(dev);
410 if (IS_ERR(dev_opp)) { 436 if (IS_ERR(dev_opp)) {
411 /* 437 dev_opp = add_device_opp(dev);
412 * Allocate a new device OPP table. In the infrequent case
413 * where a new device is needed to be added, we pay this
414 * penalty.
415 */
416 dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL);
417 if (!dev_opp) { 438 if (!dev_opp) {
418 mutex_unlock(&dev_opp_list_lock); 439 ret = -ENOMEM;
419 kfree(new_opp); 440 goto free_opp;
420 dev_warn(dev,
421 "%s: Unable to create device OPP structure\n",
422 __func__);
423 return -ENOMEM;
424 } 441 }
425 442
426 dev_opp->dev = dev;
427 srcu_init_notifier_head(&dev_opp->srcu_head);
428 INIT_LIST_HEAD(&dev_opp->opp_list);
429
430 /* Secure the device list modification */
431 list_add_rcu(&dev_opp->node, &dev_opp_list);
432 head = &dev_opp->opp_list; 443 head = &dev_opp->opp_list;
433 goto list_add; 444 goto list_add;
434 } 445 }
@@ -447,15 +458,13 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
447 458
448 /* Duplicate OPPs ? */ 459 /* Duplicate OPPs ? */
449 if (new_opp->rate == opp->rate) { 460 if (new_opp->rate == opp->rate) {
450 int ret = opp->available && new_opp->u_volt == opp->u_volt ? 461 ret = opp->available && new_opp->u_volt == opp->u_volt ?
451 0 : -EEXIST; 462 0 : -EEXIST;
452 463
453 dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n", 464 dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
454 __func__, opp->rate, opp->u_volt, opp->available, 465 __func__, opp->rate, opp->u_volt, opp->available,
455 new_opp->rate, new_opp->u_volt, new_opp->available); 466 new_opp->rate, new_opp->u_volt, new_opp->available);
456 mutex_unlock(&dev_opp_list_lock); 467 goto free_opp;
457 kfree(new_opp);
458 return ret;
459 } 468 }
460 469
461list_add: 470list_add:
@@ -469,6 +478,11 @@ list_add:
469 */ 478 */
470 srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp); 479 srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
471 return 0; 480 return 0;
481
482free_opp:
483 mutex_unlock(&dev_opp_list_lock);
484 kfree(new_opp);
485 return ret;
472} 486}
473 487
474/** 488/**
@@ -511,10 +525,11 @@ static void kfree_device_rcu(struct rcu_head *head)
511{ 525{
512 struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head); 526 struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
513 527
514 kfree(device_opp); 528 kfree_rcu(device_opp, rcu_head);
515} 529}
516 530
517void __dev_pm_opp_remove(struct device_opp *dev_opp, struct dev_pm_opp *opp) 531static void __dev_pm_opp_remove(struct device_opp *dev_opp,
532 struct dev_pm_opp *opp)
518{ 533{
519 /* 534 /*
520 * Notify the changes in the availability of the operable 535 * Notify the changes in the availability of the operable
@@ -592,7 +607,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
592static int opp_set_availability(struct device *dev, unsigned long freq, 607static int opp_set_availability(struct device *dev, unsigned long freq,
593 bool availability_req) 608 bool availability_req)
594{ 609{
595 struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV); 610 struct device_opp *dev_opp;
596 struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV); 611 struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
597 int r = 0; 612 int r = 0;
598 613
@@ -606,12 +621,7 @@ static int opp_set_availability(struct device *dev, unsigned long freq,
606 mutex_lock(&dev_opp_list_lock); 621 mutex_lock(&dev_opp_list_lock);
607 622
608 /* Find the device_opp */ 623 /* Find the device_opp */
609 list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) { 624 dev_opp = find_device_opp(dev);
610 if (dev == tmp_dev_opp->dev) {
611 dev_opp = tmp_dev_opp;
612 break;
613 }
614 }
615 if (IS_ERR(dev_opp)) { 625 if (IS_ERR(dev_opp)) {
616 r = PTR_ERR(dev_opp); 626 r = PTR_ERR(dev_opp);
617 dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r); 627 dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1405b393c93d..742eefba12c2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -199,7 +199,14 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
199 199
200 pid->integral += fp_error; 200 pid->integral += fp_error;
201 201
202 /* limit the integral term */ 202 /*
203 * We limit the integral here so that it will never
204 * get higher than 30. This prevents it from becoming
205 * too large an input over long periods of time and allows
206 * it to get factored out sooner.
207 *
208 * The value of 30 was chosen through experimentation.
209 */
203 integral_limit = int_tofp(30); 210 integral_limit = int_tofp(30);
204 if (pid->integral > integral_limit) 211 if (pid->integral > integral_limit)
205 pid->integral = integral_limit; 212 pid->integral = integral_limit;
@@ -616,6 +623,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
616 if (limits.no_turbo || limits.turbo_disabled) 623 if (limits.no_turbo || limits.turbo_disabled)
617 max_perf = cpu->pstate.max_pstate; 624 max_perf = cpu->pstate.max_pstate;
618 625
626 /*
627 * performance can be limited by user through sysfs, by cpufreq
628 * policy, or by cpu specific default values determined through
629 * experimentation.
630 */
619 max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); 631 max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
620 *max = clamp_t(int, max_perf_adj, 632 *max = clamp_t(int, max_perf_adj,
621 cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); 633 cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
@@ -717,11 +729,29 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
717 u32 duration_us; 729 u32 duration_us;
718 u32 sample_time; 730 u32 sample_time;
719 731
732 /*
733 * core_busy is the ratio of actual performance to max
734 * max_pstate is the max non turbo pstate available
735 * current_pstate was the pstate that was requested during
736 * the last sample period.
737 *
738 * We normalize core_busy, which was our actual percent
739 * performance to what we requested during the last sample
740 * period. The result will be a percentage of busy at a
741 * specified pstate.
742 */
720 core_busy = cpu->sample.core_pct_busy; 743 core_busy = cpu->sample.core_pct_busy;
721 max_pstate = int_tofp(cpu->pstate.max_pstate); 744 max_pstate = int_tofp(cpu->pstate.max_pstate);
722 current_pstate = int_tofp(cpu->pstate.current_pstate); 745 current_pstate = int_tofp(cpu->pstate.current_pstate);
723 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); 746 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
724 747
748 /*
749 * Since we have a deferred timer, it will not fire unless
750 * we are in C0. So, determine if the actual elapsed time
751 * is significantly greater (3x) than our sample interval. If it
752 * is, then we were idle for a long enough period of time
753 * to adjust our busyness.
754 */
725 sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; 755 sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
726 duration_us = (u32) ktime_us_delta(cpu->sample.time, 756 duration_us = (u32) ktime_us_delta(cpu->sample.time,
727 cpu->last_sample_time); 757 cpu->last_sample_time);
@@ -948,6 +978,7 @@ static struct cpufreq_driver intel_pstate_driver = {
948 978
949static int __initdata no_load; 979static int __initdata no_load;
950static int __initdata no_hwp; 980static int __initdata no_hwp;
981static unsigned int force_load;
951 982
952static int intel_pstate_msrs_not_valid(void) 983static int intel_pstate_msrs_not_valid(void)
953{ 984{
@@ -1094,7 +1125,8 @@ static bool intel_pstate_platform_pwr_mgmt_exists(void)
1094 case PSS: 1125 case PSS:
1095 return intel_pstate_no_acpi_pss(); 1126 return intel_pstate_no_acpi_pss();
1096 case PPC: 1127 case PPC:
1097 return intel_pstate_has_acpi_ppc(); 1128 return intel_pstate_has_acpi_ppc() &&
1129 (!force_load);
1098 } 1130 }
1099 } 1131 }
1100 1132
@@ -1175,6 +1207,8 @@ static int __init intel_pstate_setup(char *str)
1175 no_load = 1; 1207 no_load = 1;
1176 if (!strcmp(str, "no_hwp")) 1208 if (!strcmp(str, "no_hwp"))
1177 no_hwp = 1; 1209 no_hwp = 1;
1210 if (!strcmp(str, "force"))
1211 force_load = 1;
1178 return 0; 1212 return 0;
1179} 1213}
1180early_param("intel_pstate", intel_pstate_setup); 1214early_param("intel_pstate", intel_pstate_setup);
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 458d69b444ad..75e66de7e7a7 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -22,13 +22,13 @@
22 22
23static void cpuidle_cpu_output(unsigned int cpu, int verbose) 23static void cpuidle_cpu_output(unsigned int cpu, int verbose)
24{ 24{
25 int idlestates, idlestate; 25 unsigned int idlestates, idlestate;
26 char *tmp; 26 char *tmp;
27 27
28 printf(_ ("Analyzing CPU %d:\n"), cpu); 28 printf(_ ("Analyzing CPU %d:\n"), cpu);
29 29
30 idlestates = sysfs_get_idlestate_count(cpu); 30 idlestates = sysfs_get_idlestate_count(cpu);
31 if (idlestates < 1) { 31 if (idlestates == 0) {
32 printf(_("CPU %u: No idle states\n"), cpu); 32 printf(_("CPU %u: No idle states\n"), cpu);
33 return; 33 return;
34 } 34 }
@@ -100,10 +100,10 @@ static void cpuidle_general_output(void)
100static void proc_cpuidle_cpu_output(unsigned int cpu) 100static void proc_cpuidle_cpu_output(unsigned int cpu)
101{ 101{
102 long max_allowed_cstate = 2000000000; 102 long max_allowed_cstate = 2000000000;
103 int cstate, cstates; 103 unsigned int cstate, cstates;
104 104
105 cstates = sysfs_get_idlestate_count(cpu); 105 cstates = sysfs_get_idlestate_count(cpu);
106 if (cstates < 1) { 106 if (cstates == 0) {
107 printf(_("CPU %u: No C-states info\n"), cpu); 107 printf(_("CPU %u: No C-states info\n"), cpu);
108 return; 108 return;
109 } 109 }