diff options
| author | Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> | 2016-04-27 18:48:06 -0400 |
|---|---|---|
| committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2016-04-27 19:01:39 -0400 |
| commit | 9522a2ff9cde26ef48c30e0c9ca9ae4dfb669764 (patch) | |
| tree | 897b9d98dd831b2b6cc5d4fe0954840a9eae45d5 /drivers/cpufreq | |
| parent | eaa2c3aeef83f096cd1ec73df1310712e423337b (diff) | |
cpufreq: intel_pstate: Enforce _PPC limits
Use ACPI _PPC notifications to limit the max P state the driver will request.
An ACPI _PPC change notification is sent by the BIOS to limit the max P state
in several cases:
- To reduce the impact of a platform thermal condition
- When the Config TDP feature is used, a changed _PPC is sent to
follow the TDP change
- Remote node managers in servers want to control platform power
via the baseboard management controller (BMC)
This change registers with the ACPI processor performance lib so that
_PPC changes are notified to the cpufreq core, which in turn will
result in a call to the .setpolicy() callback. Also, the way the _PSS
table identifies a turbo frequency is not compatible with the max turbo
frequency in intel_pstate, so the very first entry in _PSS needs
to be adjusted.
This feature can be turned on by using the kernel parameter:
intel_pstate=support_acpi_ppc
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
[ rjw: Minor cleanups ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Diffstat (limited to 'drivers/cpufreq')
| -rw-r--r-- | drivers/cpufreq/Kconfig.x86 | 1 | ||||
| -rw-r--r-- | drivers/cpufreq/intel_pstate.c | 136 |
2 files changed, 135 insertions, 2 deletions
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index c59bdcb83217..adbd1de1cea5 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | config X86_INTEL_PSTATE | 5 | config X86_INTEL_PSTATE |
| 6 | bool "Intel P state control" | 6 | bool "Intel P state control" |
| 7 | depends on X86 | 7 | depends on X86 |
| 8 | select ACPI_PROCESSOR if ACPI | ||
| 8 | help | 9 | help |
| 9 | This driver provides a P state for Intel core processors. | 10 | This driver provides a P state for Intel core processors. |
| 10 | The driver implements an internal governor and will become | 11 | The driver implements an internal governor and will become |
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cfa6a6803e0e..c72a82a45872 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c | |||
| @@ -41,6 +41,10 @@ | |||
| 41 | #define ATOM_TURBO_RATIOS 0x66c | 41 | #define ATOM_TURBO_RATIOS 0x66c |
| 42 | #define ATOM_TURBO_VIDS 0x66d | 42 | #define ATOM_TURBO_VIDS 0x66d |
| 43 | 43 | ||
| 44 | #ifdef CONFIG_ACPI | ||
| 45 | #include <acpi/processor.h> | ||
| 46 | #endif | ||
| 47 | |||
| 44 | #define FRAC_BITS 8 | 48 | #define FRAC_BITS 8 |
| 45 | #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) | 49 | #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) |
| 46 | #define fp_toint(X) ((X) >> FRAC_BITS) | 50 | #define fp_toint(X) ((X) >> FRAC_BITS) |
| @@ -174,6 +178,8 @@ struct _pid { | |||
| 174 | * @prev_cummulative_iowait: IO Wait time difference from last and | 178 | * @prev_cummulative_iowait: IO Wait time difference from last and |
| 175 | * current sample | 179 | * current sample |
| 176 | * @sample: Storage for storing last Sample data | 180 | * @sample: Storage for storing last Sample data |
| 181 | * @acpi_perf_data: Stores ACPI perf information read from _PSS | ||
| 182 | * @valid_pss_table: Set to true for valid ACPI _PSS entries found | ||
| 177 | * | 183 | * |
| 178 | * This structure stores per CPU instance data for all CPUs. | 184 | * This structure stores per CPU instance data for all CPUs. |
| 179 | */ | 185 | */ |
| @@ -192,6 +198,10 @@ struct cpudata { | |||
| 192 | u64 prev_tsc; | 198 | u64 prev_tsc; |
| 193 | u64 prev_cummulative_iowait; | 199 | u64 prev_cummulative_iowait; |
| 194 | struct sample sample; | 200 | struct sample sample; |
| 201 | #ifdef CONFIG_ACPI | ||
| 202 | struct acpi_processor_performance acpi_perf_data; | ||
| 203 | bool valid_pss_table; | ||
| 204 | #endif | ||
| 195 | }; | 205 | }; |
| 196 | 206 | ||
| 197 | static struct cpudata **all_cpu_data; | 207 | static struct cpudata **all_cpu_data; |
| @@ -260,6 +270,9 @@ static struct pstate_adjust_policy pid_params; | |||
| 260 | static struct pstate_funcs pstate_funcs; | 270 | static struct pstate_funcs pstate_funcs; |
| 261 | static int hwp_active; | 271 | static int hwp_active; |
| 262 | 272 | ||
| 273 | #ifdef CONFIG_ACPI | ||
| 274 | static bool acpi_ppc; | ||
| 275 | #endif | ||
| 263 | 276 | ||
| 264 | /** | 277 | /** |
| 265 | * struct perf_limits - Store user and policy limits | 278 | * struct perf_limits - Store user and policy limits |
| @@ -333,6 +346,111 @@ static struct perf_limits *limits = &performance_limits; | |||
| 333 | static struct perf_limits *limits = &powersave_limits; | 346 | static struct perf_limits *limits = &powersave_limits; |
| 334 | #endif | 347 | #endif |
| 335 | 348 | ||
| 349 | #ifdef CONFIG_ACPI | ||
| 350 | /* | ||
| 351 | * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and | ||
| 352 | * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and | ||
| 353 | * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state | ||
| 354 | * ratio, out of it only high 8 bits are used. For example 0x1700 is setting | ||
| 355 | * target ratio 0x17. The _PSS control value stores in a format which can be | ||
| 356 | * directly written to PERF_CTL MSR. But in intel_pstate driver this shift | ||
| 357 | * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). | ||
| 358 | * This function converts the _PSS control value to intel pstate driver format | ||
| 359 | * for comparison and assignment. | ||
| 360 | */ | ||
| 361 | static int convert_to_native_pstate_format(struct cpudata *cpu, int index) | ||
| 362 | { | ||
| 363 | return cpu->acpi_perf_data.states[index].control >> 8; | ||
| 364 | } | ||
| 365 | |||
| 366 | static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) | ||
| 367 | { | ||
| 368 | struct cpudata *cpu; | ||
| 369 | int turbo_pss_ctl; | ||
| 370 | int ret; | ||
| 371 | int i; | ||
| 372 | |||
| 373 | if (!acpi_ppc) | ||
| 374 | return; | ||
| 375 | |||
| 376 | cpu = all_cpu_data[policy->cpu]; | ||
| 377 | |||
| 378 | ret = acpi_processor_register_performance(&cpu->acpi_perf_data, | ||
| 379 | policy->cpu); | ||
| 380 | if (ret) | ||
| 381 | return; | ||
| 382 | |||
| 383 | /* | ||
| 384 | * Check if the control value in _PSS is for PERF_CTL MSR, which should | ||
| 385 | * guarantee that the states returned by it map to the states in our | ||
| 386 | * list directly. | ||
| 387 | */ | ||
| 388 | if (cpu->acpi_perf_data.control_register.space_id != | ||
| 389 | ACPI_ADR_SPACE_FIXED_HARDWARE) | ||
| 390 | goto err; | ||
| 391 | |||
| 392 | /* | ||
| 393 | * If there is only one entry _PSS, simply ignore _PSS and continue as | ||
| 394 | * usual without taking _PSS into account | ||
| 395 | */ | ||
| 396 | if (cpu->acpi_perf_data.state_count < 2) | ||
| 397 | goto err; | ||
| 398 | |||
| 399 | pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu); | ||
| 400 | for (i = 0; i < cpu->acpi_perf_data.state_count; i++) { | ||
| 401 | pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n", | ||
| 402 | (i == cpu->acpi_perf_data.state ? '*' : ' '), i, | ||
| 403 | (u32) cpu->acpi_perf_data.states[i].core_frequency, | ||
| 404 | (u32) cpu->acpi_perf_data.states[i].power, | ||
| 405 | (u32) cpu->acpi_perf_data.states[i].control); | ||
| 406 | } | ||
| 407 | |||
| 408 | /* | ||
| 409 | * The _PSS table doesn't contain whole turbo frequency range. | ||
| 410 | * This just contains +1 MHZ above the max non turbo frequency, | ||
| 411 | * with control value corresponding to max turbo ratio. But | ||
| 412 | * when cpufreq set policy is called, it will call with this | ||
| 413 | * max frequency, which will cause a reduced performance as | ||
| 414 | * this driver uses real max turbo frequency as the max | ||
| 415 | * frequency. So correct this frequency in _PSS table to | ||
| 416 | * correct max turbo frequency based on the turbo ratio. | ||
| 417 | * Also need to convert to MHz as _PSS freq is in MHz. | ||
| 418 | */ | ||
| 419 | turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); | ||
| 420 | if (turbo_pss_ctl > cpu->pstate.max_pstate) | ||
| 421 | cpu->acpi_perf_data.states[0].core_frequency = | ||
| 422 | policy->cpuinfo.max_freq / 1000; | ||
| 423 | cpu->valid_pss_table = true; | ||
| 424 | pr_info("_PPC limits will be enforced\n"); | ||
| 425 | |||
| 426 | return; | ||
| 427 | |||
| 428 | err: | ||
| 429 | cpu->valid_pss_table = false; | ||
| 430 | acpi_processor_unregister_performance(policy->cpu); | ||
| 431 | } | ||
| 432 | |||
| 433 | static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) | ||
| 434 | { | ||
| 435 | struct cpudata *cpu; | ||
| 436 | |||
| 437 | cpu = all_cpu_data[policy->cpu]; | ||
| 438 | if (!cpu->valid_pss_table) | ||
| 439 | return; | ||
| 440 | |||
| 441 | acpi_processor_unregister_performance(policy->cpu); | ||
| 442 | } | ||
| 443 | |||
| 444 | #else | ||
| 445 | static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) | ||
| 446 | { | ||
| 447 | } | ||
| 448 | |||
| 449 | static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) | ||
| 450 | { | ||
| 451 | } | ||
| 452 | #endif | ||
| 453 | |||
| 336 | static inline void pid_reset(struct _pid *pid, int setpoint, int busy, | 454 | static inline void pid_reset(struct _pid *pid, int setpoint, int busy, |
| 337 | int deadband, int integral) { | 455 | int deadband, int integral) { |
| 338 | pid->setpoint = int_tofp(setpoint); | 456 | pid->setpoint = int_tofp(setpoint); |
| @@ -1398,18 +1516,27 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) | |||
| 1398 | policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; | 1516 | policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; |
| 1399 | policy->cpuinfo.max_freq = | 1517 | policy->cpuinfo.max_freq = |
| 1400 | cpu->pstate.turbo_pstate * cpu->pstate.scaling; | 1518 | cpu->pstate.turbo_pstate * cpu->pstate.scaling; |
| 1519 | intel_pstate_init_acpi_perf_limits(policy); | ||
| 1401 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | 1520 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; |
| 1402 | cpumask_set_cpu(policy->cpu, policy->cpus); | 1521 | cpumask_set_cpu(policy->cpu, policy->cpus); |
| 1403 | 1522 | ||
| 1404 | return 0; | 1523 | return 0; |
| 1405 | } | 1524 | } |
| 1406 | 1525 | ||
| 1526 | static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) | ||
| 1527 | { | ||
| 1528 | intel_pstate_exit_perf_limits(policy); | ||
| 1529 | |||
| 1530 | return 0; | ||
| 1531 | } | ||
| 1532 | |||
| 1407 | static struct cpufreq_driver intel_pstate_driver = { | 1533 | static struct cpufreq_driver intel_pstate_driver = { |
| 1408 | .flags = CPUFREQ_CONST_LOOPS, | 1534 | .flags = CPUFREQ_CONST_LOOPS, |
| 1409 | .verify = intel_pstate_verify_policy, | 1535 | .verify = intel_pstate_verify_policy, |
| 1410 | .setpolicy = intel_pstate_set_policy, | 1536 | .setpolicy = intel_pstate_set_policy, |
| 1411 | .get = intel_pstate_get, | 1537 | .get = intel_pstate_get, |
| 1412 | .init = intel_pstate_cpu_init, | 1538 | .init = intel_pstate_cpu_init, |
| 1539 | .exit = intel_pstate_cpu_exit, | ||
| 1413 | .stop_cpu = intel_pstate_stop_cpu, | 1540 | .stop_cpu = intel_pstate_stop_cpu, |
| 1414 | .name = "intel_pstate", | 1541 | .name = "intel_pstate", |
| 1415 | }; | 1542 | }; |
| @@ -1453,8 +1580,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) | |||
| 1453 | 1580 | ||
| 1454 | } | 1581 | } |
| 1455 | 1582 | ||
| 1456 | #if IS_ENABLED(CONFIG_ACPI) | 1583 | #ifdef CONFIG_ACPI |
| 1457 | #include <acpi/processor.h> | ||
| 1458 | 1584 | ||
| 1459 | static bool intel_pstate_no_acpi_pss(void) | 1585 | static bool intel_pstate_no_acpi_pss(void) |
| 1460 | { | 1586 | { |
| @@ -1660,6 +1786,12 @@ static int __init intel_pstate_setup(char *str) | |||
| 1660 | force_load = 1; | 1786 | force_load = 1; |
| 1661 | if (!strcmp(str, "hwp_only")) | 1787 | if (!strcmp(str, "hwp_only")) |
| 1662 | hwp_only = 1; | 1788 | hwp_only = 1; |
| 1789 | |||
| 1790 | #ifdef CONFIG_ACPI | ||
| 1791 | if (!strcmp(str, "support_acpi_ppc")) | ||
| 1792 | acpi_ppc = true; | ||
| 1793 | #endif | ||
| 1794 | |||
| 1663 | return 0; | 1795 | return 0; |
| 1664 | } | 1796 | } |
| 1665 | early_param("intel_pstate", intel_pstate_setup); | 1797 | early_param("intel_pstate", intel_pstate_setup); |
