| author | Dirk Brandewie <dirk.j.brandewie@intel.com> | 2014-02-25 13:35:37 -0500 |
|---|---|---|
| committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2014-02-25 18:56:49 -0500 |
| commit | e66c176837462928a05a135bbe16cdce70536d6e (patch) | |
| tree | 3f2b47b1f93f84efa151fd9c5596a3d7238fa080 /drivers | |
| parent | cfbf8d4857c26a8a307fb7cd258074c9dcd8c691 (diff) | |
intel_pstate: Change busy calculation to use fixed point math.
Commit fcb6a15c2e (intel_pstate: Take core C0 time into account for
core busy calculation) introduced a regression on some processor SKUs
supported by intel_pstate. This was due to the truncation caused by
using integer math to calculate core busy and C0 percentages.
On an i7-4770K processor running at 800 MHz and going to 100% utilization, the percent busy of the CPU computed with integer math is 22%, but the actual value is 22.85%. Scaled to the current frequency, this value returned 97, which the PID interpreted as no error, so it did not adjust the P-state.
Tested on i7-4770K, i7-2600, i5-3230M.
Fixes: fcb6a15c2e7e (intel_pstate: Take core C0 time into account for core busy calculation)
References: https://lkml.org/lkml/2014/2/19/626
References: https://bugzilla.kernel.org/show_bug.cgi?id=70941
Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
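
To make the numbers in the changelog concrete, here is a small user-space sketch (not part of the patch) that replays both calculations for roughly the case described above. The counter deltas, the max/current P-states, and the ~97 PID setpoint are assumptions chosen to reproduce the 22% vs. 22.85% example; only the before/after formulas are taken from the diff below.

```c
#include <stdio.h>
#include <stdint.h>

/*
 * Fixed point helpers mirroring the patch (FRAC_BITS is 6 after the change).
 * The kernel versions work on int32_t; int64_t is used here only to keep
 * this user-space sketch short.
 */
#define FRAC_BITS 6
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)
#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS)

static int64_t mul_fp(int64_t x, int64_t y) { return (x * y) >> FRAC_BITS; }
static int64_t div_fp(int64_t x, int64_t y) { return (x << FRAC_BITS) / y; }

int main(void)
{
    /*
     * Assumed per-sample deltas: fully busy (mperf == tsc) at pstate 8
     * (800 MHz) on a part whose max non-turbo pstate is 35 (3.5 GHz),
     * so aperf/mperf = 8/35 = 22.857%.  In the driver these deltas are
     * already right-shifted by FRAC_BITS; the toy values here are small
     * enough that the shift does not matter.
     */
    uint64_t aperf = 8000000, mperf = 35000000, tsc = 35000000;
    int max_pstate = 35, current_pstate = 8;

    /* Old integer math: 8/35 truncates to 22%, and the old core_pct_busy
     * formula then scales to about 97 -- no error against a ~97 setpoint. */
    uint64_t core_pct_old = aperf * 100 / mperf;            /* 22 */
    uint64_t c0_pct_old = mperf * 100 / tsc;                /* 100 */
    double scaled_old = (double)core_pct_old * (c0_pct_old + 1) / 100
                        * max_pstate / current_pstate;      /* ~97.2 */

    /* New path, following intel_pstate_calc_busy() and
     * intel_pstate_get_scaled_busy() after the patch. */
    int64_t core_pct = div_fp(int_tofp(aperf), int_tofp(mperf));
    core_pct = mul_fp(core_pct, int_tofp(100));
    FP_ROUNDUP(core_pct);                                   /* ~22.9% */
    int64_t c0_pct = div_fp(int_tofp(mperf), int_tofp(tsc));
    int64_t busy = mul_fp(core_pct, c0_pct);
    int64_t scaled = mul_fp(busy, div_fp(int_tofp(max_pstate),
                                         int_tofp(current_pstate)));
    FP_ROUNDUP(scaled);                                     /* ~101 */

    printf("old scaled busy: %.1f   new scaled busy: %.1f\n",
           scaled_old, (double)scaled / (1 << FRAC_BITS));
    return 0;
}
```

With integer math the scaled busy value lands at about 97, i.e. essentially zero PID error, while the fixed point version lands at about 101, which is enough error to push the P-state up.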
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/cpufreq/intel_pstate.c | 28 |
1 file changed, 18 insertions(+), 10 deletions(-)
```diff
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index e90816105921..2cd36b9297f3 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -39,9 +39,10 @@
 #define BYT_TURBO_RATIOS        0x66c
 
 
-#define FRAC_BITS 8
+#define FRAC_BITS 6
 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 #define fp_toint(X) ((X) >> FRAC_BITS)
+#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS)
 
 static inline int32_t mul_fp(int32_t x, int32_t y)
 {
@@ -556,18 +557,20 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 static inline void intel_pstate_calc_busy(struct cpudata *cpu,
                                         struct sample *sample)
 {
-        u64 core_pct;
-        u64 c0_pct;
+        int32_t core_pct;
+        int32_t c0_pct;
 
-        core_pct = div64_u64(sample->aperf * 100, sample->mperf);
+        core_pct = div_fp(int_tofp((sample->aperf)),
+                        int_tofp((sample->mperf)));
+        core_pct = mul_fp(core_pct, int_tofp(100));
+        FP_ROUNDUP(core_pct);
+
+        c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc));
 
-        c0_pct = div64_u64(sample->mperf * 100, sample->tsc);
         sample->freq = fp_toint(
-                mul_fp(int_tofp(cpu->pstate.max_pstate),
-                        int_tofp(core_pct * 1000)));
+                mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));
 
-        sample->core_pct_busy = mul_fp(int_tofp(core_pct),
-                                div_fp(int_tofp(c0_pct + 1), int_tofp(100)));
+        sample->core_pct_busy = mul_fp(core_pct, c0_pct);
 }
 
 static inline void intel_pstate_sample(struct cpudata *cpu)
@@ -579,6 +582,10 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
         rdmsrl(MSR_IA32_MPERF, mperf);
         tsc = native_read_tsc();
 
+        aperf = aperf >> FRAC_BITS;
+        mperf = mperf >> FRAC_BITS;
+        tsc = tsc >> FRAC_BITS;
+
         cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
         cpu->samples[cpu->sample_ptr].aperf = aperf;
         cpu->samples[cpu->sample_ptr].mperf = mperf;
@@ -610,7 +617,8 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
         core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy;
         max_pstate = int_tofp(cpu->pstate.max_pstate);
         current_pstate = int_tofp(cpu->pstate.current_pstate);
-        return mul_fp(core_busy, div_fp(max_pstate, current_pstate));
+        core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
+        return FP_ROUNDUP(core_busy);
 }
 
 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
```
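
The changelog does not say why FRAC_BITS drops from 8 to 6, or why the raw APERF/MPERF/TSC readings are shifted right by FRAC_BITS before being stored. A plausible reading, purely an assumption here, is headroom: helpers such as mul_fp() take int32_t arguments, so int_tofp() applied to an unshifted counter delta could overflow. A rough bound, with a made-up sample window:

```c
#include <stdio.h>
#include <stdint.h>

#define FRAC_BITS 6

int main(void)
{
    /* Assumed sample window: 100 ms on a 3.5 GHz part, so the MPERF/TSC
     * delta is about 350 million cycles. */
    uint64_t delta = 350000000ULL;

    /* int_tofp() on the raw delta: 350e6 << 6 = 2.24e10, well outside
     * the int32_t range the fixed point helpers operate on. */
    int64_t unshifted = (int64_t)delta << FRAC_BITS;

    /* The patch shifts the raw readings right by FRAC_BITS first, so what
     * reaches int_tofp() is ~5.5 million and the shifted-back value
     * (~350 million) still fits in an int32_t. */
    int64_t shifted = (int64_t)(delta >> FRAC_BITS) << FRAC_BITS;

    printf("unshifted: %lld  shifted: %lld  (INT32_MAX: %d)\n",
           (long long)unshifted, (long long)shifted, INT32_MAX);
    return 0;
}
```

Pre-shifting gives up the low 6 bits of the counters, which is noise at this scale, in exchange for keeping the intermediate values inside the 32-bit fixed point range.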
