diff options
author | Dirk Brandewie <dirk.j.brandewie@intel.com> | 2014-02-25 13:35:37 -0500 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2014-02-25 18:56:49 -0500 |
commit | e66c176837462928a05a135bbe16cdce70536d6e (patch) | |
tree | 3f2b47b1f93f84efa151fd9c5596a3d7238fa080 | |
parent | cfbf8d4857c26a8a307fb7cd258074c9dcd8c691 (diff) |
intel_pstate: Change busy calculation to use fixed point math.
Commit fcb6a15c2e (intel_pstate: Take core C0 time into account for
core busy calculation) introduced a regression on some processor SKUs
supported by intel_pstate. This was due to the truncation caused by
using integer math to calculate core busy and C0 percentages.
On an i7-4770K processor operating at 800 MHz and going to 100% utilization,
the percent busy of the CPU computed with integer math is 22%, but it is
actually 22.85%. This value, scaled to the current frequency, returned 97,
which the PID interpreted as no error, so it did not adjust the P state.
Tested on i7-4770K, i7-2600, i5-3230M.
Fixes: fcb6a15c2e7e (intel_pstate: Take core C0 time into account for core busy calculation)
References: https://lkml.org/lkml/2014/2/19/626
References: https://bugzilla.kernel.org/show_bug.cgi?id=70941
Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 28 |
1 files changed, 18 insertions, 10 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e90816105921..2cd36b9297f3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c | |||
@@ -39,9 +39,10 @@ | |||
39 | #define BYT_TURBO_RATIOS 0x66c | 39 | #define BYT_TURBO_RATIOS 0x66c |
40 | 40 | ||
41 | 41 | ||
42 | #define FRAC_BITS 8 | 42 | #define FRAC_BITS 6 |
43 | #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) | 43 | #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) |
44 | #define fp_toint(X) ((X) >> FRAC_BITS) | 44 | #define fp_toint(X) ((X) >> FRAC_BITS) |
45 | #define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS) | ||
45 | 46 | ||
46 | static inline int32_t mul_fp(int32_t x, int32_t y) | 47 | static inline int32_t mul_fp(int32_t x, int32_t y) |
47 | { | 48 | { |
@@ -556,18 +557,20 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) | |||
556 | static inline void intel_pstate_calc_busy(struct cpudata *cpu, | 557 | static inline void intel_pstate_calc_busy(struct cpudata *cpu, |
557 | struct sample *sample) | 558 | struct sample *sample) |
558 | { | 559 | { |
559 | u64 core_pct; | 560 | int32_t core_pct; |
560 | u64 c0_pct; | 561 | int32_t c0_pct; |
561 | 562 | ||
562 | core_pct = div64_u64(sample->aperf * 100, sample->mperf); | 563 | core_pct = div_fp(int_tofp((sample->aperf)), |
564 | int_tofp((sample->mperf))); | ||
565 | core_pct = mul_fp(core_pct, int_tofp(100)); | ||
566 | FP_ROUNDUP(core_pct); | ||
567 | |||
568 | c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc)); | ||
563 | 569 | ||
564 | c0_pct = div64_u64(sample->mperf * 100, sample->tsc); | ||
565 | sample->freq = fp_toint( | 570 | sample->freq = fp_toint( |
566 | mul_fp(int_tofp(cpu->pstate.max_pstate), | 571 | mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); |
567 | int_tofp(core_pct * 1000))); | ||
568 | 572 | ||
569 | sample->core_pct_busy = mul_fp(int_tofp(core_pct), | 573 | sample->core_pct_busy = mul_fp(core_pct, c0_pct); |
570 | div_fp(int_tofp(c0_pct + 1), int_tofp(100))); | ||
571 | } | 574 | } |
572 | 575 | ||
573 | static inline void intel_pstate_sample(struct cpudata *cpu) | 576 | static inline void intel_pstate_sample(struct cpudata *cpu) |
@@ -579,6 +582,10 @@ static inline void intel_pstate_sample(struct cpudata *cpu) | |||
579 | rdmsrl(MSR_IA32_MPERF, mperf); | 582 | rdmsrl(MSR_IA32_MPERF, mperf); |
580 | tsc = native_read_tsc(); | 583 | tsc = native_read_tsc(); |
581 | 584 | ||
585 | aperf = aperf >> FRAC_BITS; | ||
586 | mperf = mperf >> FRAC_BITS; | ||
587 | tsc = tsc >> FRAC_BITS; | ||
588 | |||
582 | cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; | 589 | cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; |
583 | cpu->samples[cpu->sample_ptr].aperf = aperf; | 590 | cpu->samples[cpu->sample_ptr].aperf = aperf; |
584 | cpu->samples[cpu->sample_ptr].mperf = mperf; | 591 | cpu->samples[cpu->sample_ptr].mperf = mperf; |
@@ -610,7 +617,8 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) | |||
610 | core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy; | 617 | core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy; |
611 | max_pstate = int_tofp(cpu->pstate.max_pstate); | 618 | max_pstate = int_tofp(cpu->pstate.max_pstate); |
612 | current_pstate = int_tofp(cpu->pstate.current_pstate); | 619 | current_pstate = int_tofp(cpu->pstate.current_pstate); |
613 | return mul_fp(core_busy, div_fp(max_pstate, current_pstate)); | 620 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); |
621 | return FP_ROUNDUP(core_busy); | ||
614 | } | 622 | } |
615 | 623 | ||
616 | static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) | 624 | static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) |