diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-04 10:48:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-04 10:48:54 -0400 |
commit | c717d1561493c58d030405c7e30e35459db31912 (patch) | |
tree | 85c3a9521691b437cb7a76b701331d3992361f46 | |
parent | 9e9a928eed8796a0a1aaed7e0b676db86ba84594 (diff) | |
parent | bf8102228a8bf053051f311e5486042fe0542894 (diff) |
Merge tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull intel pstate fixes from Rafael Wysocki:
"Final power management fixes for 3.15
- Taking non-idle time into account when calculating core busy time
was a mistake and led to a performance regression. Since the
problem it was supposed to address is now taken care of in a
different way, we don't need to do it any more, so drop the
non-idle time tracking from intel_pstate. Fix from Dirk Brandewie.
- Changing to fixed point math throughout the busy calculation
introduced rounding errors that adversely affect the accuracy of
intel_pstate's computations. Fix from Dirk Brandewie.
- The PID controller algorithm used by intel_pstate assumes that the
time interval between two adjacent samples will always be the same,
which is not the case for deferrable timers (used by intel_pstate)
when the system is idle. This leads to inaccurate predictions and
artificially increases convergence times for the minimum P-state.
Fix from Dirk Brandewie.
- intel_pstate carries out computations using 32-bit variables that
may overflow for large enough values of APERF/MPERF. Switch to
using 64-bit variables for computations. Fix from Doug Smythies."
* tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
intel_pstate: Improve initial busy calculation
intel_pstate: add sample time scaling
intel_pstate: Correct rounding in busy calculation
intel_pstate: Remove C0 tracking
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 52 |
1 files changed, 31 insertions, 21 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index eab8ccfe6beb..db2e45b4808e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c | |||
@@ -40,10 +40,10 @@ | |||
40 | #define BYT_TURBO_VIDS 0x66d | 40 | #define BYT_TURBO_VIDS 0x66d |
41 | 41 | ||
42 | 42 | ||
43 | #define FRAC_BITS 6 | 43 | #define FRAC_BITS 8 |
44 | #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) | 44 | #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) |
45 | #define fp_toint(X) ((X) >> FRAC_BITS) | 45 | #define fp_toint(X) ((X) >> FRAC_BITS) |
46 | #define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS) | 46 | |
47 | 47 | ||
48 | static inline int32_t mul_fp(int32_t x, int32_t y) | 48 | static inline int32_t mul_fp(int32_t x, int32_t y) |
49 | { | 49 | { |
@@ -59,8 +59,8 @@ struct sample { | |||
59 | int32_t core_pct_busy; | 59 | int32_t core_pct_busy; |
60 | u64 aperf; | 60 | u64 aperf; |
61 | u64 mperf; | 61 | u64 mperf; |
62 | unsigned long long tsc; | ||
63 | int freq; | 62 | int freq; |
63 | ktime_t time; | ||
64 | }; | 64 | }; |
65 | 65 | ||
66 | struct pstate_data { | 66 | struct pstate_data { |
@@ -98,9 +98,9 @@ struct cpudata { | |||
98 | struct vid_data vid; | 98 | struct vid_data vid; |
99 | struct _pid pid; | 99 | struct _pid pid; |
100 | 100 | ||
101 | ktime_t last_sample_time; | ||
101 | u64 prev_aperf; | 102 | u64 prev_aperf; |
102 | u64 prev_mperf; | 103 | u64 prev_mperf; |
103 | unsigned long long prev_tsc; | ||
104 | struct sample sample; | 104 | struct sample sample; |
105 | }; | 105 | }; |
106 | 106 | ||
@@ -200,7 +200,10 @@ static signed int pid_calc(struct _pid *pid, int32_t busy) | |||
200 | pid->last_err = fp_error; | 200 | pid->last_err = fp_error; |
201 | 201 | ||
202 | result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; | 202 | result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; |
203 | 203 | if (result >= 0) | |
204 | result = result + (1 << (FRAC_BITS-1)); | ||
205 | else | ||
206 | result = result - (1 << (FRAC_BITS-1)); | ||
204 | return (signed int)fp_toint(result); | 207 | return (signed int)fp_toint(result); |
205 | } | 208 | } |
206 | 209 | ||
@@ -560,47 +563,42 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) | |||
560 | static inline void intel_pstate_calc_busy(struct cpudata *cpu, | 563 | static inline void intel_pstate_calc_busy(struct cpudata *cpu, |
561 | struct sample *sample) | 564 | struct sample *sample) |
562 | { | 565 | { |
563 | int32_t core_pct; | 566 | int64_t core_pct; |
564 | int32_t c0_pct; | 567 | int32_t rem; |
565 | 568 | ||
566 | core_pct = div_fp(int_tofp((sample->aperf)), | 569 | core_pct = int_tofp(sample->aperf) * int_tofp(100); |
567 | int_tofp((sample->mperf))); | 570 | core_pct = div_u64_rem(core_pct, int_tofp(sample->mperf), &rem); |
568 | core_pct = mul_fp(core_pct, int_tofp(100)); | ||
569 | FP_ROUNDUP(core_pct); | ||
570 | 571 | ||
571 | c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc)); | 572 | if ((rem << 1) >= int_tofp(sample->mperf)) |
573 | core_pct += 1; | ||
572 | 574 | ||
573 | sample->freq = fp_toint( | 575 | sample->freq = fp_toint( |
574 | mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); | 576 | mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); |
575 | 577 | ||
576 | sample->core_pct_busy = mul_fp(core_pct, c0_pct); | 578 | sample->core_pct_busy = (int32_t)core_pct; |
577 | } | 579 | } |
578 | 580 | ||
579 | static inline void intel_pstate_sample(struct cpudata *cpu) | 581 | static inline void intel_pstate_sample(struct cpudata *cpu) |
580 | { | 582 | { |
581 | u64 aperf, mperf; | 583 | u64 aperf, mperf; |
582 | unsigned long long tsc; | ||
583 | 584 | ||
584 | rdmsrl(MSR_IA32_APERF, aperf); | 585 | rdmsrl(MSR_IA32_APERF, aperf); |
585 | rdmsrl(MSR_IA32_MPERF, mperf); | 586 | rdmsrl(MSR_IA32_MPERF, mperf); |
586 | tsc = native_read_tsc(); | ||
587 | 587 | ||
588 | aperf = aperf >> FRAC_BITS; | 588 | aperf = aperf >> FRAC_BITS; |
589 | mperf = mperf >> FRAC_BITS; | 589 | mperf = mperf >> FRAC_BITS; |
590 | tsc = tsc >> FRAC_BITS; | ||
591 | 590 | ||
591 | cpu->last_sample_time = cpu->sample.time; | ||
592 | cpu->sample.time = ktime_get(); | ||
592 | cpu->sample.aperf = aperf; | 593 | cpu->sample.aperf = aperf; |
593 | cpu->sample.mperf = mperf; | 594 | cpu->sample.mperf = mperf; |
594 | cpu->sample.tsc = tsc; | ||
595 | cpu->sample.aperf -= cpu->prev_aperf; | 595 | cpu->sample.aperf -= cpu->prev_aperf; |
596 | cpu->sample.mperf -= cpu->prev_mperf; | 596 | cpu->sample.mperf -= cpu->prev_mperf; |
597 | cpu->sample.tsc -= cpu->prev_tsc; | ||
598 | 597 | ||
599 | intel_pstate_calc_busy(cpu, &cpu->sample); | 598 | intel_pstate_calc_busy(cpu, &cpu->sample); |
600 | 599 | ||
601 | cpu->prev_aperf = aperf; | 600 | cpu->prev_aperf = aperf; |
602 | cpu->prev_mperf = mperf; | 601 | cpu->prev_mperf = mperf; |
603 | cpu->prev_tsc = tsc; | ||
604 | } | 602 | } |
605 | 603 | ||
606 | static inline void intel_pstate_set_sample_time(struct cpudata *cpu) | 604 | static inline void intel_pstate_set_sample_time(struct cpudata *cpu) |
@@ -614,13 +612,25 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) | |||
614 | 612 | ||
615 | static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) | 613 | static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) |
616 | { | 614 | { |
617 | int32_t core_busy, max_pstate, current_pstate; | 615 | int32_t core_busy, max_pstate, current_pstate, sample_ratio; |
616 | u32 duration_us; | ||
617 | u32 sample_time; | ||
618 | 618 | ||
619 | core_busy = cpu->sample.core_pct_busy; | 619 | core_busy = cpu->sample.core_pct_busy; |
620 | max_pstate = int_tofp(cpu->pstate.max_pstate); | 620 | max_pstate = int_tofp(cpu->pstate.max_pstate); |
621 | current_pstate = int_tofp(cpu->pstate.current_pstate); | 621 | current_pstate = int_tofp(cpu->pstate.current_pstate); |
622 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); | 622 | core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); |
623 | return FP_ROUNDUP(core_busy); | 623 | |
624 | sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC); | ||
625 | duration_us = (u32) ktime_us_delta(cpu->sample.time, | ||
626 | cpu->last_sample_time); | ||
627 | if (duration_us > sample_time * 3) { | ||
628 | sample_ratio = div_fp(int_tofp(sample_time), | ||
629 | int_tofp(duration_us)); | ||
630 | core_busy = mul_fp(core_busy, sample_ratio); | ||
631 | } | ||
632 | |||
633 | return core_busy; | ||
624 | } | 634 | } |
625 | 635 | ||
626 | static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) | 636 | static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) |