aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-06-04 10:48:54 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-06-04 10:48:54 -0400
commit: c717d1561493c58d030405c7e30e35459db31912 (patch)
tree: 85c3a9521691b437cb7a76b701331d3992361f46
parent: 9e9a928eed8796a0a1aaed7e0b676db86ba84594 (diff)
parent: bf8102228a8bf053051f311e5486042fe0542894 (diff)
Merge tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull intel pstate fixes from Rafael Wysocki:
 "Final power management fixes for 3.15

  - Taking non-idle time into account when calculating core busy time was a mistake and led to a performance regression. Since the problem it was supposed to address is now taken care of in a different way, we don't need to do it any more, so drop the non-idle time tracking from intel_pstate. Fix from Dirk Brandewie.

  - Changing to fixed point math throughout the busy calculation introduced rounding errors that adversely affect the accuracy of intel_pstate's computations. Fix from Dirk Brandewie.

  - The PID controller algorithm used by intel_pstate assumes that the time interval between two adjacent samples will always be the same, which is not the case for deferrable timers (used by intel_pstate) when the system is idle. This leads to inaccurate predictions and artificially increases convergence times for the minimum P-state. Fix from Dirk Brandewie.

  - intel_pstate carries out computations using 32-bit variables that may overflow for large enough values of APERF/MPERF. Switch to using 64-bit variables for computations, from Doug Smythies"

* tag 'pm-3.15-final' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  intel_pstate: Improve initial busy calculation
  intel_pstate: add sample time scaling
  intel_pstate: Correct rounding in busy calculation
  intel_pstate: Remove C0 tracking
-rw-r--r-- drivers/cpufreq/intel_pstate.c 52
1 file changed, 31 insertions, 21 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index eab8ccfe6beb..db2e45b4808e 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -40,10 +40,10 @@
40#define BYT_TURBO_VIDS 0x66d 40#define BYT_TURBO_VIDS 0x66d
41 41
42 42
43#define FRAC_BITS 6 43#define FRAC_BITS 8
44#define int_tofp(X) ((int64_t)(X) << FRAC_BITS) 44#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
45#define fp_toint(X) ((X) >> FRAC_BITS) 45#define fp_toint(X) ((X) >> FRAC_BITS)
46#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS) 46
47 47
48static inline int32_t mul_fp(int32_t x, int32_t y) 48static inline int32_t mul_fp(int32_t x, int32_t y)
49{ 49{
@@ -59,8 +59,8 @@ struct sample {
59 int32_t core_pct_busy; 59 int32_t core_pct_busy;
60 u64 aperf; 60 u64 aperf;
61 u64 mperf; 61 u64 mperf;
62 unsigned long long tsc;
63 int freq; 62 int freq;
63 ktime_t time;
64}; 64};
65 65
66struct pstate_data { 66struct pstate_data {
@@ -98,9 +98,9 @@ struct cpudata {
98 struct vid_data vid; 98 struct vid_data vid;
99 struct _pid pid; 99 struct _pid pid;
100 100
101 ktime_t last_sample_time;
101 u64 prev_aperf; 102 u64 prev_aperf;
102 u64 prev_mperf; 103 u64 prev_mperf;
103 unsigned long long prev_tsc;
104 struct sample sample; 104 struct sample sample;
105}; 105};
106 106
@@ -200,7 +200,10 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
200 pid->last_err = fp_error; 200 pid->last_err = fp_error;
201 201
202 result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; 202 result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
203 203 if (result >= 0)
204 result = result + (1 << (FRAC_BITS-1));
205 else
206 result = result - (1 << (FRAC_BITS-1));
204 return (signed int)fp_toint(result); 207 return (signed int)fp_toint(result);
205} 208}
206 209
@@ -560,47 +563,42 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
560static inline void intel_pstate_calc_busy(struct cpudata *cpu, 563static inline void intel_pstate_calc_busy(struct cpudata *cpu,
561 struct sample *sample) 564 struct sample *sample)
562{ 565{
563 int32_t core_pct; 566 int64_t core_pct;
564 int32_t c0_pct; 567 int32_t rem;
565 568
566 core_pct = div_fp(int_tofp((sample->aperf)), 569 core_pct = int_tofp(sample->aperf) * int_tofp(100);
567 int_tofp((sample->mperf))); 570 core_pct = div_u64_rem(core_pct, int_tofp(sample->mperf), &rem);
568 core_pct = mul_fp(core_pct, int_tofp(100));
569 FP_ROUNDUP(core_pct);
570 571
571 c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc)); 572 if ((rem << 1) >= int_tofp(sample->mperf))
573 core_pct += 1;
572 574
573 sample->freq = fp_toint( 575 sample->freq = fp_toint(
574 mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); 576 mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));
575 577
576 sample->core_pct_busy = mul_fp(core_pct, c0_pct); 578 sample->core_pct_busy = (int32_t)core_pct;
577} 579}
578 580
579static inline void intel_pstate_sample(struct cpudata *cpu) 581static inline void intel_pstate_sample(struct cpudata *cpu)
580{ 582{
581 u64 aperf, mperf; 583 u64 aperf, mperf;
582 unsigned long long tsc;
583 584
584 rdmsrl(MSR_IA32_APERF, aperf); 585 rdmsrl(MSR_IA32_APERF, aperf);
585 rdmsrl(MSR_IA32_MPERF, mperf); 586 rdmsrl(MSR_IA32_MPERF, mperf);
586 tsc = native_read_tsc();
587 587
588 aperf = aperf >> FRAC_BITS; 588 aperf = aperf >> FRAC_BITS;
589 mperf = mperf >> FRAC_BITS; 589 mperf = mperf >> FRAC_BITS;
590 tsc = tsc >> FRAC_BITS;
591 590
591 cpu->last_sample_time = cpu->sample.time;
592 cpu->sample.time = ktime_get();
592 cpu->sample.aperf = aperf; 593 cpu->sample.aperf = aperf;
593 cpu->sample.mperf = mperf; 594 cpu->sample.mperf = mperf;
594 cpu->sample.tsc = tsc;
595 cpu->sample.aperf -= cpu->prev_aperf; 595 cpu->sample.aperf -= cpu->prev_aperf;
596 cpu->sample.mperf -= cpu->prev_mperf; 596 cpu->sample.mperf -= cpu->prev_mperf;
597 cpu->sample.tsc -= cpu->prev_tsc;
598 597
599 intel_pstate_calc_busy(cpu, &cpu->sample); 598 intel_pstate_calc_busy(cpu, &cpu->sample);
600 599
601 cpu->prev_aperf = aperf; 600 cpu->prev_aperf = aperf;
602 cpu->prev_mperf = mperf; 601 cpu->prev_mperf = mperf;
603 cpu->prev_tsc = tsc;
604} 602}
605 603
606static inline void intel_pstate_set_sample_time(struct cpudata *cpu) 604static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
@@ -614,13 +612,25 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
614 612
615static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) 613static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
616{ 614{
617 int32_t core_busy, max_pstate, current_pstate; 615 int32_t core_busy, max_pstate, current_pstate, sample_ratio;
616 u32 duration_us;
617 u32 sample_time;
618 618
619 core_busy = cpu->sample.core_pct_busy; 619 core_busy = cpu->sample.core_pct_busy;
620 max_pstate = int_tofp(cpu->pstate.max_pstate); 620 max_pstate = int_tofp(cpu->pstate.max_pstate);
621 current_pstate = int_tofp(cpu->pstate.current_pstate); 621 current_pstate = int_tofp(cpu->pstate.current_pstate);
622 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); 622 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
623 return FP_ROUNDUP(core_busy); 623
624 sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC);
625 duration_us = (u32) ktime_us_delta(cpu->sample.time,
626 cpu->last_sample_time);
627 if (duration_us > sample_time * 3) {
628 sample_ratio = div_fp(int_tofp(sample_time),
629 int_tofp(duration_us));
630 core_busy = mul_fp(core_busy, sample_ratio);
631 }
632
633 return core_busy;
624} 634}
625 635
626static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) 636static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)