aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Phil Carmody <ext-phil.2.carmody@nokia.com> 2011-03-22 19:34:13 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-03-22 20:44:11 -0400
commit: 191e56880a6a638ce931859317f37deb084b6433 (patch)
tree: 26853fa62983f12b85badda6b9ee2197c2f10697
parent: 71c696b1d0310da3ab8033d743282959bd49d28b (diff)
calibrate: home in on correct lpj value more quickly
Binary chop with a jiffy-resync on each step to find an upper bound is slow, so just race in a tight-ish loop to find an underestimate.

If done with lots of individual steps, sometimes several hundreds of iterations would be required, which would impose a significant overhead, and make the initial estimate very low. By taking slowly increasing steps there will be less overhead.

E.g. an x86_64 2.67GHz could have fitted in 613 individual small delays, but in reality should have been able to fit in a single delay 644 times longer, so underestimated by 31 steps. To reach the equivalent of 644 small delays with the accelerating scheme now requires about 130 iterations, so has <1/4th of the overhead, and can therefore be expected to underestimate by only 7 steps.

As now we have a better initial estimate we can binary chop over a smaller range. With the loop overhead in the initial estimate kept low, and the step sizes moderate, we won't have under-estimated by much, so choose as tight a range as we can.

Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Tested-by: Stephen Boyd <sboyd@codeaurora.org>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- init/calibrate.c 57
1 files changed, 34 insertions, 23 deletions
diff --git a/init/calibrate.c b/init/calibrate.c
index b71643a7acae..f9000dfbe227 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -110,8 +110,8 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
110 110
111/* 111/*
112 * This is the number of bits of precision for the loops_per_jiffy. Each 112 * This is the number of bits of precision for the loops_per_jiffy. Each
113 * bit takes on average 1.5/HZ seconds. This (like the original) is a little 113 * time we refine our estimate after the first takes 1.5/HZ seconds, so try
114 * better than 1% 114 * to start with a good estimate.
115 * For the boot cpu we can skip the delay calibration and assign it a value 115 * For the boot cpu we can skip the delay calibration and assign it a value
116 * calculated based on the timer frequency. 116 * calculated based on the timer frequency.
117 * For the rest of the CPUs we cannot assume that the timer frequency is same as 117 * For the rest of the CPUs we cannot assume that the timer frequency is same as
@@ -121,38 +121,49 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
121 121
122static unsigned long __cpuinit calibrate_delay_converge(void) 122static unsigned long __cpuinit calibrate_delay_converge(void)
123{ 123{
124 unsigned long lpj, ticks, loopbit; 124 /* First stage - slowly accelerate to find initial bounds */
125 int lps_precision = LPS_PREC; 125 unsigned long lpj, ticks, loopadd, chop_limit;
126 int trials = 0, band = 0, trial_in_band = 0;
126 127
127 lpj = (1<<12); 128 lpj = (1<<12);
128 while ((lpj <<= 1) != 0) { 129
129 /* wait for "start of" clock tick */ 130 /* wait for "start of" clock tick */
130 ticks = jiffies; 131 ticks = jiffies;
131 while (ticks == jiffies) 132 while (ticks == jiffies)
132 /* nothing */; 133 ; /* nothing */
133 /* Go .. */ 134 /* Go .. */
134 ticks = jiffies; 135 ticks = jiffies;
135 __delay(lpj); 136 do {
136 ticks = jiffies - ticks; 137 if (++trial_in_band == (1<<band)) {
137 if (ticks) 138 ++band;
138 break; 139 trial_in_band = 0;
139 } 140 }
141 __delay(lpj * band);
142 trials += band;
143 } while (ticks == jiffies);
144 /*
145 * We overshot, so retreat to a clear underestimate. Then estimate
146 * the largest likely undershoot. This defines our chop bounds.
147 */
148 trials -= band;
149 loopadd = lpj * band;
150 lpj *= trials;
151 chop_limit = lpj >> (LPS_PREC + 1);
140 152
141 /* 153 /*
142 * Do a binary approximation to get lpj set to 154 * Do a binary approximation to get lpj set to
143 * equal one clock (up to lps_precision bits) 155 * equal one clock (up to LPS_PREC bits)
144 */ 156 */
145 lpj >>= 1; 157 while (loopadd > chop_limit) {
146 loopbit = lpj; 158 lpj += loopadd;
147 while (lps_precision-- && (loopbit >>= 1)) {
148 lpj |= loopbit;
149 ticks = jiffies; 159 ticks = jiffies;
150 while (ticks == jiffies) 160 while (ticks == jiffies)
151 /* nothing */; 161 ; /* nothing */
152 ticks = jiffies; 162 ticks = jiffies;
153 __delay(lpj); 163 __delay(lpj);
154 if (jiffies != ticks) /* longer than 1 tick */ 164 if (jiffies != ticks) /* longer than 1 tick */
155 lpj &= ~loopbit; 165 lpj -= loopadd;
166 loopadd >>= 1;
156 } 167 }
157 168
158 return lpj; 169 return lpj;