summaryrefslogtreecommitdiffstats
path: root/init
diff options
context:
space:
mode:
author: Phil Carmody <ext-phil.2.carmody@nokia.com> 2011-03-22 19:34:12 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-03-22 20:44:11 -0400
commit: 71c696b1d0310da3ab8033d743282959bd49d28b (patch)
tree: cf501760d1bb1e045c23bf5155742fa8aa221776 /init
parent: 9bfb23fc4a481650e60d22dbe84c0fd5a9d49bba (diff)
calibrate: extract fall-back calculation into own helper
The motivation for this patch series is that currently our OMAP calibrates itself using the trial-and-error binary chop fallback that some other architectures no longer need to perform. This is a lengthy process, taking 0.2s in an environment where boot time is of great interest. Patch 2/4 has two optimisations. Firstly, it replaces the initial repeated-doubling to find the relevant power of 2 with a tight loop that just does as much as it can in a jiffy. Secondly, it doesn't binary chop over an entire power of 2 range; it chooses a much smaller range based on how much it squeezed in, and failed to squeeze in, during the first stage. Both are significant optimisations, and bring our calibration down from 23 jiffies to 5, and, in the process, often arrive at a more accurate lpj value. The 'bands' and 'sub-logarithmic' growth may look over-engineered, but they only cost a small level of inaccuracy in the initial guess (for all architectures) in order to avoid the very large inaccuracies that appeared during testing (on x86_64 architectures, and presumably others with less metronomic operation). Note that due to the existence of the TSC and other timers, the x86_64 will not typically use this fallback routine, but I wanted to code defensively, able to cope with all kinds of processor behaviours and kernel command line options. Patch 3/4 is an additional trap for the nightmare scenario where the initial estimate is very inaccurate, possibly due to things like SMIs. It simply retries with a larger bound. Stephen said: I tried this patch set out on an MSM7630. : : Before: : : Calibrating delay loop... 681.57 BogoMIPS (lpj=3407872) : : After: : : Calibrating delay loop... 680.75 BogoMIPS (lpj=3403776) : : But the really good news is calibration time dropped from ~247ms to ~56ms. : Sadly we won't be able to benefit from this should my udelay patches make : it into ARM because we would be using calibrate_delay_direct() instead (at : least on machines who choose to). 
Can we somehow reapply the logic behind : this to calibrate_delay_direct()? That would be even better, but this is : definitely a boot time improvement. : : Or maybe we could just replace calibrate_delay_direct() with this fallback : calculation? If __delay() is a thin wrapper around read_current_timer() : it should work just as well (plus patch 3 makes it handle SMIs). I'll try : that out. This patch: ... so that it can be modified more clinically. This is almost entirely cosmetic. The only change to the operation is that the global variable is only set once after the estimation is completed, rather than taking on all the intermediate values. However, there are no readers of that variable, so this change is unimportant. Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Tested-by: Stephen Boyd <sboyd@codeaurora.org> Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'init')
-rw-r--r-- init/calibrate.c 73
1 files changed, 40 insertions, 33 deletions
diff --git a/init/calibrate.c b/init/calibrate.c
index 24fe022c55f9..b71643a7acae 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -119,10 +119,47 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
119 */ 119 */
120#define LPS_PREC 8 120#define LPS_PREC 8
121 121
122void __cpuinit calibrate_delay(void) 122static unsigned long __cpuinit calibrate_delay_converge(void)
123{ 123{
124 unsigned long ticks, loopbit; 124 unsigned long lpj, ticks, loopbit;
125 int lps_precision = LPS_PREC; 125 int lps_precision = LPS_PREC;
126
127 lpj = (1<<12);
128 while ((lpj <<= 1) != 0) {
129 /* wait for "start of" clock tick */
130 ticks = jiffies;
131 while (ticks == jiffies)
132 /* nothing */;
133 /* Go .. */
134 ticks = jiffies;
135 __delay(lpj);
136 ticks = jiffies - ticks;
137 if (ticks)
138 break;
139 }
140
141 /*
142 * Do a binary approximation to get lpj set to
143 * equal one clock (up to lps_precision bits)
144 */
145 lpj >>= 1;
146 loopbit = lpj;
147 while (lps_precision-- && (loopbit >>= 1)) {
148 lpj |= loopbit;
149 ticks = jiffies;
150 while (ticks == jiffies)
151 /* nothing */;
152 ticks = jiffies;
153 __delay(lpj);
154 if (jiffies != ticks) /* longer than 1 tick */
155 lpj &= ~loopbit;
156 }
157
158 return lpj;
159}
160
161void __cpuinit calibrate_delay(void)
162{
126 static bool printed; 163 static bool printed;
127 164
128 if (preset_lpj) { 165 if (preset_lpj) {
@@ -139,39 +176,9 @@ void __cpuinit calibrate_delay(void)
139 pr_info("Calibrating delay using timer " 176 pr_info("Calibrating delay using timer "
140 "specific routine.. "); 177 "specific routine.. ");
141 } else { 178 } else {
142 loops_per_jiffy = (1<<12);
143
144 if (!printed) 179 if (!printed)
145 pr_info("Calibrating delay loop... "); 180 pr_info("Calibrating delay loop... ");
146 while ((loops_per_jiffy <<= 1) != 0) { 181 loops_per_jiffy = calibrate_delay_converge();
147 /* wait for "start of" clock tick */
148 ticks = jiffies;
149 while (ticks == jiffies)
150 /* nothing */;
151 /* Go .. */
152 ticks = jiffies;
153 __delay(loops_per_jiffy);
154 ticks = jiffies - ticks;
155 if (ticks)
156 break;
157 }
158
159 /*
160 * Do a binary approximation to get loops_per_jiffy set to
161 * equal one clock (up to lps_precision bits)
162 */
163 loops_per_jiffy >>= 1;
164 loopbit = loops_per_jiffy;
165 while (lps_precision-- && (loopbit >>= 1)) {
166 loops_per_jiffy |= loopbit;
167 ticks = jiffies;
168 while (ticks == jiffies)
169 /* nothing */;
170 ticks = jiffies;
171 __delay(loops_per_jiffy);
172 if (jiffies != ticks) /* longer than 1 tick */
173 loops_per_jiffy &= ~loopbit;
174 }
175 } 182 }
176 if (!printed) 183 if (!printed)
177 pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", 184 pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",