diff options
Diffstat (limited to 'kernel/time/clocksource.c')
-rw-r--r-- | kernel/time/clocksource.c | 529 |
1 files changed, 304 insertions, 225 deletions
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7466cb811251..09113347d328 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -21,7 +21,6 @@ | |||
21 | * | 21 | * |
22 | * TODO WishList: | 22 | * TODO WishList: |
23 | * o Allow clocksource drivers to be unregistered | 23 | * o Allow clocksource drivers to be unregistered |
24 | * o get rid of clocksource_jiffies extern | ||
25 | */ | 24 | */ |
26 | 25 | ||
27 | #include <linux/clocksource.h> | 26 | #include <linux/clocksource.h> |
@@ -30,6 +29,7 @@ | |||
30 | #include <linux/module.h> | 29 | #include <linux/module.h> |
31 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ | 30 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ |
32 | #include <linux/tick.h> | 31 | #include <linux/tick.h> |
32 | #include <linux/kthread.h> | ||
33 | 33 | ||
34 | void timecounter_init(struct timecounter *tc, | 34 | void timecounter_init(struct timecounter *tc, |
35 | const struct cyclecounter *cc, | 35 | const struct cyclecounter *cc, |
@@ -107,50 +107,35 @@ u64 timecounter_cyc2time(struct timecounter *tc, | |||
107 | } | 107 | } |
108 | EXPORT_SYMBOL(timecounter_cyc2time); | 108 | EXPORT_SYMBOL(timecounter_cyc2time); |
109 | 109 | ||
110 | /* XXX - Would like a better way for initializing curr_clocksource */ | ||
111 | extern struct clocksource clocksource_jiffies; | ||
112 | |||
113 | /*[Clocksource internal variables]--------- | 110 | /*[Clocksource internal variables]--------- |
114 | * curr_clocksource: | 111 | * curr_clocksource: |
115 | * currently selected clocksource. Initialized to clocksource_jiffies. | 112 | * currently selected clocksource. |
116 | * next_clocksource: | ||
117 | * pending next selected clocksource. | ||
118 | * clocksource_list: | 113 | * clocksource_list: |
119 | * linked list with the registered clocksources | 114 | * linked list with the registered clocksources |
120 | * clocksource_lock: | 115 | * clocksource_mutex: |
121 | * protects manipulations to curr_clocksource and next_clocksource | 116 | * protects manipulations to curr_clocksource and the clocksource_list |
122 | * and the clocksource_list | ||
123 | * override_name: | 117 | * override_name: |
124 | * Name of the user-specified clocksource. | 118 | * Name of the user-specified clocksource. |
125 | */ | 119 | */ |
126 | static struct clocksource *curr_clocksource = &clocksource_jiffies; | 120 | static struct clocksource *curr_clocksource; |
127 | static struct clocksource *next_clocksource; | ||
128 | static struct clocksource *clocksource_override; | ||
129 | static LIST_HEAD(clocksource_list); | 121 | static LIST_HEAD(clocksource_list); |
130 | static DEFINE_SPINLOCK(clocksource_lock); | 122 | static DEFINE_MUTEX(clocksource_mutex); |
131 | static char override_name[32]; | 123 | static char override_name[32]; |
132 | static int finished_booting; | 124 | static int finished_booting; |
133 | 125 | ||
134 | /* clocksource_done_booting - Called near the end of core bootup | ||
135 | * | ||
136 | * Hack to avoid lots of clocksource churn at boot time. | ||
137 | * We use fs_initcall because we want this to start before | ||
138 | * device_initcall but after subsys_initcall. | ||
139 | */ | ||
140 | static int __init clocksource_done_booting(void) | ||
141 | { | ||
142 | finished_booting = 1; | ||
143 | return 0; | ||
144 | } | ||
145 | fs_initcall(clocksource_done_booting); | ||
146 | |||
147 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | 126 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG |
127 | static void clocksource_watchdog_work(struct work_struct *work); | ||
128 | |||
148 | static LIST_HEAD(watchdog_list); | 129 | static LIST_HEAD(watchdog_list); |
149 | static struct clocksource *watchdog; | 130 | static struct clocksource *watchdog; |
150 | static struct timer_list watchdog_timer; | 131 | static struct timer_list watchdog_timer; |
132 | static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); | ||
151 | static DEFINE_SPINLOCK(watchdog_lock); | 133 | static DEFINE_SPINLOCK(watchdog_lock); |
152 | static cycle_t watchdog_last; | 134 | static cycle_t watchdog_last; |
153 | static unsigned long watchdog_resumed; | 135 | static int watchdog_running; |
136 | |||
137 | static int clocksource_watchdog_kthread(void *data); | ||
138 | static void __clocksource_change_rating(struct clocksource *cs, int rating); | ||
154 | 139 | ||
155 | /* | 140 | /* |
156 | * Interval: 0.5sec Threshold: 0.0625s | 141 | * Interval: 0.5sec Threshold: 0.0625s |
@@ -158,135 +143,249 @@ static unsigned long watchdog_resumed; | |||
158 | #define WATCHDOG_INTERVAL (HZ >> 1) | 143 | #define WATCHDOG_INTERVAL (HZ >> 1) |
159 | #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) | 144 | #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) |
160 | 145 | ||
161 | static void clocksource_ratewd(struct clocksource *cs, int64_t delta) | 146 | static void clocksource_watchdog_work(struct work_struct *work) |
162 | { | 147 | { |
163 | if (delta > -WATCHDOG_THRESHOLD && delta < WATCHDOG_THRESHOLD) | 148 | /* |
164 | return; | 149 | * If kthread_run fails the next watchdog scan over the |
150 | * watchdog_list will find the unstable clock again. | ||
151 | */ | ||
152 | kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); | ||
153 | } | ||
165 | 154 | ||
155 | static void __clocksource_unstable(struct clocksource *cs) | ||
156 | { | ||
157 | cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); | ||
158 | cs->flags |= CLOCK_SOURCE_UNSTABLE; | ||
159 | if (finished_booting) | ||
160 | schedule_work(&watchdog_work); | ||
161 | } | ||
162 | |||
163 | static void clocksource_unstable(struct clocksource *cs, int64_t delta) | ||
164 | { | ||
166 | printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", | 165 | printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", |
167 | cs->name, delta); | 166 | cs->name, delta); |
168 | cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); | 167 | __clocksource_unstable(cs); |
169 | clocksource_change_rating(cs, 0); | 168 | } |
170 | list_del(&cs->wd_list); | 169 | |
170 | /** | ||
171 | * clocksource_mark_unstable - mark clocksource unstable via watchdog | ||
172 | * @cs: clocksource to be marked unstable | ||
173 | * | ||
174 | * This function is called instead of clocksource_change_rating from | ||
175 | * cpu hotplug code to avoid a deadlock between the clocksource mutex | ||
176 | * and the cpu hotplug mutex. It defers the update of the clocksource | ||
177 | * to the watchdog thread. | ||
178 | */ | ||
179 | void clocksource_mark_unstable(struct clocksource *cs) | ||
180 | { | ||
181 | unsigned long flags; | ||
182 | |||
183 | spin_lock_irqsave(&watchdog_lock, flags); | ||
184 | if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) { | ||
185 | if (list_empty(&cs->wd_list)) | ||
186 | list_add(&cs->wd_list, &watchdog_list); | ||
187 | __clocksource_unstable(cs); | ||
188 | } | ||
189 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
171 | } | 190 | } |
172 | 191 | ||
173 | static void clocksource_watchdog(unsigned long data) | 192 | static void clocksource_watchdog(unsigned long data) |
174 | { | 193 | { |
175 | struct clocksource *cs, *tmp; | 194 | struct clocksource *cs; |
176 | cycle_t csnow, wdnow; | 195 | cycle_t csnow, wdnow; |
177 | int64_t wd_nsec, cs_nsec; | 196 | int64_t wd_nsec, cs_nsec; |
178 | int resumed; | 197 | int next_cpu; |
179 | 198 | ||
180 | spin_lock(&watchdog_lock); | 199 | spin_lock(&watchdog_lock); |
181 | 200 | if (!watchdog_running) | |
182 | resumed = test_and_clear_bit(0, &watchdog_resumed); | 201 | goto out; |
183 | 202 | ||
184 | wdnow = watchdog->read(watchdog); | 203 | wdnow = watchdog->read(watchdog); |
185 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); | 204 | wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, |
205 | watchdog->mult, watchdog->shift); | ||
186 | watchdog_last = wdnow; | 206 | watchdog_last = wdnow; |
187 | 207 | ||
188 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { | 208 | list_for_each_entry(cs, &watchdog_list, wd_list) { |
189 | csnow = cs->read(cs); | ||
190 | 209 | ||
191 | if (unlikely(resumed)) { | 210 | /* Clocksource already marked unstable? */ |
192 | cs->wd_last = csnow; | 211 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { |
212 | if (finished_booting) | ||
213 | schedule_work(&watchdog_work); | ||
193 | continue; | 214 | continue; |
194 | } | 215 | } |
195 | 216 | ||
196 | /* Initialized ? */ | 217 | csnow = cs->read(cs); |
218 | |||
219 | /* Clocksource initialized ? */ | ||
197 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { | 220 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { |
198 | if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && | ||
199 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { | ||
200 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
201 | /* | ||
202 | * We just marked the clocksource as | ||
203 | * highres-capable, notify the rest of the | ||
204 | * system as well so that we transition | ||
205 | * into high-res mode: | ||
206 | */ | ||
207 | tick_clock_notify(); | ||
208 | } | ||
209 | cs->flags |= CLOCK_SOURCE_WATCHDOG; | 221 | cs->flags |= CLOCK_SOURCE_WATCHDOG; |
210 | cs->wd_last = csnow; | 222 | cs->wd_last = csnow; |
211 | } else { | 223 | continue; |
212 | cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); | ||
213 | cs->wd_last = csnow; | ||
214 | /* Check the delta. Might remove from the list ! */ | ||
215 | clocksource_ratewd(cs, cs_nsec - wd_nsec); | ||
216 | } | 224 | } |
217 | } | ||
218 | 225 | ||
219 | if (!list_empty(&watchdog_list)) { | 226 | /* Check the deviation from the watchdog clocksource. */ |
220 | /* | 227 | cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & |
221 | * Cycle through CPUs to check if the CPUs stay | 228 | cs->mask, cs->mult, cs->shift); |
222 | * synchronized to each other. | 229 | cs->wd_last = csnow; |
223 | */ | 230 | if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { |
224 | int next_cpu = cpumask_next(raw_smp_processor_id(), | 231 | clocksource_unstable(cs, cs_nsec - wd_nsec); |
225 | cpu_online_mask); | 232 | continue; |
233 | } | ||
226 | 234 | ||
227 | if (next_cpu >= nr_cpu_ids) | 235 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && |
228 | next_cpu = cpumask_first(cpu_online_mask); | 236 | (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && |
229 | watchdog_timer.expires += WATCHDOG_INTERVAL; | 237 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { |
230 | add_timer_on(&watchdog_timer, next_cpu); | 238 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
239 | /* | ||
240 | * We just marked the clocksource as highres-capable, | ||
241 | * notify the rest of the system as well so that we | ||
242 | * transition into high-res mode: | ||
243 | */ | ||
244 | tick_clock_notify(); | ||
245 | } | ||
231 | } | 246 | } |
247 | |||
248 | /* | ||
249 | * Cycle through CPUs to check if the CPUs stay synchronized | ||
250 | * to each other. | ||
251 | */ | ||
252 | next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); | ||
253 | if (next_cpu >= nr_cpu_ids) | ||
254 | next_cpu = cpumask_first(cpu_online_mask); | ||
255 | watchdog_timer.expires += WATCHDOG_INTERVAL; | ||
256 | add_timer_on(&watchdog_timer, next_cpu); | ||
257 | out: | ||
232 | spin_unlock(&watchdog_lock); | 258 | spin_unlock(&watchdog_lock); |
233 | } | 259 | } |
260 | |||
261 | static inline void clocksource_start_watchdog(void) | ||
262 | { | ||
263 | if (watchdog_running || !watchdog || list_empty(&watchdog_list)) | ||
264 | return; | ||
265 | init_timer(&watchdog_timer); | ||
266 | watchdog_timer.function = clocksource_watchdog; | ||
267 | watchdog_last = watchdog->read(watchdog); | ||
268 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | ||
269 | add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); | ||
270 | watchdog_running = 1; | ||
271 | } | ||
272 | |||
273 | static inline void clocksource_stop_watchdog(void) | ||
274 | { | ||
275 | if (!watchdog_running || (watchdog && !list_empty(&watchdog_list))) | ||
276 | return; | ||
277 | del_timer(&watchdog_timer); | ||
278 | watchdog_running = 0; | ||
279 | } | ||
280 | |||
281 | static inline void clocksource_reset_watchdog(void) | ||
282 | { | ||
283 | struct clocksource *cs; | ||
284 | |||
285 | list_for_each_entry(cs, &watchdog_list, wd_list) | ||
286 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
287 | } | ||
288 | |||
234 | static void clocksource_resume_watchdog(void) | 289 | static void clocksource_resume_watchdog(void) |
235 | { | 290 | { |
236 | set_bit(0, &watchdog_resumed); | 291 | unsigned long flags; |
292 | |||
293 | spin_lock_irqsave(&watchdog_lock, flags); | ||
294 | clocksource_reset_watchdog(); | ||
295 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
237 | } | 296 | } |
238 | 297 | ||
239 | static void clocksource_check_watchdog(struct clocksource *cs) | 298 | static void clocksource_enqueue_watchdog(struct clocksource *cs) |
240 | { | 299 | { |
241 | struct clocksource *cse; | ||
242 | unsigned long flags; | 300 | unsigned long flags; |
243 | 301 | ||
244 | spin_lock_irqsave(&watchdog_lock, flags); | 302 | spin_lock_irqsave(&watchdog_lock, flags); |
245 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { | 303 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { |
246 | int started = !list_empty(&watchdog_list); | 304 | /* cs is a clocksource to be watched. */ |
247 | |||
248 | list_add(&cs->wd_list, &watchdog_list); | 305 | list_add(&cs->wd_list, &watchdog_list); |
249 | if (!started && watchdog) { | 306 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; |
250 | watchdog_last = watchdog->read(watchdog); | ||
251 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | ||
252 | add_timer_on(&watchdog_timer, | ||
253 | cpumask_first(cpu_online_mask)); | ||
254 | } | ||
255 | } else { | 307 | } else { |
308 | /* cs is a watchdog. */ | ||
256 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | 309 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) |
257 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 310 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
258 | 311 | /* Pick the best watchdog. */ | |
259 | if (!watchdog || cs->rating > watchdog->rating) { | 312 | if (!watchdog || cs->rating > watchdog->rating) { |
260 | if (watchdog) | ||
261 | del_timer(&watchdog_timer); | ||
262 | watchdog = cs; | 313 | watchdog = cs; |
263 | init_timer(&watchdog_timer); | ||
264 | watchdog_timer.function = clocksource_watchdog; | ||
265 | |||
266 | /* Reset watchdog cycles */ | 314 | /* Reset watchdog cycles */ |
267 | list_for_each_entry(cse, &watchdog_list, wd_list) | 315 | clocksource_reset_watchdog(); |
268 | cse->flags &= ~CLOCK_SOURCE_WATCHDOG; | 316 | } |
269 | /* Start if list is not empty */ | 317 | } |
270 | if (!list_empty(&watchdog_list)) { | 318 | /* Check if the watchdog timer needs to be started. */ |
271 | watchdog_last = watchdog->read(watchdog); | 319 | clocksource_start_watchdog(); |
272 | watchdog_timer.expires = | 320 | spin_unlock_irqrestore(&watchdog_lock, flags); |
273 | jiffies + WATCHDOG_INTERVAL; | 321 | } |
274 | add_timer_on(&watchdog_timer, | 322 | |
275 | cpumask_first(cpu_online_mask)); | 323 | static void clocksource_dequeue_watchdog(struct clocksource *cs) |
276 | } | 324 | { |
325 | struct clocksource *tmp; | ||
326 | unsigned long flags; | ||
327 | |||
328 | spin_lock_irqsave(&watchdog_lock, flags); | ||
329 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { | ||
330 | /* cs is a watched clocksource. */ | ||
331 | list_del_init(&cs->wd_list); | ||
332 | } else if (cs == watchdog) { | ||
333 | /* Reset watchdog cycles */ | ||
334 | clocksource_reset_watchdog(); | ||
335 | /* Current watchdog is removed. Find an alternative. */ | ||
336 | watchdog = NULL; | ||
337 | list_for_each_entry(tmp, &clocksource_list, list) { | ||
338 | if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY) | ||
339 | continue; | ||
340 | if (!watchdog || tmp->rating > watchdog->rating) | ||
341 | watchdog = tmp; | ||
277 | } | 342 | } |
278 | } | 343 | } |
344 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
345 | /* Check if the watchdog timer needs to be stopped. */ | ||
346 | clocksource_stop_watchdog(); | ||
279 | spin_unlock_irqrestore(&watchdog_lock, flags); | 347 | spin_unlock_irqrestore(&watchdog_lock, flags); |
280 | } | 348 | } |
281 | #else | 349 | |
282 | static void clocksource_check_watchdog(struct clocksource *cs) | 350 | static int clocksource_watchdog_kthread(void *data) |
351 | { | ||
352 | struct clocksource *cs, *tmp; | ||
353 | unsigned long flags; | ||
354 | LIST_HEAD(unstable); | ||
355 | |||
356 | mutex_lock(&clocksource_mutex); | ||
357 | spin_lock_irqsave(&watchdog_lock, flags); | ||
358 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) | ||
359 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { | ||
360 | list_del_init(&cs->wd_list); | ||
361 | list_add(&cs->wd_list, &unstable); | ||
362 | } | ||
363 | /* Check if the watchdog timer needs to be stopped. */ | ||
364 | clocksource_stop_watchdog(); | ||
365 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
366 | |||
367 | /* Needs to be done outside of watchdog lock */ | ||
368 | list_for_each_entry_safe(cs, tmp, &unstable, wd_list) { | ||
369 | list_del_init(&cs->wd_list); | ||
370 | __clocksource_change_rating(cs, 0); | ||
371 | } | ||
372 | mutex_unlock(&clocksource_mutex); | ||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ | ||
377 | |||
378 | static void clocksource_enqueue_watchdog(struct clocksource *cs) | ||
283 | { | 379 | { |
284 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | 380 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) |
285 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 381 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
286 | } | 382 | } |
287 | 383 | ||
384 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } | ||
288 | static inline void clocksource_resume_watchdog(void) { } | 385 | static inline void clocksource_resume_watchdog(void) { } |
289 | #endif | 386 | static inline int clocksource_watchdog_kthread(void *data) { return 0; } |
387 | |||
388 | #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ | ||
290 | 389 | ||
291 | /** | 390 | /** |
292 | * clocksource_resume - resume the clocksource(s) | 391 | * clocksource_resume - resume the clocksource(s) |
@@ -294,18 +393,16 @@ static inline void clocksource_resume_watchdog(void) { } | |||
294 | void clocksource_resume(void) | 393 | void clocksource_resume(void) |
295 | { | 394 | { |
296 | struct clocksource *cs; | 395 | struct clocksource *cs; |
297 | unsigned long flags; | ||
298 | 396 | ||
299 | spin_lock_irqsave(&clocksource_lock, flags); | 397 | mutex_lock(&clocksource_mutex); |
300 | 398 | ||
301 | list_for_each_entry(cs, &clocksource_list, list) { | 399 | list_for_each_entry(cs, &clocksource_list, list) |
302 | if (cs->resume) | 400 | if (cs->resume) |
303 | cs->resume(); | 401 | cs->resume(); |
304 | } | ||
305 | 402 | ||
306 | clocksource_resume_watchdog(); | 403 | clocksource_resume_watchdog(); |
307 | 404 | ||
308 | spin_unlock_irqrestore(&clocksource_lock, flags); | 405 | mutex_unlock(&clocksource_mutex); |
309 | } | 406 | } |
310 | 407 | ||
311 | /** | 408 | /** |
@@ -320,75 +417,94 @@ void clocksource_touch_watchdog(void) | |||
320 | clocksource_resume_watchdog(); | 417 | clocksource_resume_watchdog(); |
321 | } | 418 | } |
322 | 419 | ||
420 | #ifdef CONFIG_GENERIC_TIME | ||
421 | |||
323 | /** | 422 | /** |
324 | * clocksource_get_next - Returns the selected clocksource | 423 | * clocksource_select - Select the best clocksource available |
424 | * | ||
425 | * Private function. Must hold clocksource_mutex when called. | ||
325 | * | 426 | * |
427 | * Select the clocksource with the best rating, or the clocksource, | ||
428 | * which is selected by userspace override. | ||
326 | */ | 429 | */ |
327 | struct clocksource *clocksource_get_next(void) | 430 | static void clocksource_select(void) |
328 | { | 431 | { |
329 | unsigned long flags; | 432 | struct clocksource *best, *cs; |
330 | 433 | ||
331 | spin_lock_irqsave(&clocksource_lock, flags); | 434 | if (!finished_booting || list_empty(&clocksource_list)) |
332 | if (next_clocksource && finished_booting) { | 435 | return; |
333 | curr_clocksource = next_clocksource; | 436 | /* First clocksource on the list has the best rating. */ |
334 | next_clocksource = NULL; | 437 | best = list_first_entry(&clocksource_list, struct clocksource, list); |
438 | /* Check for the override clocksource. */ | ||
439 | list_for_each_entry(cs, &clocksource_list, list) { | ||
440 | if (strcmp(cs->name, override_name) != 0) | ||
441 | continue; | ||
442 | /* | ||
443 | * Check to make sure we don't switch to a non-highres | ||
444 | * capable clocksource if the tick code is in oneshot | ||
445 | * mode (highres or nohz) | ||
446 | */ | ||
447 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && | ||
448 | tick_oneshot_mode_active()) { | ||
449 | /* Override clocksource cannot be used. */ | ||
450 | printk(KERN_WARNING "Override clocksource %s is not " | ||
451 | "HRT compatible. Cannot switch while in " | ||
452 | "HRT/NOHZ mode\n", cs->name); | ||
453 | override_name[0] = 0; | ||
454 | } else | ||
455 | /* Override clocksource can be used. */ | ||
456 | best = cs; | ||
457 | break; | ||
458 | } | ||
459 | if (curr_clocksource != best) { | ||
460 | printk(KERN_INFO "Switching to clocksource %s\n", best->name); | ||
461 | curr_clocksource = best; | ||
462 | timekeeping_notify(curr_clocksource); | ||
335 | } | 463 | } |
336 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
337 | |||
338 | return curr_clocksource; | ||
339 | } | 464 | } |
340 | 465 | ||
341 | /** | 466 | #else /* CONFIG_GENERIC_TIME */ |
342 | * select_clocksource - Selects the best registered clocksource. | 467 | |
343 | * | 468 | static inline void clocksource_select(void) { } |
344 | * Private function. Must hold clocksource_lock when called. | 469 | |
470 | #endif | ||
471 | |||
472 | /* | ||
473 | * clocksource_done_booting - Called near the end of core bootup | ||
345 | * | 474 | * |
346 | * Select the clocksource with the best rating, or the clocksource, | 475 | * Hack to avoid lots of clocksource churn at boot time. |
347 | * which is selected by userspace override. | 476 | * We use fs_initcall because we want this to start before |
477 | * device_initcall but after subsys_initcall. | ||
348 | */ | 478 | */ |
349 | static struct clocksource *select_clocksource(void) | 479 | static int __init clocksource_done_booting(void) |
350 | { | 480 | { |
351 | struct clocksource *next; | 481 | finished_booting = 1; |
352 | |||
353 | if (list_empty(&clocksource_list)) | ||
354 | return NULL; | ||
355 | |||
356 | if (clocksource_override) | ||
357 | next = clocksource_override; | ||
358 | else | ||
359 | next = list_entry(clocksource_list.next, struct clocksource, | ||
360 | list); | ||
361 | 482 | ||
362 | if (next == curr_clocksource) | 483 | /* |
363 | return NULL; | 484 | * Run the watchdog first to eliminate unstable clock sources |
485 | */ | ||
486 | clocksource_watchdog_kthread(NULL); | ||
364 | 487 | ||
365 | return next; | 488 | mutex_lock(&clocksource_mutex); |
489 | clocksource_select(); | ||
490 | mutex_unlock(&clocksource_mutex); | ||
491 | return 0; | ||
366 | } | 492 | } |
493 | fs_initcall(clocksource_done_booting); | ||
367 | 494 | ||
368 | /* | 495 | /* |
369 | * Enqueue the clocksource sorted by rating | 496 | * Enqueue the clocksource sorted by rating |
370 | */ | 497 | */ |
371 | static int clocksource_enqueue(struct clocksource *c) | 498 | static void clocksource_enqueue(struct clocksource *cs) |
372 | { | 499 | { |
373 | struct list_head *tmp, *entry = &clocksource_list; | 500 | struct list_head *entry = &clocksource_list; |
501 | struct clocksource *tmp; | ||
374 | 502 | ||
375 | list_for_each(tmp, &clocksource_list) { | 503 | list_for_each_entry(tmp, &clocksource_list, list) |
376 | struct clocksource *cs; | ||
377 | |||
378 | cs = list_entry(tmp, struct clocksource, list); | ||
379 | if (cs == c) | ||
380 | return -EBUSY; | ||
381 | /* Keep track of the place, where to insert */ | 504 | /* Keep track of the place, where to insert */ |
382 | if (cs->rating >= c->rating) | 505 | if (tmp->rating >= cs->rating) |
383 | entry = tmp; | 506 | entry = &tmp->list; |
384 | } | 507 | list_add(&cs->list, entry); |
385 | list_add(&c->list, entry); | ||
386 | |||
387 | if (strlen(c->name) == strlen(override_name) && | ||
388 | !strcmp(c->name, override_name)) | ||
389 | clocksource_override = c; | ||
390 | |||
391 | return 0; | ||
392 | } | 508 | } |
393 | 509 | ||
394 | /** | 510 | /** |
@@ -397,52 +513,48 @@ static int clocksource_enqueue(struct clocksource *c) | |||
397 | * | 513 | * |
398 | * Returns -EBUSY if registration fails, zero otherwise. | 514 | * Returns -EBUSY if registration fails, zero otherwise. |
399 | */ | 515 | */ |
400 | int clocksource_register(struct clocksource *c) | 516 | int clocksource_register(struct clocksource *cs) |
401 | { | 517 | { |
402 | unsigned long flags; | 518 | mutex_lock(&clocksource_mutex); |
403 | int ret; | 519 | clocksource_enqueue(cs); |
404 | 520 | clocksource_select(); | |
405 | spin_lock_irqsave(&clocksource_lock, flags); | 521 | clocksource_enqueue_watchdog(cs); |
406 | ret = clocksource_enqueue(c); | 522 | mutex_unlock(&clocksource_mutex); |
407 | if (!ret) | 523 | return 0; |
408 | next_clocksource = select_clocksource(); | ||
409 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
410 | if (!ret) | ||
411 | clocksource_check_watchdog(c); | ||
412 | return ret; | ||
413 | } | 524 | } |
414 | EXPORT_SYMBOL(clocksource_register); | 525 | EXPORT_SYMBOL(clocksource_register); |
415 | 526 | ||
527 | static void __clocksource_change_rating(struct clocksource *cs, int rating) | ||
528 | { | ||
529 | list_del(&cs->list); | ||
530 | cs->rating = rating; | ||
531 | clocksource_enqueue(cs); | ||
532 | clocksource_select(); | ||
533 | } | ||
534 | |||
416 | /** | 535 | /** |
417 | * clocksource_change_rating - Change the rating of a registered clocksource | 536 | * clocksource_change_rating - Change the rating of a registered clocksource |
418 | * | ||
419 | */ | 537 | */ |
420 | void clocksource_change_rating(struct clocksource *cs, int rating) | 538 | void clocksource_change_rating(struct clocksource *cs, int rating) |
421 | { | 539 | { |
422 | unsigned long flags; | 540 | mutex_lock(&clocksource_mutex); |
423 | 541 | __clocksource_change_rating(cs, rating); | |
424 | spin_lock_irqsave(&clocksource_lock, flags); | 542 | mutex_unlock(&clocksource_mutex); |
425 | list_del(&cs->list); | ||
426 | cs->rating = rating; | ||
427 | clocksource_enqueue(cs); | ||
428 | next_clocksource = select_clocksource(); | ||
429 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
430 | } | 543 | } |
544 | EXPORT_SYMBOL(clocksource_change_rating); | ||
431 | 545 | ||
432 | /** | 546 | /** |
433 | * clocksource_unregister - remove a registered clocksource | 547 | * clocksource_unregister - remove a registered clocksource |
434 | */ | 548 | */ |
435 | void clocksource_unregister(struct clocksource *cs) | 549 | void clocksource_unregister(struct clocksource *cs) |
436 | { | 550 | { |
437 | unsigned long flags; | 551 | mutex_lock(&clocksource_mutex); |
438 | 552 | clocksource_dequeue_watchdog(cs); | |
439 | spin_lock_irqsave(&clocksource_lock, flags); | ||
440 | list_del(&cs->list); | 553 | list_del(&cs->list); |
441 | if (clocksource_override == cs) | 554 | clocksource_select(); |
442 | clocksource_override = NULL; | 555 | mutex_unlock(&clocksource_mutex); |
443 | next_clocksource = select_clocksource(); | ||
444 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
445 | } | 556 | } |
557 | EXPORT_SYMBOL(clocksource_unregister); | ||
446 | 558 | ||
447 | #ifdef CONFIG_SYSFS | 559 | #ifdef CONFIG_SYSFS |
448 | /** | 560 | /** |
@@ -458,9 +570,9 @@ sysfs_show_current_clocksources(struct sys_device *dev, | |||
458 | { | 570 | { |
459 | ssize_t count = 0; | 571 | ssize_t count = 0; |
460 | 572 | ||
461 | spin_lock_irq(&clocksource_lock); | 573 | mutex_lock(&clocksource_mutex); |
462 | count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); | 574 | count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); |
463 | spin_unlock_irq(&clocksource_lock); | 575 | mutex_unlock(&clocksource_mutex); |
464 | 576 | ||
465 | return count; | 577 | return count; |
466 | } | 578 | } |
@@ -478,9 +590,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, | |||
478 | struct sysdev_attribute *attr, | 590 | struct sysdev_attribute *attr, |
479 | const char *buf, size_t count) | 591 | const char *buf, size_t count) |
480 | { | 592 | { |
481 | struct clocksource *ovr = NULL; | ||
482 | size_t ret = count; | 593 | size_t ret = count; |
483 | int len; | ||
484 | 594 | ||
485 | /* strings from sysfs write are not 0 terminated! */ | 595 | /* strings from sysfs write are not 0 terminated! */ |
486 | if (count >= sizeof(override_name)) | 596 | if (count >= sizeof(override_name)) |
@@ -490,44 +600,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, | |||
490 | if (buf[count-1] == '\n') | 600 | if (buf[count-1] == '\n') |
491 | count--; | 601 | count--; |
492 | 602 | ||
493 | spin_lock_irq(&clocksource_lock); | 603 | mutex_lock(&clocksource_mutex); |
494 | 604 | ||
495 | if (count > 0) | 605 | if (count > 0) |
496 | memcpy(override_name, buf, count); | 606 | memcpy(override_name, buf, count); |
497 | override_name[count] = 0; | 607 | override_name[count] = 0; |
608 | clocksource_select(); | ||
498 | 609 | ||
499 | len = strlen(override_name); | 610 | mutex_unlock(&clocksource_mutex); |
500 | if (len) { | ||
501 | struct clocksource *cs; | ||
502 | |||
503 | ovr = clocksource_override; | ||
504 | /* try to select it: */ | ||
505 | list_for_each_entry(cs, &clocksource_list, list) { | ||
506 | if (strlen(cs->name) == len && | ||
507 | !strcmp(cs->name, override_name)) | ||
508 | ovr = cs; | ||
509 | } | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * Check to make sure we don't switch to a non-highres capable | ||
514 | * clocksource if the tick code is in oneshot mode (highres or nohz) | ||
515 | */ | ||
516 | if (tick_oneshot_mode_active() && ovr && | ||
517 | !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { | ||
518 | printk(KERN_WARNING "%s clocksource is not HRT compatible. " | ||
519 | "Cannot switch while in HRT/NOHZ mode\n", ovr->name); | ||
520 | ovr = NULL; | ||
521 | override_name[0] = 0; | ||
522 | } | ||
523 | |||
524 | /* Reselect, when the override name has changed */ | ||
525 | if (ovr != clocksource_override) { | ||
526 | clocksource_override = ovr; | ||
527 | next_clocksource = select_clocksource(); | ||
528 | } | ||
529 | |||
530 | spin_unlock_irq(&clocksource_lock); | ||
531 | 611 | ||
532 | return ret; | 612 | return ret; |
533 | } | 613 | } |
@@ -547,7 +627,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, | |||
547 | struct clocksource *src; | 627 | struct clocksource *src; |
548 | ssize_t count = 0; | 628 | ssize_t count = 0; |
549 | 629 | ||
550 | spin_lock_irq(&clocksource_lock); | 630 | mutex_lock(&clocksource_mutex); |
551 | list_for_each_entry(src, &clocksource_list, list) { | 631 | list_for_each_entry(src, &clocksource_list, list) { |
552 | /* | 632 | /* |
553 | * Don't show non-HRES clocksource if the tick code is | 633 | * Don't show non-HRES clocksource if the tick code is |
@@ -559,7 +639,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, | |||
559 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), | 639 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), |
560 | "%s ", src->name); | 640 | "%s ", src->name); |
561 | } | 641 | } |
562 | spin_unlock_irq(&clocksource_lock); | 642 | mutex_unlock(&clocksource_mutex); |
563 | 643 | ||
564 | count += snprintf(buf + count, | 644 | count += snprintf(buf + count, |
565 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); | 645 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); |
@@ -614,11 +694,10 @@ device_initcall(init_clocksource_sysfs); | |||
614 | */ | 694 | */ |
615 | static int __init boot_override_clocksource(char* str) | 695 | static int __init boot_override_clocksource(char* str) |
616 | { | 696 | { |
617 | unsigned long flags; | 697 | mutex_lock(&clocksource_mutex); |
618 | spin_lock_irqsave(&clocksource_lock, flags); | ||
619 | if (str) | 698 | if (str) |
620 | strlcpy(override_name, str, sizeof(override_name)); | 699 | strlcpy(override_name, str, sizeof(override_name)); |
621 | spin_unlock_irqrestore(&clocksource_lock, flags); | 700 | mutex_unlock(&clocksource_mutex); |
622 | return 1; | 701 | return 1; |
623 | } | 702 | } |
624 | 703 | ||