aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2013-07-04 16:46:45 -0400
committerThomas Gleixner <tglx@linutronix.de>2013-07-05 05:09:28 -0400
commit332962f2c88868ed3cdab466870baaa34dd58612 (patch)
treed670139ba37bca7b8f5a5179f401c8694636285c /kernel
parent2b0f89317e99735bbf32eaede81f707f98ab1b5e (diff)
clocksource: Reselect clocksource when watchdog validated high-res capability
Up to commit 5d33b883a (clocksource: Always verify highres capability) we had no sanity check when selecting a clocksource, which prevented that a non highres capable clocksource is used when the system already switched to highres/nohz mode. The new sanity check works as Alex and Tim found out. It prevents the TSC from being used. This happens because on x86 the boot process looks like this: tsc_start_frequency_validation(TSC); clocksource_register(HPET); clocksource_done_booting(); clocksource_select() Selects HPET which is valid for high-res switch_to_highres(); clocksource_register(TSC); TSC is not selected, because it is not yet flagged as VALID_HIGH_RES clocksource_watchdog() Validates TSC for highres, but that does not make TSC the current clocksource. Before the sanity check was added, we installed TSC unvalidated which worked most of the time. If the TSC was really detected as unstable, then the unstable logic removed it and installed HPET again. The sanity check is correct and needed. So the watchdog needs to kick a reselection of the clocksource, when it qualifies TSC as a valid high res clocksource. To solve this, we mark the clocksource which got the flag CLOCK_SOURCE_VALID_FOR_HRES set by the watchdog with a new flag CLOCK_SOURCE_RESELECT and trigger the watchdog thread. The watchdog thread evaluates the flag and invokes clocksource_select() when set. To avoid that the clocksource_done_booting() code, which is about to install the first real clocksource anyway, needs to go through clocksource_select and tick_oneshot_notify() pointlessly, split out the clocksource_watchdog_kthread() list walk code and invoke the select/notify only when called from clocksource_watchdog_kthread(). So clocksource_done_booting() can utilize the same split-out code without the select/notify invocation and the clocksource_mutex unlock/relock dance. 
Reported-and-tested-by: Alex Shi <alex.shi@intel.com> Cc: Hans Peter Anvin <hpa@linux.intel.com> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Andi Kleen <andi.kleen@intel.com> Tested-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Ingo Molnar <mingo@kernel.org> Cc: Davidlohr Bueso <davidlohr.bueso@hp.com> Cc: John Stultz <john.stultz@linaro.org> Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1307042239150.11637@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/time/clocksource.c57
1 files changed, 42 insertions, 15 deletions
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e713ef7d19a7..50a8736757f3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -181,6 +181,7 @@ static int finished_booting;
181 181
182#ifdef CONFIG_CLOCKSOURCE_WATCHDOG 182#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
183static void clocksource_watchdog_work(struct work_struct *work); 183static void clocksource_watchdog_work(struct work_struct *work);
184static void clocksource_select(void);
184 185
185static LIST_HEAD(watchdog_list); 186static LIST_HEAD(watchdog_list);
186static struct clocksource *watchdog; 187static struct clocksource *watchdog;
@@ -301,13 +302,30 @@ static void clocksource_watchdog(unsigned long data)
301 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && 302 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
302 (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && 303 (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
303 (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { 304 (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
305 /* Mark it valid for high-res. */
304 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; 306 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
307
308 /*
309 * clocksource_done_booting() will sort it if
310 * finished_booting is not set yet.
311 */
312 if (!finished_booting)
313 continue;
314
305 /* 315 /*
306 * We just marked the clocksource as highres-capable, 316 * If this is not the current clocksource let
307 * notify the rest of the system as well so that we 317 * the watchdog thread reselect it. Due to the
308 * transition into high-res mode: 318 * change to high res this clocksource might
319 * be preferred now. If it is the current
320 * clocksource let the tick code know about
321 * that change.
309 */ 322 */
310 tick_clock_notify(); 323 if (cs != curr_clocksource) {
324 cs->flags |= CLOCK_SOURCE_RESELECT;
325 schedule_work(&watchdog_work);
326 } else {
327 tick_clock_notify();
328 }
311 } 329 }
312 } 330 }
313 331
@@ -404,19 +422,25 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs)
404 spin_unlock_irqrestore(&watchdog_lock, flags); 422 spin_unlock_irqrestore(&watchdog_lock, flags);
405} 423}
406 424
407static int clocksource_watchdog_kthread(void *data) 425static int __clocksource_watchdog_kthread(void)
408{ 426{
409 struct clocksource *cs, *tmp; 427 struct clocksource *cs, *tmp;
410 unsigned long flags; 428 unsigned long flags;
411 LIST_HEAD(unstable); 429 LIST_HEAD(unstable);
430 int select = 0;
412 431
413 mutex_lock(&clocksource_mutex);
414 spin_lock_irqsave(&watchdog_lock, flags); 432 spin_lock_irqsave(&watchdog_lock, flags);
415 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) 433 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
416 if (cs->flags & CLOCK_SOURCE_UNSTABLE) { 434 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
417 list_del_init(&cs->wd_list); 435 list_del_init(&cs->wd_list);
418 list_add(&cs->wd_list, &unstable); 436 list_add(&cs->wd_list, &unstable);
437 select = 1;
438 }
439 if (cs->flags & CLOCK_SOURCE_RESELECT) {
440 cs->flags &= ~CLOCK_SOURCE_RESELECT;
441 select = 1;
419 } 442 }
443 }
420 /* Check if the watchdog timer needs to be stopped. */ 444 /* Check if the watchdog timer needs to be stopped. */
421 clocksource_stop_watchdog(); 445 clocksource_stop_watchdog();
422 spin_unlock_irqrestore(&watchdog_lock, flags); 446 spin_unlock_irqrestore(&watchdog_lock, flags);
@@ -426,6 +450,14 @@ static int clocksource_watchdog_kthread(void *data)
426 list_del_init(&cs->wd_list); 450 list_del_init(&cs->wd_list);
427 __clocksource_change_rating(cs, 0); 451 __clocksource_change_rating(cs, 0);
428 } 452 }
453 return select;
454}
455
456static int clocksource_watchdog_kthread(void *data)
457{
458 mutex_lock(&clocksource_mutex);
459 if (__clocksource_watchdog_kthread())
460 clocksource_select();
429 mutex_unlock(&clocksource_mutex); 461 mutex_unlock(&clocksource_mutex);
430 return 0; 462 return 0;
431} 463}
@@ -445,7 +477,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
445 477
446static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } 478static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
447static inline void clocksource_resume_watchdog(void) { } 479static inline void clocksource_resume_watchdog(void) { }
448static inline int clocksource_watchdog_kthread(void *data) { return 0; } 480static inline int __clocksource_watchdog_kthread(void) { return 0; }
449static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } 481static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
450 482
451#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ 483#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
@@ -647,16 +679,11 @@ static int __init clocksource_done_booting(void)
647{ 679{
648 mutex_lock(&clocksource_mutex); 680 mutex_lock(&clocksource_mutex);
649 curr_clocksource = clocksource_default_clock(); 681 curr_clocksource = clocksource_default_clock();
650 mutex_unlock(&clocksource_mutex);
651
652 finished_booting = 1; 682 finished_booting = 1;
653
654 /* 683 /*
655 * Run the watchdog first to eliminate unstable clock sources 684 * Run the watchdog first to eliminate unstable clock sources
656 */ 685 */
657 clocksource_watchdog_kthread(NULL); 686 __clocksource_watchdog_kthread();
658
659 mutex_lock(&clocksource_mutex);
660 clocksource_select(); 687 clocksource_select();
661 mutex_unlock(&clocksource_mutex); 688 mutex_unlock(&clocksource_mutex);
662 return 0; 689 return 0;
@@ -789,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
789 list_del(&cs->list); 816 list_del(&cs->list);
790 cs->rating = rating; 817 cs->rating = rating;
791 clocksource_enqueue(cs); 818 clocksource_enqueue(cs);
792 clocksource_select();
793} 819}
794 820
795/** 821/**
@@ -801,6 +827,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
801{ 827{
802 mutex_lock(&clocksource_mutex); 828 mutex_lock(&clocksource_mutex);
803 __clocksource_change_rating(cs, rating); 829 __clocksource_change_rating(cs, rating);
830 clocksource_select();
804 mutex_unlock(&clocksource_mutex); 831 mutex_unlock(&clocksource_mutex);
805} 832}
806EXPORT_SYMBOL(clocksource_change_rating); 833EXPORT_SYMBOL(clocksource_change_rating);