diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2013-07-04 16:46:45 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2013-07-05 05:09:28 -0400 |
commit | 332962f2c88868ed3cdab466870baaa34dd58612 (patch) | |
tree | d670139ba37bca7b8f5a5179f401c8694636285c /kernel/time | |
parent | 2b0f89317e99735bbf32eaede81f707f98ab1b5e (diff) |
clocksource: Reselect clocksource when watchdog validated high-res capability
Up to commit 5d33b883a (clocksource: Always verify highres capability)
we had no sanity check when selecting a clocksource, which would have
prevented a non-highres-capable clocksource from being used when the
system had already switched to highres/nohz mode.
The new sanity check works as Alex and Tim found out. It prevents the
TSC from being used. This happens because on x86 the boot process
looks like this:
tsc_start_frequency_validation(TSC);
clocksource_register(HPET);
clocksource_done_booting();
clocksource_select()
Selects HPET which is valid for high-res
switch_to_highres();
clocksource_register(TSC);
TSC is not selected, because it is not yet
flagged as VALID_HIGH_RES
clocksource_watchdog()
Validates TSC for highres, but that does not make TSC
the current clocksource.
Before the sanity check was added, we installed TSC unvalidated which
worked most of the time. If the TSC was really detected as unstable,
then the unstable logic removed it and installed HPET again.
The sanity check is correct and needed. So the watchdog needs to kick
a reselection of the clocksource, when it qualifies TSC as a valid
high res clocksource.
To solve this, we mark the clocksource which got the flag
CLOCK_SOURCE_VALID_FOR_HRES set by the watchdog with a new flag,
CLOCK_SOURCE_RESELECT, and trigger the watchdog thread. The watchdog
thread evaluates the flag and invokes clocksource_select() when set.
To avoid that the clocksource_done_booting() code, which is about to
install the first real clocksource anyway, needs to go through
clocksource_select and tick_oneshot_notify() pointlessly, split out
the clocksource_watchdog_kthread() list walk code and invoke the
select/notify only when called from clocksource_watchdog_kthread().
So clocksource_done_booting() can utilize the same split-out code
without the select/notify invocation and the clocksource_mutex
unlock/relock dance.
Reported-and-tested-by: Alex Shi <alex.shi@intel.com>
Cc: Hans Peter Anvin <hpa@linux.intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <andi.kleen@intel.com>
Tested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1307042239150.11637@ionos.tec.linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/clocksource.c | 57 |
1 files changed, 42 insertions, 15 deletions
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e713ef7d19a7..50a8736757f3 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -181,6 +181,7 @@ static int finished_booting; | |||
181 | 181 | ||
182 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | 182 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG |
183 | static void clocksource_watchdog_work(struct work_struct *work); | 183 | static void clocksource_watchdog_work(struct work_struct *work); |
184 | static void clocksource_select(void); | ||
184 | 185 | ||
185 | static LIST_HEAD(watchdog_list); | 186 | static LIST_HEAD(watchdog_list); |
186 | static struct clocksource *watchdog; | 187 | static struct clocksource *watchdog; |
@@ -301,13 +302,30 @@ static void clocksource_watchdog(unsigned long data) | |||
301 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && | 302 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && |
302 | (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && | 303 | (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && |
303 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { | 304 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { |
305 | /* Mark it valid for high-res. */ | ||
304 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 306 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
307 | |||
308 | /* | ||
309 | * clocksource_done_booting() will sort it if | ||
310 | * finished_booting is not set yet. | ||
311 | */ | ||
312 | if (!finished_booting) | ||
313 | continue; | ||
314 | |||
305 | /* | 315 | /* |
306 | * We just marked the clocksource as highres-capable, | 316 | * If this is not the current clocksource let |
307 | * notify the rest of the system as well so that we | 317 | * the watchdog thread reselect it. Due to the |
308 | * transition into high-res mode: | 318 | * change to high res this clocksource might |
319 | * be preferred now. If it is the current | ||
320 | * clocksource let the tick code know about | ||
321 | * that change. | ||
309 | */ | 322 | */ |
310 | tick_clock_notify(); | 323 | if (cs != curr_clocksource) { |
324 | cs->flags |= CLOCK_SOURCE_RESELECT; | ||
325 | schedule_work(&watchdog_work); | ||
326 | } else { | ||
327 | tick_clock_notify(); | ||
328 | } | ||
311 | } | 329 | } |
312 | } | 330 | } |
313 | 331 | ||
@@ -404,19 +422,25 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) | |||
404 | spin_unlock_irqrestore(&watchdog_lock, flags); | 422 | spin_unlock_irqrestore(&watchdog_lock, flags); |
405 | } | 423 | } |
406 | 424 | ||
407 | static int clocksource_watchdog_kthread(void *data) | 425 | static int __clocksource_watchdog_kthread(void) |
408 | { | 426 | { |
409 | struct clocksource *cs, *tmp; | 427 | struct clocksource *cs, *tmp; |
410 | unsigned long flags; | 428 | unsigned long flags; |
411 | LIST_HEAD(unstable); | 429 | LIST_HEAD(unstable); |
430 | int select = 0; | ||
412 | 431 | ||
413 | mutex_lock(&clocksource_mutex); | ||
414 | spin_lock_irqsave(&watchdog_lock, flags); | 432 | spin_lock_irqsave(&watchdog_lock, flags); |
415 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) | 433 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { |
416 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { | 434 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { |
417 | list_del_init(&cs->wd_list); | 435 | list_del_init(&cs->wd_list); |
418 | list_add(&cs->wd_list, &unstable); | 436 | list_add(&cs->wd_list, &unstable); |
437 | select = 1; | ||
438 | } | ||
439 | if (cs->flags & CLOCK_SOURCE_RESELECT) { | ||
440 | cs->flags &= ~CLOCK_SOURCE_RESELECT; | ||
441 | select = 1; | ||
419 | } | 442 | } |
443 | } | ||
420 | /* Check if the watchdog timer needs to be stopped. */ | 444 | /* Check if the watchdog timer needs to be stopped. */ |
421 | clocksource_stop_watchdog(); | 445 | clocksource_stop_watchdog(); |
422 | spin_unlock_irqrestore(&watchdog_lock, flags); | 446 | spin_unlock_irqrestore(&watchdog_lock, flags); |
@@ -426,6 +450,14 @@ static int clocksource_watchdog_kthread(void *data) | |||
426 | list_del_init(&cs->wd_list); | 450 | list_del_init(&cs->wd_list); |
427 | __clocksource_change_rating(cs, 0); | 451 | __clocksource_change_rating(cs, 0); |
428 | } | 452 | } |
453 | return select; | ||
454 | } | ||
455 | |||
456 | static int clocksource_watchdog_kthread(void *data) | ||
457 | { | ||
458 | mutex_lock(&clocksource_mutex); | ||
459 | if (__clocksource_watchdog_kthread()) | ||
460 | clocksource_select(); | ||
429 | mutex_unlock(&clocksource_mutex); | 461 | mutex_unlock(&clocksource_mutex); |
430 | return 0; | 462 | return 0; |
431 | } | 463 | } |
@@ -445,7 +477,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) | |||
445 | 477 | ||
446 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } | 478 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } |
447 | static inline void clocksource_resume_watchdog(void) { } | 479 | static inline void clocksource_resume_watchdog(void) { } |
448 | static inline int clocksource_watchdog_kthread(void *data) { return 0; } | 480 | static inline int __clocksource_watchdog_kthread(void) { return 0; } |
449 | static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } | 481 | static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } |
450 | 482 | ||
451 | #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ | 483 | #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ |
@@ -647,16 +679,11 @@ static int __init clocksource_done_booting(void) | |||
647 | { | 679 | { |
648 | mutex_lock(&clocksource_mutex); | 680 | mutex_lock(&clocksource_mutex); |
649 | curr_clocksource = clocksource_default_clock(); | 681 | curr_clocksource = clocksource_default_clock(); |
650 | mutex_unlock(&clocksource_mutex); | ||
651 | |||
652 | finished_booting = 1; | 682 | finished_booting = 1; |
653 | |||
654 | /* | 683 | /* |
655 | * Run the watchdog first to eliminate unstable clock sources | 684 | * Run the watchdog first to eliminate unstable clock sources |
656 | */ | 685 | */ |
657 | clocksource_watchdog_kthread(NULL); | 686 | __clocksource_watchdog_kthread(); |
658 | |||
659 | mutex_lock(&clocksource_mutex); | ||
660 | clocksource_select(); | 687 | clocksource_select(); |
661 | mutex_unlock(&clocksource_mutex); | 688 | mutex_unlock(&clocksource_mutex); |
662 | return 0; | 689 | return 0; |
@@ -789,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating) | |||
789 | list_del(&cs->list); | 816 | list_del(&cs->list); |
790 | cs->rating = rating; | 817 | cs->rating = rating; |
791 | clocksource_enqueue(cs); | 818 | clocksource_enqueue(cs); |
792 | clocksource_select(); | ||
793 | } | 819 | } |
794 | 820 | ||
795 | /** | 821 | /** |
@@ -801,6 +827,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) | |||
801 | { | 827 | { |
802 | mutex_lock(&clocksource_mutex); | 828 | mutex_lock(&clocksource_mutex); |
803 | __clocksource_change_rating(cs, rating); | 829 | __clocksource_change_rating(cs, rating); |
830 | clocksource_select(); | ||
804 | mutex_unlock(&clocksource_mutex); | 831 | mutex_unlock(&clocksource_mutex); |
805 | } | 832 | } |
806 | EXPORT_SYMBOL(clocksource_change_rating); | 833 | EXPORT_SYMBOL(clocksource_change_rating); |