From f1b82746c1e93daf24e1ab9bfbd39bcdb2e7018b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:21 +0200 Subject: clocksource: Cleanup clocksource selection If a non high-resolution clocksource is first set as override clock and then registered it becomes active even if the system is in one-shot mode. Move the override check from sysfs_override_clocksource to the clocksource selection. That fixes the bug and simplifies the code. The check in clocksource_register for double registration of the same clocksource is removed without replacement. To find the initial clocksource a new weak function in jiffies.c is defined that returns the jiffies clocksource. The architecture code can then override the weak function with a more suitable clocksource, e.g. the TOD clock on s390. [ tglx: Folded in a fix from John Stultz ] Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.388024160@de.ibm.com> Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 134 ++++++++++++++++++---------------------------- 1 file changed, 51 insertions(+), 83 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7466cb811251..e91662e87cde 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -21,7 +21,6 @@ * * TODO WishList: * o Allow clocksource drivers to be unregistered - * o get rid of clocksource_jiffies extern */ #include @@ -107,12 +106,9 @@ u64 timecounter_cyc2time(struct timecounter *tc, } EXPORT_SYMBOL(timecounter_cyc2time); -/* XXX - Would like a better way for initializing curr_clocksource */ -extern struct clocksource clocksource_jiffies; - /*[Clocksource internal variables]--------- * curr_clocksource: - * currently selected clocksource. Initialized to clocksource_jiffies. + * currently selected clocksource. * next_clocksource: * pending next selected clocksource. * clocksource_list: @@ -123,9 +119,8 @@ extern struct clocksource clocksource_jiffies; * override_name: * Name of the user-specified clocksource. */ -static struct clocksource *curr_clocksource = &clocksource_jiffies; +static struct clocksource *curr_clocksource; static struct clocksource *next_clocksource; -static struct clocksource *clocksource_override; static LIST_HEAD(clocksource_list); static DEFINE_SPINLOCK(clocksource_lock); static char override_name[32]; @@ -320,6 +315,7 @@ void clocksource_touch_watchdog(void) clocksource_resume_watchdog(); } +#ifdef CONFIG_GENERIC_TIME /** * clocksource_get_next - Returns the selected clocksource * @@ -339,56 +335,65 @@ struct clocksource *clocksource_get_next(void) } /** - * select_clocksource - Selects the best registered clocksource. + * clocksource_select - Select the best clocksource available * * Private function. Must hold clocksource_lock when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. */ -static struct clocksource *select_clocksource(void) +static void clocksource_select(void) { - struct clocksource *next; + struct clocksource *best, *cs; if (list_empty(&clocksource_list)) - return NULL; + return; + /* First clocksource on the list has the best rating. */ + best = list_first_entry(&clocksource_list, struct clocksource, list); + /* Check for the override clocksource. */ + list_for_each_entry(cs, &clocksource_list, list) { + if (strcmp(cs->name, override_name) != 0) + continue; + /* + * Check to make sure we don't switch to a non-highres + * capable clocksource if the tick code is in oneshot + * mode (highres or nohz) + */ + if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && + tick_oneshot_mode_active()) { + /* Override clocksource cannot be used. */ + printk(KERN_WARNING "Override clocksource %s is not " + "HRT compatible. Cannot switch while in " + "HRT/NOHZ mode\n", cs->name); + override_name[0] = 0; + } else + /* Override clocksource can be used. */ + best = cs; + break; + } + if (curr_clocksource != best) + next_clocksource = best; +} - if (clocksource_override) - next = clocksource_override; - else - next = list_entry(clocksource_list.next, struct clocksource, - list); +#else /* CONFIG_GENERIC_TIME */ - if (next == curr_clocksource) - return NULL; +static void clocksource_select(void) { } - return next; -} +#endif /* * Enqueue the clocksource sorted by rating */ -static int clocksource_enqueue(struct clocksource *c) +static void clocksource_enqueue(struct clocksource *cs) { - struct list_head *tmp, *entry = &clocksource_list; - - list_for_each(tmp, &clocksource_list) { - struct clocksource *cs; + struct list_head *entry = &clocksource_list; + struct clocksource *tmp; - cs = list_entry(tmp, struct clocksource, list); - if (cs == c) - return -EBUSY; + list_for_each_entry(tmp, &clocksource_list, list) /* Keep track of the place, where to insert */ - if (cs->rating >= c->rating) - entry = tmp; - } - list_add(&c->list, entry); - - if (strlen(c->name) == strlen(override_name) && - !strcmp(c->name, override_name)) - clocksource_override = c; - - return 0; + if (tmp->rating >= cs->rating) + entry = &tmp->list; + list_add(&cs->list, entry); } /** @@ -397,19 +402,16 @@ static int clocksource_enqueue(struct clocksource *c) * * Returns -EBUSY if registration fails, zero otherwise. */ -int clocksource_register(struct clocksource *c) +int clocksource_register(struct clocksource *cs) { unsigned long flags; - int ret; spin_lock_irqsave(&clocksource_lock, flags); - ret = clocksource_enqueue(c); - if (!ret) - next_clocksource = select_clocksource(); + clocksource_enqueue(cs); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); - if (!ret) - clocksource_check_watchdog(c); - return ret; + clocksource_check_watchdog(cs); + return 0; } EXPORT_SYMBOL(clocksource_register); @@ -425,7 +427,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -438,9 +440,7 @@ void clocksource_unregister(struct clocksource *cs) spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); - if (clocksource_override == cs) - clocksource_override = NULL; - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -478,9 +478,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t count) { - struct clocksource *ovr = NULL; size_t ret = count; - int len; /* strings from sysfs write are not 0 terminated! */ if (count >= sizeof(override_name)) @@ -495,37 +493,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; - - len = strlen(override_name); - if (len) { - struct clocksource *cs; - - ovr = clocksource_override; - /* try to select it: */ - list_for_each_entry(cs, &clocksource_list, list) { - if (strlen(cs->name) == len && - !strcmp(cs->name, override_name)) - ovr = cs; - } - } - - /* - * Check to make sure we don't switch to a non-highres capable - * clocksource if the tick code is in oneshot mode (highres or nohz) - */ - if (tick_oneshot_mode_active() && ovr && - !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { - printk(KERN_WARNING "%s clocksource is not HRT compatible. " - "Cannot switch while in HRT/NOHZ mode\n", ovr->name); - ovr = NULL; - override_name[0] = 0; - } - - /* Reselect, when the override name has changed */ - if (ovr != clocksource_override) { - clocksource_override = ovr; - next_clocksource = select_clocksource(); - } + clocksource_select(); spin_unlock_irq(&clocksource_lock); -- cgit v1.2.2 From 8cf4e750f8459d51c2e8a035a201da4bf7aa996a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:22 +0200 Subject: clocksource: Delay clocksource watchdog highres enablement The clocksource watchdog marks a clock as highres capable before it checked the deviation from the watchdog clocksource even for a single time. Make sure that the deviation is at least checked once before doing the switch to highres mode. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.627795883@de.ibm.com> Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e91662e87cde..76256c5aecb8 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -153,11 +153,8 @@ static unsigned long watchdog_resumed; #define WATCHDOG_INTERVAL (HZ >> 1) #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) -static void clocksource_ratewd(struct clocksource *cs, int64_t delta) +static void clocksource_unstable(struct clocksource *cs, int64_t delta) { - if (delta > -WATCHDOG_THRESHOLD && delta < WATCHDOG_THRESHOLD) - return; - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); @@ -183,31 +180,31 @@ static void clocksource_watchdog(unsigned long data) list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { csnow = cs->read(cs); - if (unlikely(resumed)) { + /* Clocksource initialized ? */ + if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { + cs->flags |= CLOCK_SOURCE_WATCHDOG; cs->wd_last = csnow; continue; } - /* Initialized ? */ - if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { - if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && - (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { - cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; - /* - * We just marked the clocksource as - * highres-capable, notify the rest of the - * system as well so that we transition - * into high-res mode: - */ - tick_clock_notify(); - } - cs->flags |= CLOCK_SOURCE_WATCHDOG; - cs->wd_last = csnow; - } else { - cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); - cs->wd_last = csnow; - /* Check the delta. Might remove from the list ! */ - clocksource_ratewd(cs, cs_nsec - wd_nsec); + /* Check the deviation from the watchdog clocksource. */ + cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); + cs->wd_last = csnow; + if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { + clocksource_unstable(cs, cs_nsec - wd_nsec); + continue; + } + + if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && + (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && + (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { + cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; + /* + * We just marked the clocksource as highres-capable, + * notify the rest of the system as well so that we + * transition into high-res mode: + */ + tick_clock_notify(); } } -- cgit v1.2.2 From 0f8e8ef7c204988246da5a42d576b7fa5277a8e4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:23 +0200 Subject: clocksource: Simplify clocksource watchdog resume logic To resume the clocksource watchdog just remove the CLOCK_SOURCE_WATCHDOG bit from the watched clocksource. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.880925790@de.ibm.com> Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 76256c5aecb8..89a7b91bfbdd 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -145,7 +145,6 @@ static struct clocksource *watchdog; static struct timer_list watchdog_timer; static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; -static unsigned long watchdog_resumed; /* * Interval: 0.5sec Threshold: 0.0625s @@ -167,12 +166,9 @@ static void clocksource_watchdog(unsigned long data) struct clocksource *cs, *tmp; cycle_t csnow, wdnow; int64_t wd_nsec, cs_nsec; - int resumed; spin_lock(&watchdog_lock); - resumed = test_and_clear_bit(0, &watchdog_resumed); - wdnow = watchdog->read(watchdog); wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; @@ -223,14 +219,26 @@ static void clocksource_watchdog(unsigned long data) } spin_unlock(&watchdog_lock); } + +static inline void clocksource_reset_watchdog(void) +{ + struct clocksource *cs; + + list_for_each_entry(cs, &watchdog_list, wd_list) + cs->flags &= ~CLOCK_SOURCE_WATCHDOG; +} + static void clocksource_resume_watchdog(void) { - set_bit(0, &watchdog_resumed); + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + clocksource_reset_watchdog(); + spin_unlock_irqrestore(&watchdog_lock, flags); } static void clocksource_check_watchdog(struct clocksource *cs) { - struct clocksource *cse; unsigned long flags; spin_lock_irqsave(&watchdog_lock, flags); @@ -256,8 +264,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) watchdog_timer.function = clocksource_watchdog; /* Reset watchdog cycles */ - list_for_each_entry(cse, &watchdog_list, wd_list) - cse->flags &= ~CLOCK_SOURCE_WATCHDOG; + clocksource_reset_watchdog(); /* Start if list is not empty */ if (!list_empty(&watchdog_list)) { watchdog_last = watchdog->read(watchdog); -- cgit v1.2.2 From fb63a0ebe615fba9de8c75ea44ded999d1e24c65 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:24 +0200 Subject: clocksource: Refactor clocksource watchdog Refactor clocksource watchdog code to make it more readable. Add clocksource_dequeue_watchdog to remove a clocksource from the watchdog list when it is unregistered. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.110881699@de.ibm.com> Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 97 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 28 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 89a7b91bfbdd..56aaa749645d 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -145,6 +145,7 @@ static struct clocksource *watchdog; static struct timer_list watchdog_timer; static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; +static int watchdog_running; /* * Interval: 0.5sec Threshold: 0.0625s @@ -168,6 +169,8 @@ static void clocksource_watchdog(unsigned long data) int64_t wd_nsec, cs_nsec; spin_lock(&watchdog_lock); + if (!watchdog_running) + goto out; wdnow = watchdog->read(watchdog); wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); @@ -217,9 +220,30 @@ static void clocksource_watchdog(unsigned long data) watchdog_timer.expires += WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, next_cpu); } +out: spin_unlock(&watchdog_lock); } +static inline void clocksource_start_watchdog(void) +{ + if (watchdog_running || !watchdog || list_empty(&watchdog_list)) + return; + init_timer(&watchdog_timer); + watchdog_timer.function = clocksource_watchdog; + watchdog_last = watchdog->read(watchdog); + watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); + watchdog_running = 1; +} + +static inline void clocksource_stop_watchdog(void) +{ + if (!watchdog_running || (watchdog && !list_empty(&watchdog_list))) + return; + del_timer(&watchdog_timer); + watchdog_running = 0; +} + static inline void clocksource_reset_watchdog(void) { struct clocksource *cs; @@ -237,55 +261,70 @@ static void clocksource_resume_watchdog(void) spin_unlock_irqrestore(&watchdog_lock, flags); } -static void clocksource_check_watchdog(struct clocksource *cs) +static void clocksource_enqueue_watchdog(struct clocksource *cs) { unsigned long flags; spin_lock_irqsave(&watchdog_lock, flags); if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { - int started = !list_empty(&watchdog_list); - + /* cs is a clocksource to be watched. */ list_add(&cs->wd_list, &watchdog_list); - if (!started && watchdog) { - watchdog_last = watchdog->read(watchdog); - watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, - cpumask_first(cpu_online_mask)); - } + cs->flags &= ~CLOCK_SOURCE_WATCHDOG; } else { + /* cs is a watchdog. */ if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; - + /* Pick the best watchdog. */ if (!watchdog || cs->rating > watchdog->rating) { - if (watchdog) - del_timer(&watchdog_timer); watchdog = cs; - init_timer(&watchdog_timer); - watchdog_timer.function = clocksource_watchdog; - /* Reset watchdog cycles */ clocksource_reset_watchdog(); - /* Start if list is not empty */ - if (!list_empty(&watchdog_list)) { - watchdog_last = watchdog->read(watchdog); - watchdog_timer.expires = - jiffies + WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, - cpumask_first(cpu_online_mask)); - } } } + /* Check if the watchdog timer needs to be started. */ + clocksource_start_watchdog(); spin_unlock_irqrestore(&watchdog_lock, flags); } -#else -static void clocksource_check_watchdog(struct clocksource *cs) + +static void clocksource_dequeue_watchdog(struct clocksource *cs) +{ + struct clocksource *tmp; + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { + /* cs is a watched clocksource. */ + list_del_init(&cs->wd_list); + } else if (cs == watchdog) { + /* Reset watchdog cycles */ + clocksource_reset_watchdog(); + /* Current watchdog is removed. Find an alternative. */ + watchdog = NULL; + list_for_each_entry(tmp, &clocksource_list, list) { + if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY) + continue; + if (!watchdog || tmp->rating > watchdog->rating) + watchdog = tmp; + } + } + cs->flags &= ~CLOCK_SOURCE_WATCHDOG; + /* Check if the watchdog timer needs to be stopped. */ + clocksource_stop_watchdog(); + spin_unlock_irqrestore(&watchdog_lock, flags); +} + +#else /* CONFIG_CLOCKSOURCE_WATCHDOG */ + +static void clocksource_enqueue_watchdog(struct clocksource *cs) { if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; } +static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } static inline void clocksource_resume_watchdog(void) { } -#endif + +#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ /** * clocksource_resume - resume the clocksource(s) @@ -414,14 +453,13 @@ int clocksource_register(struct clocksource *cs) clocksource_enqueue(cs); clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); - clocksource_check_watchdog(cs); + clocksource_enqueue_watchdog(cs); return 0; } EXPORT_SYMBOL(clocksource_register); /** * clocksource_change_rating - Change the rating of a registered clocksource - * */ void clocksource_change_rating(struct clocksource *cs, int rating) { @@ -434,6 +472,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } +EXPORT_SYMBOL(clocksource_change_rating); /** * clocksource_unregister - remove a registered clocksource @@ -442,11 +481,13 @@ void clocksource_unregister(struct clocksource *cs) { unsigned long flags; + clocksource_dequeue_watchdog(cs); spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } +EXPORT_SYMBOL(clocksource_unregister); #ifdef CONFIG_SYSFS /** -- cgit v1.2.2 From c55c87c892c1875deace0c8fc28787335277fdf2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:25 +0200 Subject: clocksource: Move watchdog downgrade to a work queue thread Move the downgrade of an unstable clocksource from the timer interrupt context into the process context of a work queue thread. This is needed to be able to do the clocksource switch with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.354926067@de.ibm.com> Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 56 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 17 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 56aaa749645d..f1508019bfb4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -143,10 +143,13 @@ fs_initcall(clocksource_done_booting); static LIST_HEAD(watchdog_list); static struct clocksource *watchdog; static struct timer_list watchdog_timer; +static struct work_struct watchdog_work; static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; static int watchdog_running; +static void clocksource_watchdog_work(struct work_struct *work); + /* * Interval: 0.5sec Threshold: 0.0625s */ @@ -158,15 +161,16 @@ static void clocksource_unstable(struct clocksource *cs, int64_t delta) printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); - clocksource_change_rating(cs, 0); - list_del(&cs->wd_list); + cs->flags |= CLOCK_SOURCE_UNSTABLE; + schedule_work(&watchdog_work); } static void clocksource_watchdog(unsigned long data) { - struct clocksource *cs, *tmp; + struct clocksource *cs; cycle_t csnow, wdnow; int64_t wd_nsec, cs_nsec; + int next_cpu; spin_lock(&watchdog_lock); if (!watchdog_running) @@ -176,7 +180,12 @@ static void clocksource_watchdog(unsigned long data) wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; - list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { + list_for_each_entry(cs, &watchdog_list, wd_list) { + + /* Clocksource already marked unstable? */ + if (cs->flags & CLOCK_SOURCE_UNSTABLE) + continue; + csnow = cs->read(cs); /* Clocksource initialized ? */ @@ -207,19 +216,15 @@ static void clocksource_watchdog(unsigned long data) } } - if (!list_empty(&watchdog_list)) { - /* - * Cycle through CPUs to check if the CPUs stay - * synchronized to each other. - */ - int next_cpu = cpumask_next(raw_smp_processor_id(), - cpu_online_mask); - - if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); - } + /* + * Cycle through CPUs to check if the CPUs stay synchronized + * to each other. + */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); out: spin_unlock(&watchdog_lock); } @@ -228,6 +233,7 @@ static inline void clocksource_start_watchdog(void) { if (watchdog_running || !watchdog || list_empty(&watchdog_list)) return; + INIT_WORK(&watchdog_work, clocksource_watchdog_work); init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; watchdog_last = watchdog->read(watchdog); @@ -313,6 +319,22 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } +static void clocksource_watchdog_work(struct work_struct *work) +{ + struct clocksource *cs, *tmp; + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) + if (cs->flags & CLOCK_SOURCE_UNSTABLE) { + list_del_init(&cs->wd_list); + clocksource_change_rating(cs, 0); + } + /* Check if the watchdog timer needs to be stopped. */ + clocksource_stop_watchdog(); + spin_unlock(&watchdog_lock); +} + #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ static void clocksource_enqueue_watchdog(struct clocksource *cs) -- cgit v1.2.2 From 155ec60226ae0ae2aadaa57c951a58a359331030 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:26 +0200 Subject: timekeeping: Introduce struct timekeeper Add struct timekeeper to keep the internal values timekeeping.c needs in regard to the currently selected clock source. This moves the timekeeping intervals, xtime_nsec and the ntp error value from struct clocksource to struct timekeeper. The raw_time is removed from the clocksource as well. It gets treated like xtime as a global variable. Eventually xtime raw_time should be moved to struct timekeeper. [ tglx: minor cleanup ] Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.613209842@de.ibm.com> Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f1508019bfb4..f18c9a6bdcf4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -177,7 +177,8 @@ static void clocksource_watchdog(unsigned long data) goto out; wdnow = watchdog->read(watchdog); - wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); + wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, + watchdog->mult, watchdog->shift); watchdog_last = wdnow; list_for_each_entry(cs, &watchdog_list, wd_list) { @@ -196,7 +197,8 @@ static void clocksource_watchdog(unsigned long data) } /* Check the deviation from the watchdog clocksource. */ - cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); + cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + cs->mask, cs->mult, cs->shift); cs->wd_last = csnow; if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); -- cgit v1.2.2 From 75c5158f70c065b9704b924503d96e8297838f79 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:30 +0200 Subject: timekeeping: Update clocksource with stop_machine update_wall_time calls change_clocksource HZ times per second to check if a new clock source is available. In close to 100% of all calls there is no new clock. Replace the tick based check by an update done with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.711836357@de.ibm.com> Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 112 ++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 68 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f18c9a6bdcf4..a1657b5fdeb9 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -109,35 +109,17 @@ EXPORT_SYMBOL(timecounter_cyc2time); /*[Clocksource internal variables]--------- * curr_clocksource: * currently selected clocksource. - * next_clocksource: - * pending next selected clocksource. * clocksource_list: * linked list with the registered clocksources - * clocksource_lock: - * protects manipulations to curr_clocksource and next_clocksource - * and the clocksource_list + * clocksource_mutex: + * protects manipulations to curr_clocksource and the clocksource_list * override_name: * Name of the user-specified clocksource. */ static struct clocksource *curr_clocksource; -static struct clocksource *next_clocksource; static LIST_HEAD(clocksource_list); -static DEFINE_SPINLOCK(clocksource_lock); +static DEFINE_MUTEX(clocksource_mutex); static char override_name[32]; -static int finished_booting; - -/* clocksource_done_booting - Called near the end of core bootup - * - * Hack to avoid lots of clocksource churn at boot time. - * We use fs_initcall because we want this to start before - * device_initcall but after subsys_initcall. - */ -static int __init clocksource_done_booting(void) -{ - finished_booting = 1; - return 0; -} -fs_initcall(clocksource_done_booting); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static LIST_HEAD(watchdog_list); @@ -356,18 +338,16 @@ static inline void clocksource_resume_watchdog(void) { } void clocksource_resume(void) { struct clocksource *cs; - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); - list_for_each_entry(cs, &clocksource_list, list) { + list_for_each_entry(cs, &clocksource_list, list) if (cs->resume) cs->resume(); - } clocksource_resume_watchdog(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } /** @@ -383,28 +363,13 @@ void clocksource_touch_watchdog(void) } #ifdef CONFIG_GENERIC_TIME -/** - * clocksource_get_next - Returns the selected clocksource - * - */ -struct clocksource *clocksource_get_next(void) -{ - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); - if (next_clocksource && finished_booting) { - curr_clocksource = next_clocksource; - next_clocksource = NULL; - } - spin_unlock_irqrestore(&clocksource_lock, flags); - - return curr_clocksource; -} +static int finished_booting; /** * clocksource_select - Select the best clocksource available * - * Private function. Must hold clocksource_lock when called. + * Private function. Must hold clocksource_mutex when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. @@ -413,7 +378,7 @@ static void clocksource_select(void) { struct clocksource *best, *cs; - if (list_empty(&clocksource_list)) + if (!finished_booting || list_empty(&clocksource_list)) return; /* First clocksource on the list has the best rating. */ best = list_first_entry(&clocksource_list, struct clocksource, list); @@ -438,13 +403,31 @@ static void clocksource_select(void) best = cs; break; } - if (curr_clocksource != best) - next_clocksource = best; + if (curr_clocksource != best) { + printk(KERN_INFO "Switching to clocksource %s\n", best->name); + curr_clocksource = best; + timekeeping_notify(curr_clocksource); + } } +/* + * clocksource_done_booting - Called near the end of core bootup + * + * Hack to avoid lots of clocksource churn at boot time. + * We use fs_initcall because we want this to start before + * device_initcall but after subsys_initcall. + */ +static int __init clocksource_done_booting(void) +{ + finished_booting = 1; + clocksource_select(); + return 0; +} +fs_initcall(clocksource_done_booting); + #else /* CONFIG_GENERIC_TIME */ -static void clocksource_select(void) { } +static inline void clocksource_select(void) { } #endif @@ -471,13 +454,11 @@ static void clocksource_enqueue(struct clocksource *cs) */ int clocksource_register(struct clocksource *cs) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); clocksource_enqueue_watchdog(cs); + mutex_unlock(&clocksource_mutex); return 0; } EXPORT_SYMBOL(clocksource_register); @@ -487,14 +468,12 @@ EXPORT_SYMBOL(clocksource_register); */ void clocksource_change_rating(struct clocksource *cs, int rating) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_change_rating); @@ -503,13 +482,11 @@ EXPORT_SYMBOL(clocksource_change_rating); */ void clocksource_unregister(struct clocksource *cs) { - unsigned long flags; - + mutex_lock(&clocksource_mutex); clocksource_dequeue_watchdog(cs); - spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_unregister); @@ -527,9 +504,9 @@ sysfs_show_current_clocksources(struct sys_device *dev, { ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return count; } @@ -557,14 +534,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (buf[count-1] == '\n') count--; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; clocksource_select(); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return ret; } @@ -584,7 +561,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, struct clocksource *src; ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); list_for_each_entry(src, &clocksource_list, list) { /* * Don't show non-HRES clocksource if the tick code is @@ -596,7 +573,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "%s ", src->name); } - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); count += snprintf(buf + count, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); @@ -651,11 +628,10 @@ device_initcall(init_clocksource_sysfs); */ static int __init boot_override_clocksource(char* str) { - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); if (str) strlcpy(override_name, str, sizeof(override_name)); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); return 1; } -- cgit v1.2.2 From 6ea41d252f35465a2308a4038a323b6b07de06f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 15 Aug 2009 13:20:42 +0200 Subject: clocksource: Call clocksource_change_rating() outside of watchdog_lock The changes to the watchdog logic introduced a lock inversion between watchdog_lock and clocksource_mutex. Change the rating outside of watchdog_lock to avoid it. Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index a1657b5fdeb9..02dc22d888fe 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -307,16 +307,23 @@ static void clocksource_watchdog_work(struct work_struct *work) { struct clocksource *cs, *tmp; unsigned long flags; + LIST_HEAD(unstable); spin_lock_irqsave(&watchdog_lock, flags); list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) if (cs->flags & CLOCK_SOURCE_UNSTABLE) { list_del_init(&cs->wd_list); - clocksource_change_rating(cs, 0); + list_add(&cs->wd_list, &unstable); } /* Check if the watchdog timer needs to be stopped. */ clocksource_stop_watchdog(); - spin_unlock(&watchdog_lock); + spin_unlock_irqrestore(&watchdog_lock, flags); + + /* Needs to be done outside of watchdog lock */ + list_for_each_entry_safe(cs, tmp, &unstable, wd_list) { + list_del_init(&cs->wd_list); + clocksource_change_rating(cs, 0); + } } #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ -- cgit v1.2.2 From d0981a1b21a03866c8da7f44e35e389c2e0d6061 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Aug 2009 11:26:09 +0200 Subject: clocksource: Protect the watchdog rating changes with clocksource_mutex Martin pointed out that commit 6ea41d2529 (clocksource: Call clocksource_change_rating() outside of watchdog_lock) has a theoretical reference count problem. The calls to clocksource_change_rating() are now done outside of the clocksource mutex and outside of the watchdog lock. A concurrent clocksource_unregister() could remove the clock. Split out the code which changes the rating from clocksource_change_rating() into __clocksource_change_rating(). Protect the clocksource_watchdog_work() code sequence with the clocksource_mutex() and call __clocksource_change_rating(). LKML-Reference: Signed-off-by: Thomas Gleixner Cc: Martin Schwidefsky --- kernel/time/clocksource.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 02dc22d888fe..c6bff11f7957 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -131,6 +131,7 @@ static cycle_t watchdog_last; static int watchdog_running; static void clocksource_watchdog_work(struct work_struct *work); +static void __clocksource_change_rating(struct clocksource *cs, int rating); /* * Interval: 0.5sec Threshold: 0.0625s @@ -309,6 +310,7 @@ static void clocksource_watchdog_work(struct work_struct *work) unsigned long flags; LIST_HEAD(unstable); + mutex_lock(&clocksource_mutex); spin_lock_irqsave(&watchdog_lock, flags); list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) if (cs->flags & CLOCK_SOURCE_UNSTABLE) { @@ -322,8 +324,9 @@ static void clocksource_watchdog_work(struct work_struct *work) /* Needs to be done outside of watchdog lock */ list_for_each_entry_safe(cs, tmp, &unstable, wd_list) { list_del_init(&cs->wd_list); - clocksource_change_rating(cs, 0); + __clocksource_change_rating(cs, 0); } + mutex_unlock(&clocksource_mutex); } #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ @@ -470,16 +473,21 @@ int clocksource_register(struct clocksource *cs) } EXPORT_SYMBOL(clocksource_register); +static void __clocksource_change_rating(struct clocksource *cs, int rating) +{ + list_del(&cs->list); + cs->rating = rating; + clocksource_enqueue(cs); + clocksource_select(); +} + /** * clocksource_change_rating - Change the rating of a registered clocksource */ void clocksource_change_rating(struct clocksource *cs, int rating) { mutex_lock(&clocksource_mutex); - list_del(&cs->list); - cs->rating = rating; - clocksource_enqueue(cs); - clocksource_select(); + __clocksource_change_rating(cs, rating); mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_change_rating); -- cgit v1.2.2 From 01548f4d3e8e94caf323a4f664eb347fd34a34ab Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 18 Aug 2009 17:09:42 +0200 Subject: clocksource: Avoid clocksource watchdog circular locking dependency stop_machine from a multithreaded workqueue is not allowed because of a circular locking dependency between cpu_down and the workqueue execution. Use a kernel thread to do the clocksource downgrade. Signed-off-by: Martin Schwidefsky Cc: Peter Zijlstra Cc: john stultz LKML-Reference: <20090818170942.3ab80c91@skybase> Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c6bff11f7957..e0c86ad6e9fb 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -29,6 +29,7 @@ #include #include /* for spin_unlock_irq() using preempt_count() m68k */ #include +#include void timecounter_init(struct timecounter *tc, const struct cyclecounter *cc, @@ -130,7 +131,7 @@ static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; static int watchdog_running; -static void clocksource_watchdog_work(struct work_struct *work); +static int clocksource_watchdog_kthread(void *data); static void __clocksource_change_rating(struct clocksource *cs, int rating); /* @@ -139,6 +140,15 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating); #define WATCHDOG_INTERVAL (HZ >> 1) #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) +static void clocksource_watchdog_work(struct work_struct *work) +{ + /* + * If kthread_run fails the next watchdog scan over the + * watchdog_list will find the unstable clock again. + */ + kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); +} + static void clocksource_unstable(struct clocksource *cs, int64_t delta) { printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", @@ -167,8 +177,10 @@ static void clocksource_watchdog(unsigned long data) list_for_each_entry(cs, &watchdog_list, wd_list) { /* Clocksource already marked unstable? */ - if (cs->flags & CLOCK_SOURCE_UNSTABLE) + if (cs->flags & CLOCK_SOURCE_UNSTABLE) { + schedule_work(&watchdog_work); continue; + } csnow = cs->read(cs); @@ -304,7 +316,7 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } -static void clocksource_watchdog_work(struct work_struct *work) +static int clocksource_watchdog_kthread(void *data) { struct clocksource *cs, *tmp; unsigned long flags; @@ -327,6 +339,7 @@ static void clocksource_watchdog_work(struct work_struct *work) __clocksource_change_rating(cs, 0); } mutex_unlock(&clocksource_mutex); + return 0; } #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ -- cgit v1.2.2 From 7285dd7fd375763bfb8ab1ac9cf3f1206f503c16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 28 Aug 2009 20:25:24 +0200 Subject: clocksource: Resolve cpu hotplug dead lock with TSC unstable Martin Schwidefsky analyzed it: To register a clocksource the clocksource_mutex is acquired and if necessary timekeeping_notify is called to install the clocksource as the timekeeper clock. timekeeping_notify uses stop_machine which needs to take cpu_add_remove_lock mutex. Starting a new cpu is done with the cpu_add_remove_lock mutex held. native_cpu_up checks the tsc of the new cpu and if the tsc is no good clocksource_change_rating is called. Which needs the clocksource_mutex and the deadlock is complete. The solution is to replace the TSC via the clocksource watchdog mechanism. Mark the TSC as unstable and schedule the watchdog work so it gets removed in the watchdog thread context. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky Cc: John Stultz --- kernel/time/clocksource.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0c86ad6e9fb..a0af4ffcb6e5 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -149,15 +149,42 @@ static void clocksource_watchdog_work(struct work_struct *work) kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); } -static void clocksource_unstable(struct clocksource *cs, int64_t delta) +static void __clocksource_unstable(struct clocksource *cs) { - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", - cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; schedule_work(&watchdog_work); } +static void clocksource_unstable(struct clocksource *cs, int64_t delta) +{ + printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", + cs->name, delta); + __clocksource_unstable(cs); +} + +/** + * clocksource_mark_unstable - mark clocksource unstable via watchdog + * @cs: clocksource to be marked unstable + * + * This function is called instead of clocksource_change_rating from + * cpu hotplug code to avoid a deadlock between the clocksource mutex + * and the cpu hotplug mutex. It defers the update of the clocksource + * to the watchdog thread. + */ +void clocksource_mark_unstable(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) { + if (list_empty(&cs->wd_list)) + list_add(&cs->wd_list, &watchdog_list); + __clocksource_unstable(cs); + } + spin_unlock_irqrestore(&watchdog_lock, flags); +} + static void clocksource_watchdog(unsigned long data) { struct clocksource *cs; -- cgit v1.2.2 From f79e0258ea1f04d63db499479b5fb855dff6dbc5 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 11 Sep 2009 15:33:05 +0200 Subject: clocksource: Resolve cpu hotplug dead lock with TSC unstable, fix crash The watchdog timer is started after the watchdog clocksource and at least one watched clocksource have been registered. The clocksource work element watchdog_work is initialized just before the clocksource timer is started. This is too late for the clocksource_mark_unstable call from native_cpu_up. To fix this use a static initializer for watchdog_work. This resolves a boot crash reported by multiple people. Signed-off-by: Martin Schwidefsky Cc: Jens Axboe Cc: John Stultz LKML-Reference: <20090911153305.3fe9a361@skybase> Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index a0af4ffcb6e5..5697155f1868 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -123,10 +123,12 @@ static DEFINE_MUTEX(clocksource_mutex); static char override_name[32]; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG +static void clocksource_watchdog_work(struct work_struct *work); + static LIST_HEAD(watchdog_list); static struct clocksource *watchdog; static struct timer_list watchdog_timer; -static struct work_struct watchdog_work; +static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; static int watchdog_running; @@ -257,7 +259,6 @@ static inline void clocksource_start_watchdog(void) { if (watchdog_running || !watchdog || list_empty(&watchdog_list)) return; - INIT_WORK(&watchdog_work, clocksource_watchdog_work); init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; watchdog_last = watchdog->read(watchdog); -- cgit v1.2.2 From e6c733050faa93ce616bfedccd279ab12cffdd7b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2009 19:51:11 +0200 Subject: clocksource: clocksource_select must be called with mutex locked The callers of clocksource_select must hold clocksource_mutex to protect the clocksource_list. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky --- kernel/time/clocksource.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5697155f1868..2c2e5ba1453d 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -471,7 +471,9 @@ static void clocksource_select(void) static int __init clocksource_done_booting(void) { finished_booting = 1; + mutex_lock(&clocksource_mutex); clocksource_select(); + mutex_unlock(&clocksource_mutex); return 0; } fs_initcall(clocksource_done_booting); -- cgit v1.2.2 From 54a6bc0b071c50150bc6d1da16c2cd9a963e288c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2009 19:49:02 +0200 Subject: clocksource: Delay clocksource down rating to late boot The down rating of clock sources in the early boot process via the clock source watchdog mechanism can happen way before the per cpu event queues are initialized. This leads to a boot crash on x86 when the TSC is marked unstable in the SMP bring up. The selection of a clock source for time keeping happens in the late boot process so we can safely delay the list manipulation until clocksource_done_booting() is called. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky --- kernel/time/clocksource.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'kernel/time/clocksource.c') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 2c2e5ba1453d..09113347d328 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -121,6 +121,7 @@ static struct clocksource *curr_clocksource; static LIST_HEAD(clocksource_list); static DEFINE_MUTEX(clocksource_mutex); static char override_name[32]; +static int finished_booting; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static void clocksource_watchdog_work(struct work_struct *work); @@ -155,7 +156,8 @@ static void __clocksource_unstable(struct clocksource *cs) { cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; - schedule_work(&watchdog_work); + if (finished_booting) + schedule_work(&watchdog_work); } static void clocksource_unstable(struct clocksource *cs, int64_t delta) @@ -207,7 +209,8 @@ static void clocksource_watchdog(unsigned long data) /* Clocksource already marked unstable? */ if (cs->flags & CLOCK_SOURCE_UNSTABLE) { - schedule_work(&watchdog_work); + if (finished_booting) + schedule_work(&watchdog_work); continue; } @@ -380,6 +383,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } static inline void clocksource_resume_watchdog(void) { } +static inline int clocksource_watchdog_kthread(void *data) { return 0; } #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ @@ -415,8 +419,6 @@ void clocksource_touch_watchdog(void) #ifdef CONFIG_GENERIC_TIME -static int finished_booting; - /** * clocksource_select - Select the best clocksource available * @@ -461,6 +463,12 @@ static void clocksource_select(void) } } +#else /* CONFIG_GENERIC_TIME */ + +static inline void clocksource_select(void) { } + +#endif + /* * clocksource_done_booting - Called near the end of core bootup * @@ -471,6 +479,12 @@ static void clocksource_select(void) static int __init clocksource_done_booting(void) { finished_booting = 1; + + /* + * Run the watchdog first to eliminate unstable clock sources + */ + clocksource_watchdog_kthread(NULL); + mutex_lock(&clocksource_mutex); clocksource_select(); mutex_unlock(&clocksource_mutex); @@ -478,12 +492,6 @@ static int __init clocksource_done_booting(void) } fs_initcall(clocksource_done_booting); -#else /* CONFIG_GENERIC_TIME */ - -static inline void clocksource_select(void) { } - -#endif - /* * Enqueue the clocksource sorted by rating */ -- cgit v1.2.2