diff options
| author | Ingo Molnar <mingo@elte.hu> | 2009-10-01 05:20:33 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2009-10-01 05:20:48 -0400 |
| commit | 0aa73ba1c4e1ad1d51a29e0df95ccd9f746918b6 (patch) | |
| tree | f0714ddcd02812b4fbe3b5405df9e4068f5587e2 /kernel/time/clocksource.c | |
| parent | 925936ebf35a95c290e010b784c962164e6728f3 (diff) | |
| parent | 33974093c024f08caadd2fc71a83bd811ed1831d (diff) | |
Merge branch 'tracing/urgent' into tracing/core
Merge reason: Pick up latest fixes and update to latest upstream.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/time/clocksource.c')
| -rw-r--r-- | kernel/time/clocksource.c | 529 |
1 files changed, 302 insertions, 227 deletions
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7466cb811251..5e18c6ab2c6a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -21,7 +21,6 @@ | |||
| 21 | * | 21 | * |
| 22 | * TODO WishList: | 22 | * TODO WishList: |
| 23 | * o Allow clocksource drivers to be unregistered | 23 | * o Allow clocksource drivers to be unregistered |
| 24 | * o get rid of clocksource_jiffies extern | ||
| 25 | */ | 24 | */ |
| 26 | 25 | ||
| 27 | #include <linux/clocksource.h> | 26 | #include <linux/clocksource.h> |
| @@ -30,6 +29,7 @@ | |||
| 30 | #include <linux/module.h> | 29 | #include <linux/module.h> |
| 31 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ | 30 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ |
| 32 | #include <linux/tick.h> | 31 | #include <linux/tick.h> |
| 32 | #include <linux/kthread.h> | ||
| 33 | 33 | ||
| 34 | void timecounter_init(struct timecounter *tc, | 34 | void timecounter_init(struct timecounter *tc, |
| 35 | const struct cyclecounter *cc, | 35 | const struct cyclecounter *cc, |
| @@ -107,50 +107,35 @@ u64 timecounter_cyc2time(struct timecounter *tc, | |||
| 107 | } | 107 | } |
| 108 | EXPORT_SYMBOL(timecounter_cyc2time); | 108 | EXPORT_SYMBOL(timecounter_cyc2time); |
| 109 | 109 | ||
| 110 | /* XXX - Would like a better way for initializing curr_clocksource */ | ||
| 111 | extern struct clocksource clocksource_jiffies; | ||
| 112 | |||
| 113 | /*[Clocksource internal variables]--------- | 110 | /*[Clocksource internal variables]--------- |
| 114 | * curr_clocksource: | 111 | * curr_clocksource: |
| 115 | * currently selected clocksource. Initialized to clocksource_jiffies. | 112 | * currently selected clocksource. |
| 116 | * next_clocksource: | ||
| 117 | * pending next selected clocksource. | ||
| 118 | * clocksource_list: | 113 | * clocksource_list: |
| 119 | * linked list with the registered clocksources | 114 | * linked list with the registered clocksources |
| 120 | * clocksource_lock: | 115 | * clocksource_mutex: |
| 121 | * protects manipulations to curr_clocksource and next_clocksource | 116 | * protects manipulations to curr_clocksource and the clocksource_list |
| 122 | * and the clocksource_list | ||
| 123 | * override_name: | 117 | * override_name: |
| 124 | * Name of the user-specified clocksource. | 118 | * Name of the user-specified clocksource. |
| 125 | */ | 119 | */ |
| 126 | static struct clocksource *curr_clocksource = &clocksource_jiffies; | 120 | static struct clocksource *curr_clocksource; |
| 127 | static struct clocksource *next_clocksource; | ||
| 128 | static struct clocksource *clocksource_override; | ||
| 129 | static LIST_HEAD(clocksource_list); | 121 | static LIST_HEAD(clocksource_list); |
| 130 | static DEFINE_SPINLOCK(clocksource_lock); | 122 | static DEFINE_MUTEX(clocksource_mutex); |
| 131 | static char override_name[32]; | 123 | static char override_name[32]; |
| 132 | static int finished_booting; | 124 | static int finished_booting; |
| 133 | 125 | ||
| 134 | /* clocksource_done_booting - Called near the end of core bootup | ||
| 135 | * | ||
| 136 | * Hack to avoid lots of clocksource churn at boot time. | ||
| 137 | * We use fs_initcall because we want this to start before | ||
| 138 | * device_initcall but after subsys_initcall. | ||
| 139 | */ | ||
| 140 | static int __init clocksource_done_booting(void) | ||
| 141 | { | ||
| 142 | finished_booting = 1; | ||
| 143 | return 0; | ||
| 144 | } | ||
| 145 | fs_initcall(clocksource_done_booting); | ||
| 146 | |||
| 147 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | 126 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG |
| 127 | static void clocksource_watchdog_work(struct work_struct *work); | ||
| 128 | |||
| 148 | static LIST_HEAD(watchdog_list); | 129 | static LIST_HEAD(watchdog_list); |
| 149 | static struct clocksource *watchdog; | 130 | static struct clocksource *watchdog; |
| 150 | static struct timer_list watchdog_timer; | 131 | static struct timer_list watchdog_timer; |
| 132 | static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); | ||
| 151 | static DEFINE_SPINLOCK(watchdog_lock); | 133 | static DEFINE_SPINLOCK(watchdog_lock); |
| 152 | static cycle_t watchdog_last; | 134 | static cycle_t watchdog_last; |
| 153 | static unsigned long watchdog_resumed; | 135 | static int watchdog_running; |
| 136 | |||
| 137 | static int clocksource_watchdog_kthread(void *data); | ||
| 138 | static void __clocksource_change_rating(struct clocksource *cs, int rating); | ||
| 154 | 139 | ||
| 155 | /* | 140 | /* |
| 156 | * Interval: 0.5sec Threshold: 0.0625s | 141 | * Interval: 0.5sec Threshold: 0.0625s |
| @@ -158,135 +143,249 @@ static unsigned long watchdog_resumed; | |||
| 158 | #define WATCHDOG_INTERVAL (HZ >> 1) | 143 | #define WATCHDOG_INTERVAL (HZ >> 1) |
| 159 | #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) | 144 | #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) |
| 160 | 145 | ||
| 161 | static void clocksource_ratewd(struct clocksource *cs, int64_t delta) | 146 | static void clocksource_watchdog_work(struct work_struct *work) |
| 162 | { | 147 | { |
| 163 | if (delta > -WATCHDOG_THRESHOLD && delta < WATCHDOG_THRESHOLD) | 148 | /* |
| 164 | return; | 149 | * If kthread_run fails the next watchdog scan over the |
| 150 | * watchdog_list will find the unstable clock again. | ||
| 151 | */ | ||
| 152 | kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); | ||
| 153 | } | ||
| 154 | |||
| 155 | static void __clocksource_unstable(struct clocksource *cs) | ||
| 156 | { | ||
| 157 | cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); | ||
| 158 | cs->flags |= CLOCK_SOURCE_UNSTABLE; | ||
| 159 | if (finished_booting) | ||
| 160 | schedule_work(&watchdog_work); | ||
| 161 | } | ||
| 165 | 162 | ||
| 163 | static void clocksource_unstable(struct clocksource *cs, int64_t delta) | ||
| 164 | { | ||
| 166 | printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", | 165 | printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", |
| 167 | cs->name, delta); | 166 | cs->name, delta); |
| 168 | cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); | 167 | __clocksource_unstable(cs); |
| 169 | clocksource_change_rating(cs, 0); | 168 | } |
| 170 | list_del(&cs->wd_list); | 169 | |
| 170 | /** | ||
| 171 | * clocksource_mark_unstable - mark clocksource unstable via watchdog | ||
| 172 | * @cs: clocksource to be marked unstable | ||
| 173 | * | ||
| 174 | * This function is called instead of clocksource_change_rating from | ||
| 175 | * cpu hotplug code to avoid a deadlock between the clocksource mutex | ||
| 176 | * and the cpu hotplug mutex. It defers the update of the clocksource | ||
| 177 | * to the watchdog thread. | ||
| 178 | */ | ||
| 179 | void clocksource_mark_unstable(struct clocksource *cs) | ||
| 180 | { | ||
| 181 | unsigned long flags; | ||
| 182 | |||
| 183 | spin_lock_irqsave(&watchdog_lock, flags); | ||
| 184 | if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) { | ||
| 185 | if (list_empty(&cs->wd_list)) | ||
| 186 | list_add(&cs->wd_list, &watchdog_list); | ||
| 187 | __clocksource_unstable(cs); | ||
| 188 | } | ||
| 189 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
| 171 | } | 190 | } |
| 172 | 191 | ||
| 173 | static void clocksource_watchdog(unsigned long data) | 192 | static void clocksource_watchdog(unsigned long data) |
| 174 | { | 193 | { |
| 175 | struct clocksource *cs, *tmp; | 194 | struct clocksource *cs; |
| 176 | cycle_t csnow, wdnow; | 195 | cycle_t csnow, wdnow; |
| 177 | int64_t wd_nsec, cs_nsec; | 196 | int64_t wd_nsec, cs_nsec; |
| 178 | int resumed; | 197 | int next_cpu; |
| 179 | 198 | ||
| 180 | spin_lock(&watchdog_lock); | 199 | spin_lock(&watchdog_lock); |
| 181 | 200 | if (!watchdog_running) | |
| 182 | resumed = test_and_clear_bit(0, &watchdog_resumed); | 201 | goto out; |
| 183 | 202 | ||
| 184 | wdnow = watchdog->read(watchdog); | 203 | wdnow = watchdog->read(watchdog); |
| 185 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); | 204 | wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, |
| 205 | watchdog->mult, watchdog->shift); | ||
| 186 | watchdog_last = wdnow; | 206 | watchdog_last = wdnow; |
| 187 | 207 | ||
| 188 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { | 208 | list_for_each_entry(cs, &watchdog_list, wd_list) { |
| 189 | csnow = cs->read(cs); | ||
| 190 | 209 | ||
| 191 | if (unlikely(resumed)) { | 210 | /* Clocksource already marked unstable? */ |
| 192 | cs->wd_last = csnow; | 211 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { |
| 212 | if (finished_booting) | ||
| 213 | schedule_work(&watchdog_work); | ||
| 193 | continue; | 214 | continue; |
| 194 | } | 215 | } |
| 195 | 216 | ||
| 196 | /* Initialized ? */ | 217 | csnow = cs->read(cs); |
| 218 | |||
| 219 | /* Clocksource initialized ? */ | ||
| 197 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { | 220 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { |
| 198 | if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && | ||
| 199 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { | ||
| 200 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
| 201 | /* | ||
| 202 | * We just marked the clocksource as | ||
| 203 | * highres-capable, notify the rest of the | ||
| 204 | * system as well so that we transition | ||
| 205 | * into high-res mode: | ||
| 206 | */ | ||
| 207 | tick_clock_notify(); | ||
| 208 | } | ||
| 209 | cs->flags |= CLOCK_SOURCE_WATCHDOG; | 221 | cs->flags |= CLOCK_SOURCE_WATCHDOG; |
| 210 | cs->wd_last = csnow; | 222 | cs->wd_last = csnow; |
| 211 | } else { | 223 | continue; |
| 212 | cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); | ||
| 213 | cs->wd_last = csnow; | ||
| 214 | /* Check the delta. Might remove from the list ! */ | ||
| 215 | clocksource_ratewd(cs, cs_nsec - wd_nsec); | ||
| 216 | } | 224 | } |
| 217 | } | ||
| 218 | 225 | ||
| 219 | if (!list_empty(&watchdog_list)) { | 226 | /* Check the deviation from the watchdog clocksource. */ |
| 220 | /* | 227 | cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & |
| 221 | * Cycle through CPUs to check if the CPUs stay | 228 | cs->mask, cs->mult, cs->shift); |
| 222 | * synchronized to each other. | 229 | cs->wd_last = csnow; |
| 223 | */ | 230 | if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { |
| 224 | int next_cpu = cpumask_next(raw_smp_processor_id(), | 231 | clocksource_unstable(cs, cs_nsec - wd_nsec); |
| 225 | cpu_online_mask); | 232 | continue; |
| 233 | } | ||
| 226 | 234 | ||
| 227 | if (next_cpu >= nr_cpu_ids) | 235 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && |
| 228 | next_cpu = cpumask_first(cpu_online_mask); | 236 | (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && |
| 229 | watchdog_timer.expires += WATCHDOG_INTERVAL; | 237 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { |
| 230 | add_timer_on(&watchdog_timer, next_cpu); | 238 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
| 239 | /* | ||
| 240 | * We just marked the clocksource as highres-capable, | ||
| 241 | * notify the rest of the system as well so that we | ||
| 242 | * transition into high-res mode: | ||
| 243 | */ | ||
| 244 | tick_clock_notify(); | ||
| 245 | } | ||
| 231 | } | 246 | } |
| 247 | |||
| 248 | /* | ||
| 249 | * Cycle through CPUs to check if the CPUs stay synchronized | ||
| 250 | * to each other. | ||
| 251 | */ | ||
| 252 | next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); | ||
| 253 | if (next_cpu >= nr_cpu_ids) | ||
| 254 | next_cpu = cpumask_first(cpu_online_mask); | ||
| 255 | watchdog_timer.expires += WATCHDOG_INTERVAL; | ||
| 256 | add_timer_on(&watchdog_timer, next_cpu); | ||
| 257 | out: | ||
| 232 | spin_unlock(&watchdog_lock); | 258 | spin_unlock(&watchdog_lock); |
| 233 | } | 259 | } |
| 260 | |||
| 261 | static inline void clocksource_start_watchdog(void) | ||
| 262 | { | ||
| 263 | if (watchdog_running || !watchdog || list_empty(&watchdog_list)) | ||
| 264 | return; | ||
| 265 | init_timer(&watchdog_timer); | ||
| 266 | watchdog_timer.function = clocksource_watchdog; | ||
| 267 | watchdog_last = watchdog->read(watchdog); | ||
| 268 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | ||
| 269 | add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); | ||
| 270 | watchdog_running = 1; | ||
| 271 | } | ||
| 272 | |||
| 273 | static inline void clocksource_stop_watchdog(void) | ||
| 274 | { | ||
| 275 | if (!watchdog_running || (watchdog && !list_empty(&watchdog_list))) | ||
| 276 | return; | ||
| 277 | del_timer(&watchdog_timer); | ||
| 278 | watchdog_running = 0; | ||
| 279 | } | ||
| 280 | |||
| 281 | static inline void clocksource_reset_watchdog(void) | ||
| 282 | { | ||
| 283 | struct clocksource *cs; | ||
| 284 | |||
| 285 | list_for_each_entry(cs, &watchdog_list, wd_list) | ||
| 286 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
| 287 | } | ||
| 288 | |||
| 234 | static void clocksource_resume_watchdog(void) | 289 | static void clocksource_resume_watchdog(void) |
| 235 | { | 290 | { |
| 236 | set_bit(0, &watchdog_resumed); | 291 | unsigned long flags; |
| 292 | |||
| 293 | spin_lock_irqsave(&watchdog_lock, flags); | ||
| 294 | clocksource_reset_watchdog(); | ||
| 295 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
| 237 | } | 296 | } |
| 238 | 297 | ||
| 239 | static void clocksource_check_watchdog(struct clocksource *cs) | 298 | static void clocksource_enqueue_watchdog(struct clocksource *cs) |
| 240 | { | 299 | { |
| 241 | struct clocksource *cse; | ||
| 242 | unsigned long flags; | 300 | unsigned long flags; |
| 243 | 301 | ||
| 244 | spin_lock_irqsave(&watchdog_lock, flags); | 302 | spin_lock_irqsave(&watchdog_lock, flags); |
| 245 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { | 303 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { |
| 246 | int started = !list_empty(&watchdog_list); | 304 | /* cs is a clocksource to be watched. */ |
| 247 | |||
| 248 | list_add(&cs->wd_list, &watchdog_list); | 305 | list_add(&cs->wd_list, &watchdog_list); |
| 249 | if (!started && watchdog) { | 306 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; |
| 250 | watchdog_last = watchdog->read(watchdog); | ||
| 251 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | ||
| 252 | add_timer_on(&watchdog_timer, | ||
| 253 | cpumask_first(cpu_online_mask)); | ||
| 254 | } | ||
| 255 | } else { | 307 | } else { |
| 308 | /* cs is a watchdog. */ | ||
| 256 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | 309 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) |
| 257 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 310 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
| 258 | 311 | /* Pick the best watchdog. */ | |
| 259 | if (!watchdog || cs->rating > watchdog->rating) { | 312 | if (!watchdog || cs->rating > watchdog->rating) { |
| 260 | if (watchdog) | ||
| 261 | del_timer(&watchdog_timer); | ||
| 262 | watchdog = cs; | 313 | watchdog = cs; |
| 263 | init_timer(&watchdog_timer); | ||
| 264 | watchdog_timer.function = clocksource_watchdog; | ||
| 265 | |||
| 266 | /* Reset watchdog cycles */ | 314 | /* Reset watchdog cycles */ |
| 267 | list_for_each_entry(cse, &watchdog_list, wd_list) | 315 | clocksource_reset_watchdog(); |
| 268 | cse->flags &= ~CLOCK_SOURCE_WATCHDOG; | 316 | } |
| 269 | /* Start if list is not empty */ | 317 | } |
| 270 | if (!list_empty(&watchdog_list)) { | 318 | /* Check if the watchdog timer needs to be started. */ |
| 271 | watchdog_last = watchdog->read(watchdog); | 319 | clocksource_start_watchdog(); |
| 272 | watchdog_timer.expires = | 320 | spin_unlock_irqrestore(&watchdog_lock, flags); |
| 273 | jiffies + WATCHDOG_INTERVAL; | 321 | } |
| 274 | add_timer_on(&watchdog_timer, | 322 | |
| 275 | cpumask_first(cpu_online_mask)); | 323 | static void clocksource_dequeue_watchdog(struct clocksource *cs) |
| 276 | } | 324 | { |
| 325 | struct clocksource *tmp; | ||
| 326 | unsigned long flags; | ||
| 327 | |||
| 328 | spin_lock_irqsave(&watchdog_lock, flags); | ||
| 329 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { | ||
| 330 | /* cs is a watched clocksource. */ | ||
| 331 | list_del_init(&cs->wd_list); | ||
| 332 | } else if (cs == watchdog) { | ||
| 333 | /* Reset watchdog cycles */ | ||
| 334 | clocksource_reset_watchdog(); | ||
| 335 | /* Current watchdog is removed. Find an alternative. */ | ||
| 336 | watchdog = NULL; | ||
| 337 | list_for_each_entry(tmp, &clocksource_list, list) { | ||
| 338 | if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY) | ||
| 339 | continue; | ||
| 340 | if (!watchdog || tmp->rating > watchdog->rating) | ||
| 341 | watchdog = tmp; | ||
| 277 | } | 342 | } |
| 278 | } | 343 | } |
| 344 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
| 345 | /* Check if the watchdog timer needs to be stopped. */ | ||
| 346 | clocksource_stop_watchdog(); | ||
| 279 | spin_unlock_irqrestore(&watchdog_lock, flags); | 347 | spin_unlock_irqrestore(&watchdog_lock, flags); |
| 280 | } | 348 | } |
| 281 | #else | 349 | |
| 282 | static void clocksource_check_watchdog(struct clocksource *cs) | 350 | static int clocksource_watchdog_kthread(void *data) |
| 351 | { | ||
| 352 | struct clocksource *cs, *tmp; | ||
| 353 | unsigned long flags; | ||
| 354 | LIST_HEAD(unstable); | ||
| 355 | |||
| 356 | mutex_lock(&clocksource_mutex); | ||
| 357 | spin_lock_irqsave(&watchdog_lock, flags); | ||
| 358 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) | ||
| 359 | if (cs->flags & CLOCK_SOURCE_UNSTABLE) { | ||
| 360 | list_del_init(&cs->wd_list); | ||
| 361 | list_add(&cs->wd_list, &unstable); | ||
| 362 | } | ||
| 363 | /* Check if the watchdog timer needs to be stopped. */ | ||
| 364 | clocksource_stop_watchdog(); | ||
| 365 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
| 366 | |||
| 367 | /* Needs to be done outside of watchdog lock */ | ||
| 368 | list_for_each_entry_safe(cs, tmp, &unstable, wd_list) { | ||
| 369 | list_del_init(&cs->wd_list); | ||
| 370 | __clocksource_change_rating(cs, 0); | ||
| 371 | } | ||
| 372 | mutex_unlock(&clocksource_mutex); | ||
| 373 | return 0; | ||
| 374 | } | ||
| 375 | |||
| 376 | #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ | ||
| 377 | |||
| 378 | static void clocksource_enqueue_watchdog(struct clocksource *cs) | ||
| 283 | { | 379 | { |
| 284 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | 380 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) |
| 285 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 381 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
| 286 | } | 382 | } |
| 287 | 383 | ||
| 384 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } | ||
| 288 | static inline void clocksource_resume_watchdog(void) { } | 385 | static inline void clocksource_resume_watchdog(void) { } |
| 289 | #endif | 386 | static inline int clocksource_watchdog_kthread(void *data) { return 0; } |
| 387 | |||
| 388 | #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ | ||
| 290 | 389 | ||
| 291 | /** | 390 | /** |
| 292 | * clocksource_resume - resume the clocksource(s) | 391 | * clocksource_resume - resume the clocksource(s) |
| @@ -294,18 +393,12 @@ static inline void clocksource_resume_watchdog(void) { } | |||
| 294 | void clocksource_resume(void) | 393 | void clocksource_resume(void) |
| 295 | { | 394 | { |
| 296 | struct clocksource *cs; | 395 | struct clocksource *cs; |
| 297 | unsigned long flags; | ||
| 298 | 396 | ||
| 299 | spin_lock_irqsave(&clocksource_lock, flags); | 397 | list_for_each_entry(cs, &clocksource_list, list) |
| 300 | |||
| 301 | list_for_each_entry(cs, &clocksource_list, list) { | ||
| 302 | if (cs->resume) | 398 | if (cs->resume) |
| 303 | cs->resume(); | 399 | cs->resume(); |
| 304 | } | ||
| 305 | 400 | ||
| 306 | clocksource_resume_watchdog(); | 401 | clocksource_resume_watchdog(); |
| 307 | |||
| 308 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
| 309 | } | 402 | } |
| 310 | 403 | ||
| 311 | /** | 404 | /** |
| @@ -320,75 +413,94 @@ void clocksource_touch_watchdog(void) | |||
| 320 | clocksource_resume_watchdog(); | 413 | clocksource_resume_watchdog(); |
| 321 | } | 414 | } |
| 322 | 415 | ||
| 416 | #ifdef CONFIG_GENERIC_TIME | ||
| 417 | |||
| 323 | /** | 418 | /** |
| 324 | * clocksource_get_next - Returns the selected clocksource | 419 | * clocksource_select - Select the best clocksource available |
| 325 | * | 420 | * |
| 421 | * Private function. Must hold clocksource_mutex when called. | ||
| 422 | * | ||
| 423 | * Select the clocksource with the best rating, or the clocksource, | ||
| 424 | * which is selected by userspace override. | ||
| 326 | */ | 425 | */ |
| 327 | struct clocksource *clocksource_get_next(void) | 426 | static void clocksource_select(void) |
| 328 | { | 427 | { |
| 329 | unsigned long flags; | 428 | struct clocksource *best, *cs; |
| 330 | 429 | ||
| 331 | spin_lock_irqsave(&clocksource_lock, flags); | 430 | if (!finished_booting || list_empty(&clocksource_list)) |
| 332 | if (next_clocksource && finished_booting) { | 431 | return; |
| 333 | curr_clocksource = next_clocksource; | 432 | /* First clocksource on the list has the best rating. */ |
| 334 | next_clocksource = NULL; | 433 | best = list_first_entry(&clocksource_list, struct clocksource, list); |
| 434 | /* Check for the override clocksource. */ | ||
| 435 | list_for_each_entry(cs, &clocksource_list, list) { | ||
| 436 | if (strcmp(cs->name, override_name) != 0) | ||
| 437 | continue; | ||
| 438 | /* | ||
| 439 | * Check to make sure we don't switch to a non-highres | ||
| 440 | * capable clocksource if the tick code is in oneshot | ||
| 441 | * mode (highres or nohz) | ||
| 442 | */ | ||
| 443 | if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && | ||
| 444 | tick_oneshot_mode_active()) { | ||
| 445 | /* Override clocksource cannot be used. */ | ||
| 446 | printk(KERN_WARNING "Override clocksource %s is not " | ||
| 447 | "HRT compatible. Cannot switch while in " | ||
| 448 | "HRT/NOHZ mode\n", cs->name); | ||
| 449 | override_name[0] = 0; | ||
| 450 | } else | ||
| 451 | /* Override clocksource can be used. */ | ||
| 452 | best = cs; | ||
| 453 | break; | ||
| 454 | } | ||
| 455 | if (curr_clocksource != best) { | ||
| 456 | printk(KERN_INFO "Switching to clocksource %s\n", best->name); | ||
| 457 | curr_clocksource = best; | ||
| 458 | timekeeping_notify(curr_clocksource); | ||
| 335 | } | 459 | } |
| 336 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
| 337 | |||
| 338 | return curr_clocksource; | ||
| 339 | } | 460 | } |
| 340 | 461 | ||
| 341 | /** | 462 | #else /* CONFIG_GENERIC_TIME */ |
| 342 | * select_clocksource - Selects the best registered clocksource. | 463 | |
| 343 | * | 464 | static inline void clocksource_select(void) { } |
| 344 | * Private function. Must hold clocksource_lock when called. | 465 | |
| 466 | #endif | ||
| 467 | |||
| 468 | /* | ||
| 469 | * clocksource_done_booting - Called near the end of core bootup | ||
| 345 | * | 470 | * |
| 346 | * Select the clocksource with the best rating, or the clocksource, | 471 | * Hack to avoid lots of clocksource churn at boot time. |
| 347 | * which is selected by userspace override. | 472 | * We use fs_initcall because we want this to start before |
| 473 | * device_initcall but after subsys_initcall. | ||
| 348 | */ | 474 | */ |
| 349 | static struct clocksource *select_clocksource(void) | 475 | static int __init clocksource_done_booting(void) |
| 350 | { | 476 | { |
| 351 | struct clocksource *next; | 477 | finished_booting = 1; |
| 352 | |||
| 353 | if (list_empty(&clocksource_list)) | ||
| 354 | return NULL; | ||
| 355 | |||
| 356 | if (clocksource_override) | ||
| 357 | next = clocksource_override; | ||
| 358 | else | ||
| 359 | next = list_entry(clocksource_list.next, struct clocksource, | ||
| 360 | list); | ||
| 361 | 478 | ||
| 362 | if (next == curr_clocksource) | 479 | /* |
| 363 | return NULL; | 480 | * Run the watchdog first to eliminate unstable clock sources |
| 481 | */ | ||
| 482 | clocksource_watchdog_kthread(NULL); | ||
| 364 | 483 | ||
| 365 | return next; | 484 | mutex_lock(&clocksource_mutex); |
| 485 | clocksource_select(); | ||
| 486 | mutex_unlock(&clocksource_mutex); | ||
| 487 | return 0; | ||
| 366 | } | 488 | } |
| 489 | fs_initcall(clocksource_done_booting); | ||
| 367 | 490 | ||
| 368 | /* | 491 | /* |
| 369 | * Enqueue the clocksource sorted by rating | 492 | * Enqueue the clocksource sorted by rating |
| 370 | */ | 493 | */ |
| 371 | static int clocksource_enqueue(struct clocksource *c) | 494 | static void clocksource_enqueue(struct clocksource *cs) |
| 372 | { | 495 | { |
| 373 | struct list_head *tmp, *entry = &clocksource_list; | 496 | struct list_head *entry = &clocksource_list; |
| 497 | struct clocksource *tmp; | ||
| 374 | 498 | ||
| 375 | list_for_each(tmp, &clocksource_list) { | 499 | list_for_each_entry(tmp, &clocksource_list, list) |
| 376 | struct clocksource *cs; | ||
| 377 | |||
| 378 | cs = list_entry(tmp, struct clocksource, list); | ||
| 379 | if (cs == c) | ||
| 380 | return -EBUSY; | ||
| 381 | /* Keep track of the place, where to insert */ | 500 | /* Keep track of the place, where to insert */ |
| 382 | if (cs->rating >= c->rating) | 501 | if (tmp->rating >= cs->rating) |
| 383 | entry = tmp; | 502 | entry = &tmp->list; |
| 384 | } | 503 | list_add(&cs->list, entry); |
| 385 | list_add(&c->list, entry); | ||
| 386 | |||
| 387 | if (strlen(c->name) == strlen(override_name) && | ||
| 388 | !strcmp(c->name, override_name)) | ||
| 389 | clocksource_override = c; | ||
| 390 | |||
| 391 | return 0; | ||
| 392 | } | 504 | } |
| 393 | 505 | ||
| 394 | /** | 506 | /** |
| @@ -397,52 +509,48 @@ static int clocksource_enqueue(struct clocksource *c) | |||
| 397 | * | 509 | * |
| 398 | * Returns -EBUSY if registration fails, zero otherwise. | 510 | * Returns -EBUSY if registration fails, zero otherwise. |
| 399 | */ | 511 | */ |
| 400 | int clocksource_register(struct clocksource *c) | 512 | int clocksource_register(struct clocksource *cs) |
| 401 | { | 513 | { |
| 402 | unsigned long flags; | 514 | mutex_lock(&clocksource_mutex); |
| 403 | int ret; | 515 | clocksource_enqueue(cs); |
| 404 | 516 | clocksource_select(); | |
| 405 | spin_lock_irqsave(&clocksource_lock, flags); | 517 | clocksource_enqueue_watchdog(cs); |
| 406 | ret = clocksource_enqueue(c); | 518 | mutex_unlock(&clocksource_mutex); |
| 407 | if (!ret) | 519 | return 0; |
| 408 | next_clocksource = select_clocksource(); | ||
| 409 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
| 410 | if (!ret) | ||
| 411 | clocksource_check_watchdog(c); | ||
| 412 | return ret; | ||
| 413 | } | 520 | } |
| 414 | EXPORT_SYMBOL(clocksource_register); | 521 | EXPORT_SYMBOL(clocksource_register); |
| 415 | 522 | ||
| 523 | static void __clocksource_change_rating(struct clocksource *cs, int rating) | ||
| 524 | { | ||
| 525 | list_del(&cs->list); | ||
| 526 | cs->rating = rating; | ||
| 527 | clocksource_enqueue(cs); | ||
| 528 | clocksource_select(); | ||
| 529 | } | ||
| 530 | |||
| 416 | /** | 531 | /** |
| 417 | * clocksource_change_rating - Change the rating of a registered clocksource | 532 | * clocksource_change_rating - Change the rating of a registered clocksource |
| 418 | * | ||
| 419 | */ | 533 | */ |
| 420 | void clocksource_change_rating(struct clocksource *cs, int rating) | 534 | void clocksource_change_rating(struct clocksource *cs, int rating) |
| 421 | { | 535 | { |
| 422 | unsigned long flags; | 536 | mutex_lock(&clocksource_mutex); |
| 423 | 537 | __clocksource_change_rating(cs, rating); | |
| 424 | spin_lock_irqsave(&clocksource_lock, flags); | 538 | mutex_unlock(&clocksource_mutex); |
| 425 | list_del(&cs->list); | ||
| 426 | cs->rating = rating; | ||
| 427 | clocksource_enqueue(cs); | ||
| 428 | next_clocksource = select_clocksource(); | ||
| 429 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
| 430 | } | 539 | } |
| 540 | EXPORT_SYMBOL(clocksource_change_rating); | ||
| 431 | 541 | ||
| 432 | /** | 542 | /** |
| 433 | * clocksource_unregister - remove a registered clocksource | 543 | * clocksource_unregister - remove a registered clocksource |
| 434 | */ | 544 | */ |
| 435 | void clocksource_unregister(struct clocksource *cs) | 545 | void clocksource_unregister(struct clocksource *cs) |
| 436 | { | 546 | { |
| 437 | unsigned long flags; | 547 | mutex_lock(&clocksource_mutex); |
| 438 | 548 | clocksource_dequeue_watchdog(cs); | |
| 439 | spin_lock_irqsave(&clocksource_lock, flags); | ||
| 440 | list_del(&cs->list); | 549 | list_del(&cs->list); |
| 441 | if (clocksource_override == cs) | 550 | clocksource_select(); |
| 442 | clocksource_override = NULL; | 551 | mutex_unlock(&clocksource_mutex); |
| 443 | next_clocksource = select_clocksource(); | ||
| 444 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
| 445 | } | 552 | } |
| 553 | EXPORT_SYMBOL(clocksource_unregister); | ||
| 446 | 554 | ||
| 447 | #ifdef CONFIG_SYSFS | 555 | #ifdef CONFIG_SYSFS |
| 448 | /** | 556 | /** |
| @@ -458,9 +566,9 @@ sysfs_show_current_clocksources(struct sys_device *dev, | |||
| 458 | { | 566 | { |
| 459 | ssize_t count = 0; | 567 | ssize_t count = 0; |
| 460 | 568 | ||
| 461 | spin_lock_irq(&clocksource_lock); | 569 | mutex_lock(&clocksource_mutex); |
| 462 | count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); | 570 | count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); |
| 463 | spin_unlock_irq(&clocksource_lock); | 571 | mutex_unlock(&clocksource_mutex); |
| 464 | 572 | ||
| 465 | return count; | 573 | return count; |
| 466 | } | 574 | } |
| @@ -478,9 +586,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, | |||
| 478 | struct sysdev_attribute *attr, | 586 | struct sysdev_attribute *attr, |
| 479 | const char *buf, size_t count) | 587 | const char *buf, size_t count) |
| 480 | { | 588 | { |
| 481 | struct clocksource *ovr = NULL; | ||
| 482 | size_t ret = count; | 589 | size_t ret = count; |
| 483 | int len; | ||
| 484 | 590 | ||
| 485 | /* strings from sysfs write are not 0 terminated! */ | 591 | /* strings from sysfs write are not 0 terminated! */ |
| 486 | if (count >= sizeof(override_name)) | 592 | if (count >= sizeof(override_name)) |
| @@ -490,44 +596,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, | |||
| 490 | if (buf[count-1] == '\n') | 596 | if (buf[count-1] == '\n') |
| 491 | count--; | 597 | count--; |
| 492 | 598 | ||
| 493 | spin_lock_irq(&clocksource_lock); | 599 | mutex_lock(&clocksource_mutex); |
| 494 | 600 | ||
| 495 | if (count > 0) | 601 | if (count > 0) |
| 496 | memcpy(override_name, buf, count); | 602 | memcpy(override_name, buf, count); |
| 497 | override_name[count] = 0; | 603 | override_name[count] = 0; |
| 604 | clocksource_select(); | ||
| 498 | 605 | ||
| 499 | len = strlen(override_name); | 606 | mutex_unlock(&clocksource_mutex); |
| 500 | if (len) { | ||
| 501 | struct clocksource *cs; | ||
| 502 | |||
| 503 | ovr = clocksource_override; | ||
| 504 | /* try to select it: */ | ||
| 505 | list_for_each_entry(cs, &clocksource_list, list) { | ||
| 506 | if (strlen(cs->name) == len && | ||
| 507 | !strcmp(cs->name, override_name)) | ||
| 508 | ovr = cs; | ||
| 509 | } | ||
| 510 | } | ||
| 511 | |||
| 512 | /* | ||
| 513 | * Check to make sure we don't switch to a non-highres capable | ||
| 514 | * clocksource if the tick code is in oneshot mode (highres or nohz) | ||
| 515 | */ | ||
| 516 | if (tick_oneshot_mode_active() && ovr && | ||
| 517 | !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { | ||
| 518 | printk(KERN_WARNING "%s clocksource is not HRT compatible. " | ||
| 519 | "Cannot switch while in HRT/NOHZ mode\n", ovr->name); | ||
| 520 | ovr = NULL; | ||
| 521 | override_name[0] = 0; | ||
| 522 | } | ||
| 523 | |||
| 524 | /* Reselect, when the override name has changed */ | ||
| 525 | if (ovr != clocksource_override) { | ||
| 526 | clocksource_override = ovr; | ||
| 527 | next_clocksource = select_clocksource(); | ||
| 528 | } | ||
| 529 | |||
| 530 | spin_unlock_irq(&clocksource_lock); | ||
| 531 | 607 | ||
| 532 | return ret; | 608 | return ret; |
| 533 | } | 609 | } |
| @@ -547,7 +623,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, | |||
| 547 | struct clocksource *src; | 623 | struct clocksource *src; |
| 548 | ssize_t count = 0; | 624 | ssize_t count = 0; |
| 549 | 625 | ||
| 550 | spin_lock_irq(&clocksource_lock); | 626 | mutex_lock(&clocksource_mutex); |
| 551 | list_for_each_entry(src, &clocksource_list, list) { | 627 | list_for_each_entry(src, &clocksource_list, list) { |
| 552 | /* | 628 | /* |
| 553 | * Don't show non-HRES clocksource if the tick code is | 629 | * Don't show non-HRES clocksource if the tick code is |
| @@ -559,7 +635,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, | |||
| 559 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), | 635 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), |
| 560 | "%s ", src->name); | 636 | "%s ", src->name); |
| 561 | } | 637 | } |
| 562 | spin_unlock_irq(&clocksource_lock); | 638 | mutex_unlock(&clocksource_mutex); |
| 563 | 639 | ||
| 564 | count += snprintf(buf + count, | 640 | count += snprintf(buf + count, |
| 565 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); | 641 | max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); |
| @@ -614,11 +690,10 @@ device_initcall(init_clocksource_sysfs); | |||
| 614 | */ | 690 | */ |
| 615 | static int __init boot_override_clocksource(char* str) | 691 | static int __init boot_override_clocksource(char* str) |
| 616 | { | 692 | { |
| 617 | unsigned long flags; | 693 | mutex_lock(&clocksource_mutex); |
| 618 | spin_lock_irqsave(&clocksource_lock, flags); | ||
| 619 | if (str) | 694 | if (str) |
| 620 | strlcpy(override_name, str, sizeof(override_name)); | 695 | strlcpy(override_name, str, sizeof(override_name)); |
| 621 | spin_unlock_irqrestore(&clocksource_lock, flags); | 696 | mutex_unlock(&clocksource_mutex); |
| 622 | return 1; | 697 | return 1; |
| 623 | } | 698 | } |
| 624 | 699 | ||
