author    Thomas Gleixner <tglx@linutronix.de>  2013-07-12 06:34:42 -0400
committer Thomas Gleixner <tglx@linutronix.de>  2013-07-12 06:34:42 -0400
commit    f2006e27396f55276f24434f56e208d86e7f9908 (patch)
tree      71896db916d33888b4286f80117d3cac0da40e6d /kernel/time
parent    e399eb56a6110e13f97e644658648602e2b08de7 (diff)
parent    9903883f1dd6e86f286b7bfa6e4b423f98c1cd9e (diff)
Merge branch 'linus' into timers/urgent

Get upstream changes so we can apply fixes against them.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/time')
-rw-r--r--  kernel/time/Makefile                |    2
-rw-r--r--  kernel/time/alarmtimer.c            |   47
-rw-r--r--  kernel/time/clockevents.c           |  271
-rw-r--r--  kernel/time/clocksource.c           |  266
-rw-r--r--  kernel/time/sched_clock.c           |  212
-rw-r--r--  kernel/time/tick-broadcast.c        |  126
-rw-r--r--  kernel/time/tick-common.c           |  197
-rw-r--r--  kernel/time/tick-internal.h         |   17
-rw-r--r--  kernel/time/timekeeping.c           |   65
-rw-r--r--  kernel/time/timekeeping_debug.c     |   72
-rw-r--r--  kernel/time/timekeeping_internal.h  |   14
11 files changed, 1026 insertions(+), 263 deletions(-)
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ff7d9d2ab504..9250130646f5 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -4,6 +4,8 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o
4obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o 4obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
5obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o 5obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
6obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o 6obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
7obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
7obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o 8obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
8obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o 9obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
9obj-$(CONFIG_TIMER_STATS) += timer_stats.o 10obj-$(CONFIG_TIMER_STATS) += timer_stats.o
11obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f11d83b12949..eec50fcef9e4 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -199,6 +199,13 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
199 199
200} 200}
201 201
202ktime_t alarm_expires_remaining(const struct alarm *alarm)
203{
204 struct alarm_base *base = &alarm_bases[alarm->type];
205 return ktime_sub(alarm->node.expires, base->gettime());
206}
207EXPORT_SYMBOL_GPL(alarm_expires_remaining);
208
202#ifdef CONFIG_RTC_CLASS 209#ifdef CONFIG_RTC_CLASS
203/** 210/**
204 * alarmtimer_suspend - Suspend time callback 211 * alarmtimer_suspend - Suspend time callback
@@ -303,9 +310,10 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
303 alarm->type = type; 310 alarm->type = type;
304 alarm->state = ALARMTIMER_STATE_INACTIVE; 311 alarm->state = ALARMTIMER_STATE_INACTIVE;
305} 312}
313EXPORT_SYMBOL_GPL(alarm_init);
306 314
307/** 315/**
308 * alarm_start - Sets an alarm to fire 316 * alarm_start - Sets an absolute alarm to fire
309 * @alarm: ptr to alarm to set 317 * @alarm: ptr to alarm to set
310 * @start: time to run the alarm 318 * @start: time to run the alarm
311 */ 319 */
@@ -323,6 +331,34 @@ int alarm_start(struct alarm *alarm, ktime_t start)
323 spin_unlock_irqrestore(&base->lock, flags); 331 spin_unlock_irqrestore(&base->lock, flags);
324 return ret; 332 return ret;
325} 333}
334EXPORT_SYMBOL_GPL(alarm_start);
335
336/**
337 * alarm_start_relative - Sets a relative alarm to fire
338 * @alarm: ptr to alarm to set
339 * @start: time relative to now to run the alarm
340 */
341int alarm_start_relative(struct alarm *alarm, ktime_t start)
342{
343 struct alarm_base *base = &alarm_bases[alarm->type];
344
345 start = ktime_add(start, base->gettime());
346 return alarm_start(alarm, start);
347}
348EXPORT_SYMBOL_GPL(alarm_start_relative);
349
350void alarm_restart(struct alarm *alarm)
351{
352 struct alarm_base *base = &alarm_bases[alarm->type];
353 unsigned long flags;
354
355 spin_lock_irqsave(&base->lock, flags);
356 hrtimer_set_expires(&alarm->timer, alarm->node.expires);
357 hrtimer_restart(&alarm->timer);
358 alarmtimer_enqueue(base, alarm);
359 spin_unlock_irqrestore(&base->lock, flags);
360}
361EXPORT_SYMBOL_GPL(alarm_restart);
326 362
327/** 363/**
328 * alarm_try_to_cancel - Tries to cancel an alarm timer 364 * alarm_try_to_cancel - Tries to cancel an alarm timer
@@ -344,6 +380,7 @@ int alarm_try_to_cancel(struct alarm *alarm)
344 spin_unlock_irqrestore(&base->lock, flags); 380 spin_unlock_irqrestore(&base->lock, flags);
345 return ret; 381 return ret;
346} 382}
383EXPORT_SYMBOL_GPL(alarm_try_to_cancel);
347 384
348 385
349/** 386/**
@@ -361,6 +398,7 @@ int alarm_cancel(struct alarm *alarm)
361 cpu_relax(); 398 cpu_relax();
362 } 399 }
363} 400}
401EXPORT_SYMBOL_GPL(alarm_cancel);
364 402
365 403
366u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) 404u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
@@ -393,8 +431,15 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
393 alarm->node.expires = ktime_add(alarm->node.expires, interval); 431 alarm->node.expires = ktime_add(alarm->node.expires, interval);
394 return overrun; 432 return overrun;
395} 433}
434EXPORT_SYMBOL_GPL(alarm_forward);
396 435
436u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
437{
438 struct alarm_base *base = &alarm_bases[alarm->type];
397 439
440 return alarm_forward(alarm, base->gettime(), interval);
441}
442EXPORT_SYMBOL_GPL(alarm_forward_now);
398 443
399 444
400/** 445/**
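
The alarmtimer hunks above add and export alarm_expires_remaining(), alarm_start_relative(), alarm_restart() and alarm_forward_now(), and export the existing alarm helpers for module use. A minimal, hedged sketch of how a loadable driver might use these exported calls; the module name and the five second interval are illustrative only, not part of this patch:

/*
 * Illustrative module using the newly exported alarmtimer helpers.
 */
#include <linux/module.h>
#include <linux/alarmtimer.h>
#include <linux/ktime.h>

static struct alarm demo_alarm;

/* Fires from the alarmtimer base; return value controls re-arming. */
static enum alarmtimer_restart demo_alarm_fn(struct alarm *alarm, ktime_t now)
{
	pr_info("demo alarm fired\n");
	return ALARMTIMER_NORESTART;
}

static int __init demo_init(void)
{
	alarm_init(&demo_alarm, ALARM_REALTIME, demo_alarm_fn);
	/* Arm relative to now, i.e. base->gettime() + 5s */
	alarm_start_relative(&demo_alarm, ktime_set(5, 0));
	pr_info("expires in %lld ns\n",
		ktime_to_ns(alarm_expires_remaining(&demo_alarm)));
	return 0;
}

static void __exit demo_exit(void)
{
	alarm_cancel(&demo_alarm);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
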
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index c6d6400ee137..38959c866789 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -15,20 +15,23 @@
15#include <linux/hrtimer.h> 15#include <linux/hrtimer.h>
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/notifier.h>
19#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/device.h>
20 20
21#include "tick-internal.h" 21#include "tick-internal.h"
22 22
23/* The registered clock event devices */ 23/* The registered clock event devices */
24static LIST_HEAD(clockevent_devices); 24static LIST_HEAD(clockevent_devices);
25static LIST_HEAD(clockevents_released); 25static LIST_HEAD(clockevents_released);
26
27/* Notification for clock events */
28static RAW_NOTIFIER_HEAD(clockevents_chain);
29
30/* Protection for the above */ 26/* Protection for the above */
31static DEFINE_RAW_SPINLOCK(clockevents_lock); 27static DEFINE_RAW_SPINLOCK(clockevents_lock);
28/* Protection for unbind operations */
29static DEFINE_MUTEX(clockevents_mutex);
30
31struct ce_unbind {
32 struct clock_event_device *ce;
33 int res;
34};
32 35
33/** 36/**
34 * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds 37 * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
@@ -232,47 +235,107 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
232 return (rc && force) ? clockevents_program_min_delta(dev) : rc; 235 return (rc && force) ? clockevents_program_min_delta(dev) : rc;
233} 236}
234 237
235/** 238/*
236 * clockevents_register_notifier - register a clock events change listener 239 * Called after a notify add to make devices available which were
240 * released from the notifier call.
237 */ 241 */
238int clockevents_register_notifier(struct notifier_block *nb) 242static void clockevents_notify_released(void)
239{ 243{
240 unsigned long flags; 244 struct clock_event_device *dev;
241 int ret;
242 245
243 raw_spin_lock_irqsave(&clockevents_lock, flags); 246 while (!list_empty(&clockevents_released)) {
244 ret = raw_notifier_chain_register(&clockevents_chain, nb); 247 dev = list_entry(clockevents_released.next,
245 raw_spin_unlock_irqrestore(&clockevents_lock, flags); 248 struct clock_event_device, list);
249 list_del(&dev->list);
250 list_add(&dev->list, &clockevent_devices);
251 tick_check_new_device(dev);
252 }
253}
246 254
247 return ret; 255/*
256 * Try to install a replacement clock event device
257 */
258static int clockevents_replace(struct clock_event_device *ced)
259{
260 struct clock_event_device *dev, *newdev = NULL;
261
262 list_for_each_entry(dev, &clockevent_devices, list) {
263 if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
264 continue;
265
266 if (!tick_check_replacement(newdev, dev))
267 continue;
268
269 if (!try_module_get(dev->owner))
270 continue;
271
272 if (newdev)
273 module_put(newdev->owner);
274 newdev = dev;
275 }
276 if (newdev) {
277 tick_install_replacement(newdev);
278 list_del_init(&ced->list);
279 }
280 return newdev ? 0 : -EBUSY;
248} 281}
249 282
250/* 283/*
251 * Notify about a clock event change. Called with clockevents_lock 284 * Called with clockevents_mutex and clockevents_lock held
252 * held.
253 */ 285 */
254static void clockevents_do_notify(unsigned long reason, void *dev) 286static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
255{ 287{
256 raw_notifier_call_chain(&clockevents_chain, reason, dev); 288 /* Fast track. Device is unused */
289 if (ced->mode == CLOCK_EVT_MODE_UNUSED) {
290 list_del_init(&ced->list);
291 return 0;
292 }
293
294 return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY;
257} 295}
258 296
259/* 297/*
260 * Called after a notify add to make devices available which were 298 * SMP function call to unbind a device
261 * released from the notifier call.
262 */ 299 */
263static void clockevents_notify_released(void) 300static void __clockevents_unbind(void *arg)
264{ 301{
265 struct clock_event_device *dev; 302 struct ce_unbind *cu = arg;
303 int res;
304
305 raw_spin_lock(&clockevents_lock);
306 res = __clockevents_try_unbind(cu->ce, smp_processor_id());
307 if (res == -EAGAIN)
308 res = clockevents_replace(cu->ce);
309 cu->res = res;
310 raw_spin_unlock(&clockevents_lock);
311}
266 312
267 while (!list_empty(&clockevents_released)) { 313/*
268 dev = list_entry(clockevents_released.next, 314 * Issues smp function call to unbind a per cpu device. Called with
269 struct clock_event_device, list); 315 * clockevents_mutex held.
270 list_del(&dev->list); 316 */
271 list_add(&dev->list, &clockevent_devices); 317static int clockevents_unbind(struct clock_event_device *ced, int cpu)
272 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); 318{
273 } 319 struct ce_unbind cu = { .ce = ced, .res = -ENODEV };
320
321 smp_call_function_single(cpu, __clockevents_unbind, &cu, 1);
322 return cu.res;
274} 323}
275 324
325/*
326 * Unbind a clockevents device.
327 */
328int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
329{
330 int ret;
331
332 mutex_lock(&clockevents_mutex);
333 ret = clockevents_unbind(ced, cpu);
334 mutex_unlock(&clockevents_mutex);
335 return ret;
336}
337EXPORT_SYMBOL_GPL(clockevents_unbind);
338
276/** 339/**
277 * clockevents_register_device - register a clock event device 340 * clockevents_register_device - register a clock event device
278 * @dev: device to register 341 * @dev: device to register
@@ -290,7 +353,7 @@ void clockevents_register_device(struct clock_event_device *dev)
290 raw_spin_lock_irqsave(&clockevents_lock, flags); 353 raw_spin_lock_irqsave(&clockevents_lock, flags);
291 354
292 list_add(&dev->list, &clockevent_devices); 355 list_add(&dev->list, &clockevent_devices);
293 clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); 356 tick_check_new_device(dev);
294 clockevents_notify_released(); 357 clockevents_notify_released();
295 358
296 raw_spin_unlock_irqrestore(&clockevents_lock, flags); 359 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
@@ -386,6 +449,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
386 * released list and do a notify add later. 449 * released list and do a notify add later.
387 */ 450 */
388 if (old) { 451 if (old) {
452 module_put(old->owner);
389 clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); 453 clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
390 list_del(&old->list); 454 list_del(&old->list);
391 list_add(&old->list, &clockevents_released); 455 list_add(&old->list, &clockevents_released);
@@ -433,10 +497,36 @@ void clockevents_notify(unsigned long reason, void *arg)
433 int cpu; 497 int cpu;
434 498
435 raw_spin_lock_irqsave(&clockevents_lock, flags); 499 raw_spin_lock_irqsave(&clockevents_lock, flags);
436 clockevents_do_notify(reason, arg);
437 500
438 switch (reason) { 501 switch (reason) {
502 case CLOCK_EVT_NOTIFY_BROADCAST_ON:
503 case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
504 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
505 tick_broadcast_on_off(reason, arg);
506 break;
507
508 case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
509 case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
510 tick_broadcast_oneshot_control(reason);
511 break;
512
513 case CLOCK_EVT_NOTIFY_CPU_DYING:
514 tick_handover_do_timer(arg);
515 break;
516
517 case CLOCK_EVT_NOTIFY_SUSPEND:
518 tick_suspend();
519 tick_suspend_broadcast();
520 break;
521
522 case CLOCK_EVT_NOTIFY_RESUME:
523 tick_resume();
524 break;
525
439 case CLOCK_EVT_NOTIFY_CPU_DEAD: 526 case CLOCK_EVT_NOTIFY_CPU_DEAD:
527 tick_shutdown_broadcast_oneshot(arg);
528 tick_shutdown_broadcast(arg);
529 tick_shutdown(arg);
440 /* 530 /*
441 * Unregister the clock event devices which were 531 * Unregister the clock event devices which were
442 * released from the users in the notify chain. 532 * released from the users in the notify chain.
@@ -462,4 +552,123 @@ void clockevents_notify(unsigned long reason, void *arg)
462 raw_spin_unlock_irqrestore(&clockevents_lock, flags); 552 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
463} 553}
464EXPORT_SYMBOL_GPL(clockevents_notify); 554EXPORT_SYMBOL_GPL(clockevents_notify);
555
556#ifdef CONFIG_SYSFS
557struct bus_type clockevents_subsys = {
558 .name = "clockevents",
559 .dev_name = "clockevent",
560};
561
562static DEFINE_PER_CPU(struct device, tick_percpu_dev);
563static struct tick_device *tick_get_tick_dev(struct device *dev);
564
565static ssize_t sysfs_show_current_tick_dev(struct device *dev,
566 struct device_attribute *attr,
567 char *buf)
568{
569 struct tick_device *td;
570 ssize_t count = 0;
571
572 raw_spin_lock_irq(&clockevents_lock);
573 td = tick_get_tick_dev(dev);
574 if (td && td->evtdev)
575 count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name);
576 raw_spin_unlock_irq(&clockevents_lock);
577 return count;
578}
579static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL);
580
581/* We don't support the abomination of removable broadcast devices */
582static ssize_t sysfs_unbind_tick_dev(struct device *dev,
583 struct device_attribute *attr,
584 const char *buf, size_t count)
585{
586 char name[CS_NAME_LEN];
587 size_t ret = sysfs_get_uname(buf, name, count);
588 struct clock_event_device *ce;
589
590 if (ret < 0)
591 return ret;
592
593 ret = -ENODEV;
594 mutex_lock(&clockevents_mutex);
595 raw_spin_lock_irq(&clockevents_lock);
596 list_for_each_entry(ce, &clockevent_devices, list) {
597 if (!strcmp(ce->name, name)) {
598 ret = __clockevents_try_unbind(ce, dev->id);
599 break;
600 }
601 }
602 raw_spin_unlock_irq(&clockevents_lock);
603 /*
604 * We hold clockevents_mutex, so ce can't go away
605 */
606 if (ret == -EAGAIN)
607 ret = clockevents_unbind(ce, dev->id);
608 mutex_unlock(&clockevents_mutex);
609 return ret ? ret : count;
610}
611static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev);
612
613#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
614static struct device tick_bc_dev = {
615 .init_name = "broadcast",
616 .id = 0,
617 .bus = &clockevents_subsys,
618};
619
620static struct tick_device *tick_get_tick_dev(struct device *dev)
621{
622 return dev == &tick_bc_dev ? tick_get_broadcast_device() :
623 &per_cpu(tick_cpu_device, dev->id);
624}
625
626static __init int tick_broadcast_init_sysfs(void)
627{
628 int err = device_register(&tick_bc_dev);
629
630 if (!err)
631 err = device_create_file(&tick_bc_dev, &dev_attr_current_device);
632 return err;
633}
634#else
635static struct tick_device *tick_get_tick_dev(struct device *dev)
636{
637 return &per_cpu(tick_cpu_device, dev->id);
638}
639static inline int tick_broadcast_init_sysfs(void) { return 0; }
465#endif 640#endif
641
642static int __init tick_init_sysfs(void)
643{
644 int cpu;
645
646 for_each_possible_cpu(cpu) {
647 struct device *dev = &per_cpu(tick_percpu_dev, cpu);
648 int err;
649
650 dev->id = cpu;
651 dev->bus = &clockevents_subsys;
652 err = device_register(dev);
653 if (!err)
654 err = device_create_file(dev, &dev_attr_current_device);
655 if (!err)
656 err = device_create_file(dev, &dev_attr_unbind_device);
657 if (err)
658 return err;
659 }
660 return tick_broadcast_init_sysfs();
661}
662
663static int __init clockevents_init_sysfs(void)
664{
665 int err = subsys_system_register(&clockevents_subsys, NULL);
666
667 if (!err)
668 err = tick_init_sysfs();
669 return err;
670}
671device_initcall(clockevents_init_sysfs);
672#endif /* SYSFS */
673
674#endif /* GENERIC_CLOCK_EVENTS */
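
The clockevents changes above drop the notifier chain in favour of direct tick calls and add an unbind path: clockevents_unbind_device() plus the per-cpu sysfs unbind_device attribute. A hedged sketch of how a clockevent driver could detach its per-cpu devices on teardown; the my_clockevent per-cpu variable and the teardown function are assumptions of this example:

/*
 * Illustrative teardown path using the new clockevents_unbind_device()
 * interface to detach this driver's per-cpu devices.
 */
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(struct clock_event_device, my_clockevent);

static void my_driver_teardown(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		struct clock_event_device *ce = &per_cpu(my_clockevent, cpu);

		/* Returns 0 on success, an error if the device is still in use. */
		if (clockevents_unbind_device(ce, cpu))
			pr_warn("cpu%d: clockevent still busy\n", cpu);
	}
}
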
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c9583382141a..50a8736757f3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -31,6 +31,8 @@
31#include <linux/tick.h> 31#include <linux/tick.h>
32#include <linux/kthread.h> 32#include <linux/kthread.h>
33 33
34#include "tick-internal.h"
35
34void timecounter_init(struct timecounter *tc, 36void timecounter_init(struct timecounter *tc,
35 const struct cyclecounter *cc, 37 const struct cyclecounter *cc,
36 u64 start_tstamp) 38 u64 start_tstamp)
@@ -174,11 +176,12 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
174static struct clocksource *curr_clocksource; 176static struct clocksource *curr_clocksource;
175static LIST_HEAD(clocksource_list); 177static LIST_HEAD(clocksource_list);
176static DEFINE_MUTEX(clocksource_mutex); 178static DEFINE_MUTEX(clocksource_mutex);
177static char override_name[32]; 179static char override_name[CS_NAME_LEN];
178static int finished_booting; 180static int finished_booting;
179 181
180#ifdef CONFIG_CLOCKSOURCE_WATCHDOG 182#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
181static void clocksource_watchdog_work(struct work_struct *work); 183static void clocksource_watchdog_work(struct work_struct *work);
184static void clocksource_select(void);
182 185
183static LIST_HEAD(watchdog_list); 186static LIST_HEAD(watchdog_list);
184static struct clocksource *watchdog; 187static struct clocksource *watchdog;
@@ -299,13 +302,30 @@ static void clocksource_watchdog(unsigned long data)
299 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && 302 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
300 (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && 303 (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
301 (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { 304 (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
305 /* Mark it valid for high-res. */
302 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; 306 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
307
308 /*
309 * clocksource_done_booting() will sort it if
310 * finished_booting is not set yet.
311 */
312 if (!finished_booting)
313 continue;
314
303 /* 315 /*
304 * We just marked the clocksource as highres-capable, 316 * If this is not the current clocksource let
305 * notify the rest of the system as well so that we 317 * the watchdog thread reselect it. Due to the
306 * transition into high-res mode: 318 * change to high res this clocksource might
319 * be preferred now. If it is the current
320 * clocksource let the tick code know about
321 * that change.
307 */ 322 */
308 tick_clock_notify(); 323 if (cs != curr_clocksource) {
324 cs->flags |= CLOCK_SOURCE_RESELECT;
325 schedule_work(&watchdog_work);
326 } else {
327 tick_clock_notify();
328 }
309 } 329 }
310 } 330 }
311 331
@@ -388,44 +408,39 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
388 408
389static void clocksource_dequeue_watchdog(struct clocksource *cs) 409static void clocksource_dequeue_watchdog(struct clocksource *cs)
390{ 410{
391 struct clocksource *tmp;
392 unsigned long flags; 411 unsigned long flags;
393 412
394 spin_lock_irqsave(&watchdog_lock, flags); 413 spin_lock_irqsave(&watchdog_lock, flags);
395 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { 414 if (cs != watchdog) {
396 /* cs is a watched clocksource. */ 415 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
397 list_del_init(&cs->wd_list); 416 /* cs is a watched clocksource. */
398 } else if (cs == watchdog) { 417 list_del_init(&cs->wd_list);
399 /* Reset watchdog cycles */ 418 /* Check if the watchdog timer needs to be stopped. */
400 clocksource_reset_watchdog(); 419 clocksource_stop_watchdog();
401 /* Current watchdog is removed. Find an alternative. */
402 watchdog = NULL;
403 list_for_each_entry(tmp, &clocksource_list, list) {
404 if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
405 continue;
406 if (!watchdog || tmp->rating > watchdog->rating)
407 watchdog = tmp;
408 } 420 }
409 } 421 }
410 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
411 /* Check if the watchdog timer needs to be stopped. */
412 clocksource_stop_watchdog();
413 spin_unlock_irqrestore(&watchdog_lock, flags); 422 spin_unlock_irqrestore(&watchdog_lock, flags);
414} 423}
415 424
416static int clocksource_watchdog_kthread(void *data) 425static int __clocksource_watchdog_kthread(void)
417{ 426{
418 struct clocksource *cs, *tmp; 427 struct clocksource *cs, *tmp;
419 unsigned long flags; 428 unsigned long flags;
420 LIST_HEAD(unstable); 429 LIST_HEAD(unstable);
430 int select = 0;
421 431
422 mutex_lock(&clocksource_mutex);
423 spin_lock_irqsave(&watchdog_lock, flags); 432 spin_lock_irqsave(&watchdog_lock, flags);
424 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) 433 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
425 if (cs->flags & CLOCK_SOURCE_UNSTABLE) { 434 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
426 list_del_init(&cs->wd_list); 435 list_del_init(&cs->wd_list);
427 list_add(&cs->wd_list, &unstable); 436 list_add(&cs->wd_list, &unstable);
437 select = 1;
428 } 438 }
439 if (cs->flags & CLOCK_SOURCE_RESELECT) {
440 cs->flags &= ~CLOCK_SOURCE_RESELECT;
441 select = 1;
442 }
443 }
429 /* Check if the watchdog timer needs to be stopped. */ 444 /* Check if the watchdog timer needs to be stopped. */
430 clocksource_stop_watchdog(); 445 clocksource_stop_watchdog();
431 spin_unlock_irqrestore(&watchdog_lock, flags); 446 spin_unlock_irqrestore(&watchdog_lock, flags);
@@ -435,10 +450,23 @@ static int clocksource_watchdog_kthread(void *data)
435 list_del_init(&cs->wd_list); 450 list_del_init(&cs->wd_list);
436 __clocksource_change_rating(cs, 0); 451 __clocksource_change_rating(cs, 0);
437 } 452 }
453 return select;
454}
455
456static int clocksource_watchdog_kthread(void *data)
457{
458 mutex_lock(&clocksource_mutex);
459 if (__clocksource_watchdog_kthread())
460 clocksource_select();
438 mutex_unlock(&clocksource_mutex); 461 mutex_unlock(&clocksource_mutex);
439 return 0; 462 return 0;
440} 463}
441 464
465static bool clocksource_is_watchdog(struct clocksource *cs)
466{
467 return cs == watchdog;
468}
469
442#else /* CONFIG_CLOCKSOURCE_WATCHDOG */ 470#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
443 471
444static void clocksource_enqueue_watchdog(struct clocksource *cs) 472static void clocksource_enqueue_watchdog(struct clocksource *cs)
@@ -449,7 +477,8 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
449 477
450static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } 478static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
451static inline void clocksource_resume_watchdog(void) { } 479static inline void clocksource_resume_watchdog(void) { }
452static inline int clocksource_watchdog_kthread(void *data) { return 0; } 480static inline int __clocksource_watchdog_kthread(void) { return 0; }
481static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
453 482
454#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ 483#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
455 484
@@ -553,24 +582,42 @@ static u64 clocksource_max_deferment(struct clocksource *cs)
553 582
554#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET 583#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
555 584
556/** 585static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
557 * clocksource_select - Select the best clocksource available
558 *
559 * Private function. Must hold clocksource_mutex when called.
560 *
561 * Select the clocksource with the best rating, or the clocksource,
562 * which is selected by userspace override.
563 */
564static void clocksource_select(void)
565{ 586{
566 struct clocksource *best, *cs; 587 struct clocksource *cs;
567 588
568 if (!finished_booting || list_empty(&clocksource_list)) 589 if (!finished_booting || list_empty(&clocksource_list))
590 return NULL;
591
592 /*
593 * We pick the clocksource with the highest rating. If oneshot
594 * mode is active, we pick the highres valid clocksource with
595 * the best rating.
596 */
597 list_for_each_entry(cs, &clocksource_list, list) {
598 if (skipcur && cs == curr_clocksource)
599 continue;
600 if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
601 continue;
602 return cs;
603 }
604 return NULL;
605}
606
607static void __clocksource_select(bool skipcur)
608{
609 bool oneshot = tick_oneshot_mode_active();
610 struct clocksource *best, *cs;
611
612 /* Find the best suitable clocksource */
613 best = clocksource_find_best(oneshot, skipcur);
614 if (!best)
569 return; 615 return;
570 /* First clocksource on the list has the best rating. */ 616
571 best = list_first_entry(&clocksource_list, struct clocksource, list);
572 /* Check for the override clocksource. */ 617 /* Check for the override clocksource. */
573 list_for_each_entry(cs, &clocksource_list, list) { 618 list_for_each_entry(cs, &clocksource_list, list) {
619 if (skipcur && cs == curr_clocksource)
620 continue;
574 if (strcmp(cs->name, override_name) != 0) 621 if (strcmp(cs->name, override_name) != 0)
575 continue; 622 continue;
576 /* 623 /*
@@ -578,8 +625,7 @@ static void clocksource_select(void)
578 * capable clocksource if the tick code is in oneshot 625 * capable clocksource if the tick code is in oneshot
579 * mode (highres or nohz) 626 * mode (highres or nohz)
580 */ 627 */
581 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && 628 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
582 tick_oneshot_mode_active()) {
583 /* Override clocksource cannot be used. */ 629 /* Override clocksource cannot be used. */
584 printk(KERN_WARNING "Override clocksource %s is not " 630 printk(KERN_WARNING "Override clocksource %s is not "
585 "HRT compatible. Cannot switch while in " 631 "HRT compatible. Cannot switch while in "
@@ -590,16 +636,35 @@ static void clocksource_select(void)
590 best = cs; 636 best = cs;
591 break; 637 break;
592 } 638 }
593 if (curr_clocksource != best) { 639
594 printk(KERN_INFO "Switching to clocksource %s\n", best->name); 640 if (curr_clocksource != best && !timekeeping_notify(best)) {
641 pr_info("Switched to clocksource %s\n", best->name);
595 curr_clocksource = best; 642 curr_clocksource = best;
596 timekeeping_notify(curr_clocksource);
597 } 643 }
598} 644}
599 645
646/**
647 * clocksource_select - Select the best clocksource available
648 *
649 * Private function. Must hold clocksource_mutex when called.
650 *
651 * Select the clocksource with the best rating, or the clocksource,
652 * which is selected by userspace override.
653 */
654static void clocksource_select(void)
655{
656 return __clocksource_select(false);
657}
658
659static void clocksource_select_fallback(void)
660{
661 return __clocksource_select(true);
662}
663
600#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */ 664#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
601 665
602static inline void clocksource_select(void) { } 666static inline void clocksource_select(void) { }
667static inline void clocksource_select_fallback(void) { }
603 668
604#endif 669#endif
605 670
@@ -614,16 +679,11 @@ static int __init clocksource_done_booting(void)
614{ 679{
615 mutex_lock(&clocksource_mutex); 680 mutex_lock(&clocksource_mutex);
616 curr_clocksource = clocksource_default_clock(); 681 curr_clocksource = clocksource_default_clock();
617 mutex_unlock(&clocksource_mutex);
618
619 finished_booting = 1; 682 finished_booting = 1;
620
621 /* 683 /*
622 * Run the watchdog first to eliminate unstable clock sources 684 * Run the watchdog first to eliminate unstable clock sources
623 */ 685 */
624 clocksource_watchdog_kthread(NULL); 686 __clocksource_watchdog_kthread();
625
626 mutex_lock(&clocksource_mutex);
627 clocksource_select(); 687 clocksource_select();
628 mutex_unlock(&clocksource_mutex); 688 mutex_unlock(&clocksource_mutex);
629 return 0; 689 return 0;
@@ -756,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
756 list_del(&cs->list); 816 list_del(&cs->list);
757 cs->rating = rating; 817 cs->rating = rating;
758 clocksource_enqueue(cs); 818 clocksource_enqueue(cs);
759 clocksource_select();
760} 819}
761 820
762/** 821/**
@@ -768,21 +827,47 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
768{ 827{
769 mutex_lock(&clocksource_mutex); 828 mutex_lock(&clocksource_mutex);
770 __clocksource_change_rating(cs, rating); 829 __clocksource_change_rating(cs, rating);
830 clocksource_select();
771 mutex_unlock(&clocksource_mutex); 831 mutex_unlock(&clocksource_mutex);
772} 832}
773EXPORT_SYMBOL(clocksource_change_rating); 833EXPORT_SYMBOL(clocksource_change_rating);
774 834
835/*
836 * Unbind clocksource @cs. Called with clocksource_mutex held
837 */
838static int clocksource_unbind(struct clocksource *cs)
839{
840 /*
841 * I really can't convince myself to support this on hardware
842 * designed by lobotomized monkeys.
843 */
844 if (clocksource_is_watchdog(cs))
845 return -EBUSY;
846
847 if (cs == curr_clocksource) {
848 /* Select and try to install a replacement clock source */
849 clocksource_select_fallback();
850 if (curr_clocksource == cs)
851 return -EBUSY;
852 }
853 clocksource_dequeue_watchdog(cs);
854 list_del_init(&cs->list);
855 return 0;
856}
857
775/** 858/**
776 * clocksource_unregister - remove a registered clocksource 859 * clocksource_unregister - remove a registered clocksource
777 * @cs: clocksource to be unregistered 860 * @cs: clocksource to be unregistered
778 */ 861 */
779void clocksource_unregister(struct clocksource *cs) 862int clocksource_unregister(struct clocksource *cs)
780{ 863{
864 int ret = 0;
865
781 mutex_lock(&clocksource_mutex); 866 mutex_lock(&clocksource_mutex);
782 clocksource_dequeue_watchdog(cs); 867 if (!list_empty(&cs->list))
783 list_del(&cs->list); 868 ret = clocksource_unbind(cs);
784 clocksource_select();
785 mutex_unlock(&clocksource_mutex); 869 mutex_unlock(&clocksource_mutex);
870 return ret;
786} 871}
787EXPORT_SYMBOL(clocksource_unregister); 872EXPORT_SYMBOL(clocksource_unregister);
788 873
@@ -808,6 +893,23 @@ sysfs_show_current_clocksources(struct device *dev,
808 return count; 893 return count;
809} 894}
810 895
896size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
897{
898 size_t ret = cnt;
899
900 /* strings from sysfs write are not 0 terminated! */
901 if (!cnt || cnt >= CS_NAME_LEN)
902 return -EINVAL;
903
904 /* strip of \n: */
905 if (buf[cnt-1] == '\n')
906 cnt--;
907 if (cnt > 0)
908 memcpy(dst, buf, cnt);
909 dst[cnt] = 0;
910 return ret;
911}
912
811/** 913/**
812 * sysfs_override_clocksource - interface for manually overriding clocksource 914 * sysfs_override_clocksource - interface for manually overriding clocksource
813 * @dev: unused 915 * @dev: unused
@@ -822,22 +924,13 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
822 struct device_attribute *attr, 924 struct device_attribute *attr,
823 const char *buf, size_t count) 925 const char *buf, size_t count)
824{ 926{
825 size_t ret = count; 927 size_t ret;
826
827 /* strings from sysfs write are not 0 terminated! */
828 if (count >= sizeof(override_name))
829 return -EINVAL;
830
831 /* strip of \n: */
832 if (buf[count-1] == '\n')
833 count--;
834 928
835 mutex_lock(&clocksource_mutex); 929 mutex_lock(&clocksource_mutex);
836 930
837 if (count > 0) 931 ret = sysfs_get_uname(buf, override_name, count);
838 memcpy(override_name, buf, count); 932 if (ret >= 0)
839 override_name[count] = 0; 933 clocksource_select();
840 clocksource_select();
841 934
842 mutex_unlock(&clocksource_mutex); 935 mutex_unlock(&clocksource_mutex);
843 936
@@ -845,6 +938,40 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
845} 938}
846 939
847/** 940/**
941 * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource
942 * @dev: unused
943 * @attr: unused
944 * @buf: unused
945 * @count: length of buffer
946 *
947 * Takes input from sysfs interface for manually unbinding a clocksource.
948 */
949static ssize_t sysfs_unbind_clocksource(struct device *dev,
950 struct device_attribute *attr,
951 const char *buf, size_t count)
952{
953 struct clocksource *cs;
954 char name[CS_NAME_LEN];
955 size_t ret;
956
957 ret = sysfs_get_uname(buf, name, count);
958 if (ret < 0)
959 return ret;
960
961 ret = -ENODEV;
962 mutex_lock(&clocksource_mutex);
963 list_for_each_entry(cs, &clocksource_list, list) {
964 if (strcmp(cs->name, name))
965 continue;
966 ret = clocksource_unbind(cs);
967 break;
968 }
969 mutex_unlock(&clocksource_mutex);
970
971 return ret ? ret : count;
972}
973
974/**
848 * sysfs_show_available_clocksources - sysfs interface for listing clocksource 975 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
849 * @dev: unused 976 * @dev: unused
850 * @attr: unused 977 * @attr: unused
@@ -886,6 +1013,8 @@ sysfs_show_available_clocksources(struct device *dev,
886static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, 1013static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
887 sysfs_override_clocksource); 1014 sysfs_override_clocksource);
888 1015
1016static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);
1017
889static DEVICE_ATTR(available_clocksource, 0444, 1018static DEVICE_ATTR(available_clocksource, 0444,
890 sysfs_show_available_clocksources, NULL); 1019 sysfs_show_available_clocksources, NULL);
891 1020
@@ -910,6 +1039,9 @@ static int __init init_clocksource_sysfs(void)
910 &device_clocksource, 1039 &device_clocksource,
911 &dev_attr_current_clocksource); 1040 &dev_attr_current_clocksource);
912 if (!error) 1041 if (!error)
1042 error = device_create_file(&device_clocksource,
1043 &dev_attr_unbind_clocksource);
1044 if (!error)
913 error = device_create_file( 1045 error = device_create_file(
914 &device_clocksource, 1046 &device_clocksource,
915 &dev_attr_available_clocksource); 1047 &dev_attr_available_clocksource);
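
With this series clocksource_unregister() can fail: it returns an error when the clocksource is the watchdog or no replacement can be selected, so callers should check the result. A minimal, hedged sketch of a clocksource driver honouring that; the device name, rating, rate and the empty read routine are placeholders:

/*
 * Sketch: register a clocksource and handle the fact that
 * clocksource_unregister() now reports failure to detach.
 */
#include <linux/clocksource.h>

#define MY_RATE_HZ	1000000	/* illustrative counter rate */

static cycle_t my_cs_read(struct clocksource *cs)
{
	return 0;	/* would read the hardware counter here */
}

static struct clocksource my_cs = {
	.name	= "my_timer",
	.rating	= 200,
	.read	= my_cs_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int my_probe(void)
{
	return clocksource_register_hz(&my_cs, MY_RATE_HZ);
}

static int my_remove(void)
{
	int ret = clocksource_unregister(&my_cs);

	if (ret)
		pr_err("my_timer: still in use, cannot unregister (%d)\n", ret);
	return ret;
}
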
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
new file mode 100644
index 000000000000..a326f27d7f09
--- /dev/null
+++ b/kernel/time/sched_clock.c
@@ -0,0 +1,212 @@
1/*
2 * sched_clock.c: support for extending counters to full 64-bit ns counter
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/clocksource.h>
9#include <linux/init.h>
10#include <linux/jiffies.h>
11#include <linux/kernel.h>
12#include <linux/moduleparam.h>
13#include <linux/sched.h>
14#include <linux/syscore_ops.h>
15#include <linux/timer.h>
16#include <linux/sched_clock.h>
17
18struct clock_data {
19 u64 epoch_ns;
20 u32 epoch_cyc;
21 u32 epoch_cyc_copy;
22 unsigned long rate;
23 u32 mult;
24 u32 shift;
25 bool suspended;
26};
27
28static void sched_clock_poll(unsigned long wrap_ticks);
29static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
30static int irqtime = -1;
31
32core_param(irqtime, irqtime, int, 0400);
33
34static struct clock_data cd = {
35 .mult = NSEC_PER_SEC / HZ,
36};
37
38static u32 __read_mostly sched_clock_mask = 0xffffffff;
39
40static u32 notrace jiffy_sched_clock_read(void)
41{
42 return (u32)(jiffies - INITIAL_JIFFIES);
43}
44
45static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
46
47static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
48{
49 return (cyc * mult) >> shift;
50}
51
52static unsigned long long notrace sched_clock_32(void)
53{
54 u64 epoch_ns;
55 u32 epoch_cyc;
56 u32 cyc;
57
58 if (cd.suspended)
59 return cd.epoch_ns;
60
61 /*
62 * Load the epoch_cyc and epoch_ns atomically. We do this by
63 * ensuring that we always write epoch_cyc, epoch_ns and
64 * epoch_cyc_copy in strict order, and read them in strict order.
65 * If epoch_cyc and epoch_cyc_copy are not equal, then we're in
66 * the middle of an update, and we should repeat the load.
67 */
68 do {
69 epoch_cyc = cd.epoch_cyc;
70 smp_rmb();
71 epoch_ns = cd.epoch_ns;
72 smp_rmb();
73 } while (epoch_cyc != cd.epoch_cyc_copy);
74
75 cyc = read_sched_clock();
76 cyc = (cyc - epoch_cyc) & sched_clock_mask;
77 return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
78}
79
80/*
81 * Atomically update the sched_clock epoch.
82 */
83static void notrace update_sched_clock(void)
84{
85 unsigned long flags;
86 u32 cyc;
87 u64 ns;
88
89 cyc = read_sched_clock();
90 ns = cd.epoch_ns +
91 cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
92 cd.mult, cd.shift);
93 /*
94 * Write epoch_cyc and epoch_ns in a way that the update is
95 * detectable in cyc_to_fixed_sched_clock().
96 */
97 raw_local_irq_save(flags);
98 cd.epoch_cyc_copy = cyc;
99 smp_wmb();
100 cd.epoch_ns = ns;
101 smp_wmb();
102 cd.epoch_cyc = cyc;
103 raw_local_irq_restore(flags);
104}
105
106static void sched_clock_poll(unsigned long wrap_ticks)
107{
108 mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
109 update_sched_clock();
110}
111
112void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
113{
114 unsigned long r, w;
115 u64 res, wrap;
116 char r_unit;
117
118 if (cd.rate > rate)
119 return;
120
121 BUG_ON(bits > 32);
122 WARN_ON(!irqs_disabled());
123 read_sched_clock = read;
124 sched_clock_mask = (1 << bits) - 1;
125 cd.rate = rate;
126
127 /* calculate the mult/shift to convert counter ticks to ns. */
128 clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0);
129
130 r = rate;
131 if (r >= 4000000) {
132 r /= 1000000;
133 r_unit = 'M';
134 } else if (r >= 1000) {
135 r /= 1000;
136 r_unit = 'k';
137 } else
138 r_unit = ' ';
139
140 /* calculate how many ns until we wrap */
141 wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift);
142 do_div(wrap, NSEC_PER_MSEC);
143 w = wrap;
144
145 /* calculate the ns resolution of this counter */
146 res = cyc_to_ns(1ULL, cd.mult, cd.shift);
147 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
148 bits, r, r_unit, res, w);
149
150 /*
151 * Start the timer to keep sched_clock() properly updated and
152 * sets the initial epoch.
153 */
154 sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
155 update_sched_clock();
156
157 /*
158 * Ensure that sched_clock() starts off at 0ns
159 */
160 cd.epoch_ns = 0;
161
162 /* Enable IRQ time accounting if we have a fast enough sched_clock */
163 if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
164 enable_sched_clock_irqtime();
165
166 pr_debug("Registered %pF as sched_clock source\n", read);
167}
168
169unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32;
170
171unsigned long long notrace sched_clock(void)
172{
173 return sched_clock_func();
174}
175
176void __init sched_clock_postinit(void)
177{
178 /*
179 * If no sched_clock function has been provided at that point,
180 * make it the final one one.
181 */
182 if (read_sched_clock == jiffy_sched_clock_read)
183 setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
184
185 sched_clock_poll(sched_clock_timer.data);
186}
187
188static int sched_clock_suspend(void)
189{
190 sched_clock_poll(sched_clock_timer.data);
191 cd.suspended = true;
192 return 0;
193}
194
195static void sched_clock_resume(void)
196{
197 cd.epoch_cyc = read_sched_clock();
198 cd.epoch_cyc_copy = cd.epoch_cyc;
199 cd.suspended = false;
200}
201
202static struct syscore_ops sched_clock_ops = {
203 .suspend = sched_clock_suspend,
204 .resume = sched_clock_resume,
205};
206
207static int __init sched_clock_syscore_init(void)
208{
209 register_syscore_ops(&sched_clock_ops);
210 return 0;
211}
212device_initcall(sched_clock_syscore_init);
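
The new kernel/time/sched_clock.c lets platforms extend a free-running counter into a 64-bit ns sched_clock() via setup_sched_clock(). A hedged sketch of platform timer init code registering a 32-bit MMIO counter; timer_base, the register offset and MY_TIMER_RATE are assumptions of this example:

/*
 * Sketch: feed a free-running 32-bit counter into the generic
 * sched_clock machinery.
 */
#include <linux/init.h>
#include <linux/io.h>
#include <linux/sched_clock.h>

#define MY_TIMER_RATE	24000000	/* 24 MHz, illustrative */

static void __iomem *timer_base;

static u32 notrace my_sched_clock_read(void)
{
	return readl_relaxed(timer_base + 0x04);	/* counter value register */
}

static void __init my_timer_init(void)
{
	/* setup_sched_clock() expects to be called with interrupts disabled. */
	setup_sched_clock(my_sched_clock_read, 32, MY_TIMER_RATE);
}
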
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 20d6fba70652..6d3f91631de6 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -19,6 +19,7 @@
19#include <linux/profile.h> 19#include <linux/profile.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/smp.h> 21#include <linux/smp.h>
22#include <linux/module.h>
22 23
23#include "tick-internal.h" 24#include "tick-internal.h"
24 25
@@ -29,6 +30,7 @@
29 30
30static struct tick_device tick_broadcast_device; 31static struct tick_device tick_broadcast_device;
31static cpumask_var_t tick_broadcast_mask; 32static cpumask_var_t tick_broadcast_mask;
33static cpumask_var_t tick_broadcast_on;
32static cpumask_var_t tmpmask; 34static cpumask_var_t tmpmask;
33static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); 35static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
34static int tick_broadcast_force; 36static int tick_broadcast_force;
@@ -64,17 +66,34 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
64/* 66/*
65 * Check, if the device can be utilized as broadcast device: 67 * Check, if the device can be utilized as broadcast device:
66 */ 68 */
67int tick_check_broadcast_device(struct clock_event_device *dev) 69static bool tick_check_broadcast_device(struct clock_event_device *curdev,
70 struct clock_event_device *newdev)
71{
72 if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
73 (newdev->features & CLOCK_EVT_FEAT_C3STOP))
74 return false;
75
76 if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
77 !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
78 return false;
79
80 return !curdev || newdev->rating > curdev->rating;
81}
82
83/*
84 * Conditionally install/replace broadcast device
85 */
86void tick_install_broadcast_device(struct clock_event_device *dev)
68{ 87{
69 struct clock_event_device *cur = tick_broadcast_device.evtdev; 88 struct clock_event_device *cur = tick_broadcast_device.evtdev;
70 89
71 if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || 90 if (!tick_check_broadcast_device(cur, dev))
72 (tick_broadcast_device.evtdev && 91 return;
73 tick_broadcast_device.evtdev->rating >= dev->rating) ||
74 (dev->features & CLOCK_EVT_FEAT_C3STOP))
75 return 0;
76 92
77 clockevents_exchange_device(tick_broadcast_device.evtdev, dev); 93 if (!try_module_get(dev->owner))
94 return;
95
96 clockevents_exchange_device(cur, dev);
78 if (cur) 97 if (cur)
79 cur->event_handler = clockevents_handle_noop; 98 cur->event_handler = clockevents_handle_noop;
80 tick_broadcast_device.evtdev = dev; 99 tick_broadcast_device.evtdev = dev;
@@ -90,7 +109,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
90 */ 109 */
91 if (dev->features & CLOCK_EVT_FEAT_ONESHOT) 110 if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
92 tick_clock_notify(); 111 tick_clock_notify();
93 return 1;
94} 112}
95 113
96/* 114/*
@@ -123,8 +141,9 @@ static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
123 */ 141 */
124int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) 142int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
125{ 143{
144 struct clock_event_device *bc = tick_broadcast_device.evtdev;
126 unsigned long flags; 145 unsigned long flags;
127 int ret = 0; 146 int ret;
128 147
129 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 148 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
130 149
@@ -138,20 +157,59 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
138 dev->event_handler = tick_handle_periodic; 157 dev->event_handler = tick_handle_periodic;
139 tick_device_setup_broadcast_func(dev); 158 tick_device_setup_broadcast_func(dev);
140 cpumask_set_cpu(cpu, tick_broadcast_mask); 159 cpumask_set_cpu(cpu, tick_broadcast_mask);
141 tick_broadcast_start_periodic(tick_broadcast_device.evtdev); 160 tick_broadcast_start_periodic(bc);
142 ret = 1; 161 ret = 1;
143 } else { 162 } else {
144 /* 163 /*
145 * When the new device is not affected by the stop 164 * Clear the broadcast bit for this cpu if the
146 * feature and the cpu is marked in the broadcast mask 165 * device is not power state affected.
147 * then clear the broadcast bit.
148 */ 166 */
149 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { 167 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
150 int cpu = smp_processor_id();
151 cpumask_clear_cpu(cpu, tick_broadcast_mask); 168 cpumask_clear_cpu(cpu, tick_broadcast_mask);
152 tick_broadcast_clear_oneshot(cpu); 169 else
153 } else {
154 tick_device_setup_broadcast_func(dev); 170 tick_device_setup_broadcast_func(dev);
171
172 /*
173 * Clear the broadcast bit if the CPU is not in
174 * periodic broadcast on state.
175 */
176 if (!cpumask_test_cpu(cpu, tick_broadcast_on))
177 cpumask_clear_cpu(cpu, tick_broadcast_mask);
178
179 switch (tick_broadcast_device.mode) {
180 case TICKDEV_MODE_ONESHOT:
181 /*
182 * If the system is in oneshot mode we can
183 * unconditionally clear the oneshot mask bit,
184 * because the CPU is running and therefore
185 * not in an idle state which causes the power
186 * state affected device to stop. Let the
187 * caller initialize the device.
188 */
189 tick_broadcast_clear_oneshot(cpu);
190 ret = 0;
191 break;
192
193 case TICKDEV_MODE_PERIODIC:
194 /*
195 * If the system is in periodic mode, check
196 * whether the broadcast device can be
197 * switched off now.
198 */
199 if (cpumask_empty(tick_broadcast_mask) && bc)
200 clockevents_shutdown(bc);
201 /*
202 * If we kept the cpu in the broadcast mask,
203 * tell the caller to leave the per cpu device
204 * in shutdown state. The periodic interrupt
205 * is delivered by the broadcast device.
206 */
207 ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
208 break;
209 default:
210 /* Nothing to do */
211 ret = 0;
212 break;
155 } 213 }
156 } 214 }
157 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 215 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
@@ -281,6 +339,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
281 switch (*reason) { 339 switch (*reason) {
282 case CLOCK_EVT_NOTIFY_BROADCAST_ON: 340 case CLOCK_EVT_NOTIFY_BROADCAST_ON:
283 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: 341 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
342 cpumask_set_cpu(cpu, tick_broadcast_on);
284 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { 343 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
285 if (tick_broadcast_device.mode == 344 if (tick_broadcast_device.mode ==
286 TICKDEV_MODE_PERIODIC) 345 TICKDEV_MODE_PERIODIC)
@@ -290,8 +349,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
290 tick_broadcast_force = 1; 349 tick_broadcast_force = 1;
291 break; 350 break;
292 case CLOCK_EVT_NOTIFY_BROADCAST_OFF: 351 case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
293 if (!tick_broadcast_force && 352 if (tick_broadcast_force)
294 cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { 353 break;
354 cpumask_clear_cpu(cpu, tick_broadcast_on);
355 if (!tick_device_is_functional(dev))
356 break;
357 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
295 if (tick_broadcast_device.mode == 358 if (tick_broadcast_device.mode ==
296 TICKDEV_MODE_PERIODIC) 359 TICKDEV_MODE_PERIODIC)
297 tick_setup_periodic(dev, 0); 360 tick_setup_periodic(dev, 0);
@@ -349,6 +412,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
349 412
350 bc = tick_broadcast_device.evtdev; 413 bc = tick_broadcast_device.evtdev;
351 cpumask_clear_cpu(cpu, tick_broadcast_mask); 414 cpumask_clear_cpu(cpu, tick_broadcast_mask);
415 cpumask_clear_cpu(cpu, tick_broadcast_on);
352 416
353 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { 417 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
354 if (bc && cpumask_empty(tick_broadcast_mask)) 418 if (bc && cpumask_empty(tick_broadcast_mask))
@@ -475,7 +539,15 @@ void tick_check_oneshot_broadcast(int cpu)
475 if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { 539 if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
476 struct tick_device *td = &per_cpu(tick_cpu_device, cpu); 540 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
477 541
478 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); 542 /*
543 * We might be in the middle of switching over from
544 * periodic to oneshot. If the CPU has not yet
545 * switched over, leave the device alone.
546 */
547 if (td->mode == TICKDEV_MODE_ONESHOT) {
548 clockevents_set_mode(td->evtdev,
549 CLOCK_EVT_MODE_ONESHOT);
550 }
479 } 551 }
480} 552}
481 553
@@ -522,6 +594,13 @@ again:
522 cpumask_clear(tick_broadcast_force_mask); 594 cpumask_clear(tick_broadcast_force_mask);
523 595
524 /* 596 /*
597 * Sanity check. Catch the case where we try to broadcast to
598 * offline cpus.
599 */
600 if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
601 cpumask_and(tmpmask, tmpmask, cpu_online_mask);
602
603 /*
525 * Wakeup the cpus which have an expired event. 604 * Wakeup the cpus which have an expired event.
526 */ 605 */
527 tick_do_broadcast(tmpmask); 606 tick_do_broadcast(tmpmask);
@@ -761,10 +840,12 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
761 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 840 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
762 841
763 /* 842 /*
764 * Clear the broadcast mask flag for the dead cpu, but do not 843 * Clear the broadcast masks for the dead cpu, but do not stop
765 * stop the broadcast device! 844 * the broadcast device!
766 */ 845 */
767 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); 846 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
847 cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
848 cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
768 849
769 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 850 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
770} 851}
@@ -792,6 +873,7 @@ bool tick_broadcast_oneshot_available(void)
792void __init tick_broadcast_init(void) 873void __init tick_broadcast_init(void)
793{ 874{
794 zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); 875 zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
876 zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
795 zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); 877 zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
796#ifdef CONFIG_TICK_ONESHOT 878#ifdef CONFIG_TICK_ONESHOT
797 zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); 879 zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
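
The broadcast changes above add the tick_broadcast_on mask and tighten the ON/OFF and oneshot notification handling. For context, a hedged sketch of the cpuidle-side usage these notifications serve in kernels of this era: hand the local tick over to the broadcast device around a deep C-state. DEEP_STATE and the enter routine are illustrative, not part of this patch:

/*
 * Sketch: switch to the broadcast device before a C-state that stops
 * the local timer, and back on exit.
 */
#include <linux/clockchips.h>
#include <linux/smp.h>

#define DEEP_STATE	2	/* illustrative: local timer stops from here on */

static void my_idle_enter(int state)
{
	int cpu = smp_processor_id();

	if (state >= DEEP_STATE)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

	/* ... enter the low power state ... */

	if (state >= DEEP_STATE)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
}
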
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 5d3fb100bc06..64522ecdfe0e 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -18,6 +18,7 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/profile.h> 19#include <linux/profile.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/module.h>
21 22
22#include <asm/irq_regs.h> 23#include <asm/irq_regs.h>
23 24
@@ -33,7 +34,6 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
33ktime_t tick_next_period; 34ktime_t tick_next_period;
34ktime_t tick_period; 35ktime_t tick_period;
35int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; 36int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
36static DEFINE_RAW_SPINLOCK(tick_device_lock);
37 37
38/* 38/*
39 * Debugging: see timer_list.c 39 * Debugging: see timer_list.c
@@ -194,7 +194,8 @@ static void tick_setup_device(struct tick_device *td,
194 * When global broadcasting is active, check if the current 194 * When global broadcasting is active, check if the current
195 * device is registered as a placeholder for broadcast mode. 195 * device is registered as a placeholder for broadcast mode.
196 * This allows us to handle this x86 misfeature in a generic 196 * This allows us to handle this x86 misfeature in a generic
197 * way. 197 * way. This function also returns !=0 when we keep the
198 * current active broadcast state for this CPU.
198 */ 199 */
199 if (tick_device_uses_broadcast(newdev, cpu)) 200 if (tick_device_uses_broadcast(newdev, cpu))
200 return; 201 return;
@@ -205,17 +206,75 @@ static void tick_setup_device(struct tick_device *td,
205 tick_setup_oneshot(newdev, handler, next_event); 206 tick_setup_oneshot(newdev, handler, next_event);
206} 207}
207 208
209void tick_install_replacement(struct clock_event_device *newdev)
210{
211 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
212 int cpu = smp_processor_id();
213
214 clockevents_exchange_device(td->evtdev, newdev);
215 tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
216 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
217 tick_oneshot_notify();
218}
219
220static bool tick_check_percpu(struct clock_event_device *curdev,
221 struct clock_event_device *newdev, int cpu)
222{
223 if (!cpumask_test_cpu(cpu, newdev->cpumask))
224 return false;
225 if (cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
226 return true;
227 /* Check if irq affinity can be set */
228 if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
229 return false;
230 /* Prefer an existing cpu local device */
231 if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
232 return false;
233 return true;
234}
235
236static bool tick_check_preferred(struct clock_event_device *curdev,
237 struct clock_event_device *newdev)
238{
239 /* Prefer oneshot capable device */
240 if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) {
241 if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT))
242 return false;
243 if (tick_oneshot_mode_active())
244 return false;
245 }
246
247 /*
248 * Use the higher rated one, but prefer a CPU local device with a lower
249 * rating than a non-CPU local device
250 */
251 return !curdev ||
252 newdev->rating > curdev->rating ||
253 !cpumask_equal(curdev->cpumask, newdev->cpumask);
254}
255
256/*
257 * Check whether the new device is a better fit than curdev. curdev
258 * can be NULL !
259 */
260bool tick_check_replacement(struct clock_event_device *curdev,
261 struct clock_event_device *newdev)
262{
263 if (tick_check_percpu(curdev, newdev, smp_processor_id()))
264 return false;
265
266 return tick_check_preferred(curdev, newdev);
267}
268
208/* 269/*
209 * Check, if the new registered device should be used. 270 * Check, if the new registered device should be used. Called with
271 * clockevents_lock held and interrupts disabled.
210 */ 272 */
211static int tick_check_new_device(struct clock_event_device *newdev) 273void tick_check_new_device(struct clock_event_device *newdev)
212{ 274{
213 struct clock_event_device *curdev; 275 struct clock_event_device *curdev;
214 struct tick_device *td; 276 struct tick_device *td;
215 int cpu, ret = NOTIFY_OK; 277 int cpu;
216 unsigned long flags;
217
218 raw_spin_lock_irqsave(&tick_device_lock, flags);
219 278
220 cpu = smp_processor_id(); 279 cpu = smp_processor_id();
221 if (!cpumask_test_cpu(cpu, newdev->cpumask)) 280 if (!cpumask_test_cpu(cpu, newdev->cpumask))
@@ -225,40 +284,15 @@ static int tick_check_new_device(struct clock_event_device *newdev)
225 curdev = td->evtdev; 284 curdev = td->evtdev;
226 285
227 /* cpu local device ? */ 286 /* cpu local device ? */
228 if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { 287 if (!tick_check_percpu(curdev, newdev, cpu))
229 288 goto out_bc;
230 /*
231 * If the cpu affinity of the device interrupt can not
232 * be set, ignore it.
233 */
234 if (!irq_can_set_affinity(newdev->irq))
235 goto out_bc;
236 289
237 /* 290 /* Preference decision */
238 * If we have a cpu local device already, do not replace it 291 if (!tick_check_preferred(curdev, newdev))
239 * by a non cpu local device 292 goto out_bc;
240 */
241 if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
242 goto out_bc;
243 }
244 293
245 /* 294 if (!try_module_get(newdev->owner))
246 * If we have an active device, then check the rating and the oneshot 295 return;
247 * feature.
248 */
249 if (curdev) {
250 /*
251 * Prefer one shot capable devices !
252 */
253 if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
254 !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
255 goto out_bc;
256 /*
257 * Check the rating
258 */
259 if (curdev->rating >= newdev->rating)
260 goto out_bc;
261 }
262 296
263 /* 297 /*
264 * Replace the eventually existing device by the new 298 * Replace the eventually existing device by the new
@@ -273,20 +307,13 @@ static int tick_check_new_device(struct clock_event_device *newdev)
273 tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); 307 tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
274 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) 308 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
275 tick_oneshot_notify(); 309 tick_oneshot_notify();
276 310 return;
277 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
278 return NOTIFY_STOP;
279 311
280out_bc: 312out_bc:
281 /* 313 /*
282 * Can the new device be used as a broadcast device ? 314 * Can the new device be used as a broadcast device ?
283 */ 315 */
284 if (tick_check_broadcast_device(newdev)) 316 tick_install_broadcast_device(newdev);
285 ret = NOTIFY_STOP;
286
287 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
288
289 return ret;
290} 317}
291 318
292/* 319/*
@@ -294,7 +321,7 @@ out_bc:
294 * 321 *
295 * Called with interrupts disabled. 322 * Called with interrupts disabled.
296 */ 323 */
297static void tick_handover_do_timer(int *cpup) 324void tick_handover_do_timer(int *cpup)
298{ 325{
299 if (*cpup == tick_do_timer_cpu) { 326 if (*cpup == tick_do_timer_cpu) {
300 int cpu = cpumask_first(cpu_online_mask); 327 int cpu = cpumask_first(cpu_online_mask);
@@ -311,13 +338,11 @@ static void tick_handover_do_timer(int *cpup)
311 * access the hardware device itself. 338 * access the hardware device itself.
312 * We just set the mode and remove it from the lists. 339 * We just set the mode and remove it from the lists.
313 */ 340 */
314static void tick_shutdown(unsigned int *cpup) 341void tick_shutdown(unsigned int *cpup)
315{ 342{
316 struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); 343 struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
317 struct clock_event_device *dev = td->evtdev; 344 struct clock_event_device *dev = td->evtdev;
318 unsigned long flags;
319 345
320 raw_spin_lock_irqsave(&tick_device_lock, flags);
321 td->mode = TICKDEV_MODE_PERIODIC; 346 td->mode = TICKDEV_MODE_PERIODIC;
322 if (dev) { 347 if (dev) {
323 /* 348 /*
@@ -329,26 +354,20 @@ static void tick_shutdown(unsigned int *cpup)
329 dev->event_handler = clockevents_handle_noop; 354 dev->event_handler = clockevents_handle_noop;
330 td->evtdev = NULL; 355 td->evtdev = NULL;
331 } 356 }
332 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
333} 357}
334 358
335static void tick_suspend(void) 359void tick_suspend(void)
336{ 360{
337 struct tick_device *td = &__get_cpu_var(tick_cpu_device); 361 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
338 unsigned long flags;
339 362
340 raw_spin_lock_irqsave(&tick_device_lock, flags);
341 clockevents_shutdown(td->evtdev); 363 clockevents_shutdown(td->evtdev);
342 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
343} 364}
344 365
345static void tick_resume(void) 366void tick_resume(void)
346{ 367{
347 struct tick_device *td = &__get_cpu_var(tick_cpu_device); 368 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
348 unsigned long flags;
349 int broadcast = tick_resume_broadcast(); 369 int broadcast = tick_resume_broadcast();
350 370
351 raw_spin_lock_irqsave(&tick_device_lock, flags);
352 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); 371 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
353 372
354 if (!broadcast) { 373 if (!broadcast) {
@@ -357,68 +376,12 @@ static void tick_resume(void)
357 else 376 else
358 tick_resume_oneshot(); 377 tick_resume_oneshot();
359 } 378 }
360 raw_spin_unlock_irqrestore(&tick_device_lock, flags);
361} 379}
362 380
363/*
364 * Notification about clock event devices
365 */
366static int tick_notify(struct notifier_block *nb, unsigned long reason,
367 void *dev)
368{
369 switch (reason) {
370
371 case CLOCK_EVT_NOTIFY_ADD:
372 return tick_check_new_device(dev);
373
374 case CLOCK_EVT_NOTIFY_BROADCAST_ON:
375 case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
376 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
377 tick_broadcast_on_off(reason, dev);
378 break;
379
380 case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
381 case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
382 tick_broadcast_oneshot_control(reason);
383 break;
384
385 case CLOCK_EVT_NOTIFY_CPU_DYING:
386 tick_handover_do_timer(dev);
387 break;
388
389 case CLOCK_EVT_NOTIFY_CPU_DEAD:
390 tick_shutdown_broadcast_oneshot(dev);
391 tick_shutdown_broadcast(dev);
392 tick_shutdown(dev);
393 break;
394
395 case CLOCK_EVT_NOTIFY_SUSPEND:
396 tick_suspend();
397 tick_suspend_broadcast();
398 break;
399
400 case CLOCK_EVT_NOTIFY_RESUME:
401 tick_resume();
402 break;
403
404 default:
405 break;
406 }
407
408 return NOTIFY_OK;
409}
410
411static struct notifier_block tick_notifier = {
412 .notifier_call = tick_notify,
413};
414
415/** 381/**
416 * tick_init - initialize the tick control 382 * tick_init - initialize the tick control
417 *
418 * Register the notifier with the clockevents framework
419 */ 383 */
420void __init tick_init(void) 384void __init tick_init(void)
421{ 385{
422 clockevents_register_notifier(&tick_notifier);
423 tick_broadcast_init(); 386 tick_broadcast_init();
424} 387}
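
The tick-common.c hunks above replace the monolithic selection logic of tick_check_new_device() with the tick_check_percpu()/tick_check_preferred() pair and drop the notifier indirection in favour of direct calls. As a rough standalone sketch of the preference ordering those helpers encode (the cpumask and irq-affinity checks of tick_check_percpu() are omitted; the types, ratings and harness below are illustrative only, not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define FEAT_ONESHOT 0x01

/* Simplified stand-in for struct clock_event_device */
struct dev {
	const char *name;
	int rating;
	unsigned int features;
	bool cpu_local;		/* cpumask == cpumask_of(cpu) in the real code */
};

static bool prefer_new(const struct dev *curdev, const struct dev *newdev)
{
	/* A device without oneshot support never replaces one that has it,
	 * mirroring the first branch of tick_check_preferred() above. */
	if (!(newdev->features & FEAT_ONESHOT) &&
	    curdev && (curdev->features & FEAT_ONESHOT))
		return false;

	/* Higher rating wins, but a cpu-local device is preferred even at a
	 * lower rating; a simplified reading of the cpumask comparison. */
	return !curdev || newdev->rating > curdev->rating ||
	       (newdev->cpu_local && !curdev->cpu_local);
}

int main(void)
{
	struct dev hpet  = { "hpet",  250, FEAT_ONESHOT, false };
	struct dev lapic = { "lapic", 100, FEAT_ONESHOT, true  };

	/* The lower rated but cpu-local device still wins. */
	printf("replace hpet with lapic: %s\n",
	       prefer_new(&hpet, &lapic) ? "yes" : "no");
	return 0;
}
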
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f0299eae4602..bc906cad709b 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -6,6 +6,8 @@
6 6
7extern seqlock_t jiffies_lock; 7extern seqlock_t jiffies_lock;
8 8
9#define CS_NAME_LEN 32
10
9#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD 11#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
10 12
11#define TICK_DO_TIMER_NONE -1 13#define TICK_DO_TIMER_NONE -1
@@ -18,9 +20,19 @@ extern int tick_do_timer_cpu __read_mostly;
18 20
19extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); 21extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
20extern void tick_handle_periodic(struct clock_event_device *dev); 22extern void tick_handle_periodic(struct clock_event_device *dev);
23extern void tick_check_new_device(struct clock_event_device *dev);
24extern void tick_handover_do_timer(int *cpup);
25extern void tick_shutdown(unsigned int *cpup);
26extern void tick_suspend(void);
27extern void tick_resume(void);
28extern bool tick_check_replacement(struct clock_event_device *curdev,
29 struct clock_event_device *newdev);
30extern void tick_install_replacement(struct clock_event_device *dev);
21 31
22extern void clockevents_shutdown(struct clock_event_device *dev); 32extern void clockevents_shutdown(struct clock_event_device *dev);
23 33
34extern size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
35
24/* 36/*
25 * NO_HZ / high resolution timer shared code 37 * NO_HZ / high resolution timer shared code
26 */ 38 */
@@ -90,7 +102,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; }
90 */ 102 */
91#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 103#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
92extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); 104extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
93extern int tick_check_broadcast_device(struct clock_event_device *dev); 105extern void tick_install_broadcast_device(struct clock_event_device *dev);
94extern int tick_is_broadcast_device(struct clock_event_device *dev); 106extern int tick_is_broadcast_device(struct clock_event_device *dev);
95extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); 107extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
96extern void tick_shutdown_broadcast(unsigned int *cpup); 108extern void tick_shutdown_broadcast(unsigned int *cpup);
@@ -102,9 +114,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
102 114
103#else /* !BROADCAST */ 115#else /* !BROADCAST */
104 116
105static inline int tick_check_broadcast_device(struct clock_event_device *dev) 117static inline void tick_install_broadcast_device(struct clock_event_device *dev)
106{ 118{
107 return 0;
108} 119}
109 120
110static inline int tick_is_broadcast_device(struct clock_event_device *dev) 121static inline int tick_is_broadcast_device(struct clock_event_device *dev)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index baeeb5c87cf1..48b9fffabdc2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -25,6 +25,11 @@
25 25
26#include "tick-internal.h" 26#include "tick-internal.h"
27#include "ntp_internal.h" 27#include "ntp_internal.h"
28#include "timekeeping_internal.h"
29
30#define TK_CLEAR_NTP (1 << 0)
31#define TK_MIRROR (1 << 1)
32#define TK_CLOCK_WAS_SET (1 << 2)
28 33
29static struct timekeeper timekeeper; 34static struct timekeeper timekeeper;
30static DEFINE_RAW_SPINLOCK(timekeeper_lock); 35static DEFINE_RAW_SPINLOCK(timekeeper_lock);
@@ -200,9 +205,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
200 205
201static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); 206static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
202 207
203static void update_pvclock_gtod(struct timekeeper *tk) 208static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
204{ 209{
205 raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk); 210 raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
206} 211}
207 212
208/** 213/**
@@ -216,7 +221,7 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
216 221
217 raw_spin_lock_irqsave(&timekeeper_lock, flags); 222 raw_spin_lock_irqsave(&timekeeper_lock, flags);
218 ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); 223 ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
219 update_pvclock_gtod(tk); 224 update_pvclock_gtod(tk, true);
220 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 225 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
221 226
222 return ret; 227 return ret;
@@ -241,16 +246,16 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
241EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); 246EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
242 247
243/* must hold timekeeper_lock */ 248/* must hold timekeeper_lock */
244static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror) 249static void timekeeping_update(struct timekeeper *tk, unsigned int action)
245{ 250{
246 if (clearntp) { 251 if (action & TK_CLEAR_NTP) {
247 tk->ntp_error = 0; 252 tk->ntp_error = 0;
248 ntp_clear(); 253 ntp_clear();
249 } 254 }
250 update_vsyscall(tk); 255 update_vsyscall(tk);
251 update_pvclock_gtod(tk); 256 update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
252 257
253 if (mirror) 258 if (action & TK_MIRROR)
254 memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); 259 memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
255} 260}
256 261
@@ -508,7 +513,7 @@ int do_settimeofday(const struct timespec *tv)
508 513
509 tk_set_xtime(tk, tv); 514 tk_set_xtime(tk, tv);
510 515
511 timekeeping_update(tk, true, true); 516 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
512 517
513 write_seqcount_end(&timekeeper_seq); 518 write_seqcount_end(&timekeeper_seq);
514 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 519 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -552,7 +557,7 @@ int timekeeping_inject_offset(struct timespec *ts)
552 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); 557 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
553 558
554error: /* even if we error out, we forwarded the time, so call update */ 559error: /* even if we error out, we forwarded the time, so call update */
555 timekeeping_update(tk, true, true); 560 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
556 561
557 write_seqcount_end(&timekeeper_seq); 562 write_seqcount_end(&timekeeper_seq);
558 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 563 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -627,13 +632,22 @@ static int change_clocksource(void *data)
627 write_seqcount_begin(&timekeeper_seq); 632 write_seqcount_begin(&timekeeper_seq);
628 633
629 timekeeping_forward_now(tk); 634 timekeeping_forward_now(tk);
630 if (!new->enable || new->enable(new) == 0) { 635 /*
631 old = tk->clock; 636 * If the cs is in module, get a module reference. Succeeds
632 tk_setup_internals(tk, new); 637 * for built-in code (owner == NULL) as well.
633 if (old->disable) 638 */
634 old->disable(old); 639 if (try_module_get(new->owner)) {
640 if (!new->enable || new->enable(new) == 0) {
641 old = tk->clock;
642 tk_setup_internals(tk, new);
643 if (old->disable)
644 old->disable(old);
645 module_put(old->owner);
646 } else {
647 module_put(new->owner);
648 }
635 } 649 }
636 timekeeping_update(tk, true, true); 650 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
637 651
638 write_seqcount_end(&timekeeper_seq); 652 write_seqcount_end(&timekeeper_seq);
639 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 653 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -648,14 +662,15 @@ static int change_clocksource(void *data)
648 * This function is called from clocksource.c after a new, better clock 662 * This function is called from clocksource.c after a new, better clock
649 * source has been registered. The caller holds the clocksource_mutex. 663 * source has been registered. The caller holds the clocksource_mutex.
650 */ 664 */
651void timekeeping_notify(struct clocksource *clock) 665int timekeeping_notify(struct clocksource *clock)
652{ 666{
653 struct timekeeper *tk = &timekeeper; 667 struct timekeeper *tk = &timekeeper;
654 668
655 if (tk->clock == clock) 669 if (tk->clock == clock)
656 return; 670 return 0;
657 stop_machine(change_clocksource, clock, NULL); 671 stop_machine(change_clocksource, clock, NULL);
658 tick_clock_notify(); 672 tick_clock_notify();
673 return tk->clock == clock ? 0 : -1;
659} 674}
660 675
661/** 676/**
@@ -841,6 +856,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
841 tk_xtime_add(tk, delta); 856 tk_xtime_add(tk, delta);
842 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); 857 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta));
843 tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); 858 tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta));
859 tk_debug_account_sleep_time(delta);
844} 860}
845 861
846/** 862/**
@@ -872,7 +888,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
872 888
873 __timekeeping_inject_sleeptime(tk, delta); 889 __timekeeping_inject_sleeptime(tk, delta);
874 890
875 timekeeping_update(tk, true, true); 891 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
876 892
877 write_seqcount_end(&timekeeper_seq); 893 write_seqcount_end(&timekeeper_seq);
878 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 894 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -954,7 +970,7 @@ static void timekeeping_resume(void)
954 tk->cycle_last = clock->cycle_last = cycle_now; 970 tk->cycle_last = clock->cycle_last = cycle_now;
955 tk->ntp_error = 0; 971 tk->ntp_error = 0;
956 timekeeping_suspended = 0; 972 timekeeping_suspended = 0;
957 timekeeping_update(tk, false, true); 973 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
958 write_seqcount_end(&timekeeper_seq); 974 write_seqcount_end(&timekeeper_seq);
959 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 975 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
960 976
@@ -1236,9 +1252,10 @@ out_adjust:
1236 * It also calls into the NTP code to handle leapsecond processing. 1252 * It also calls into the NTP code to handle leapsecond processing.
1237 * 1253 *
1238 */ 1254 */
1239static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) 1255static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
1240{ 1256{
1241 u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; 1257 u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
1258 unsigned int action = 0;
1242 1259
1243 while (tk->xtime_nsec >= nsecps) { 1260 while (tk->xtime_nsec >= nsecps) {
1244 int leap; 1261 int leap;
@@ -1261,8 +1278,10 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
1261 __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); 1278 __timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
1262 1279
1263 clock_was_set_delayed(); 1280 clock_was_set_delayed();
1281 action = TK_CLOCK_WAS_SET;
1264 } 1282 }
1265 } 1283 }
1284 return action;
1266} 1285}
1267 1286
1268/** 1287/**
@@ -1347,6 +1366,7 @@ static void update_wall_time(void)
1347 struct timekeeper *tk = &shadow_timekeeper; 1366 struct timekeeper *tk = &shadow_timekeeper;
1348 cycle_t offset; 1367 cycle_t offset;
1349 int shift = 0, maxshift; 1368 int shift = 0, maxshift;
1369 unsigned int action;
1350 unsigned long flags; 1370 unsigned long flags;
1351 1371
1352 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1372 raw_spin_lock_irqsave(&timekeeper_lock, flags);
@@ -1399,7 +1419,7 @@ static void update_wall_time(void)
1399 * Finally, make sure that after the rounding 1419 * Finally, make sure that after the rounding
1400 * xtime_nsec isn't larger than NSEC_PER_SEC 1420 * xtime_nsec isn't larger than NSEC_PER_SEC
1401 */ 1421 */
1402 accumulate_nsecs_to_secs(tk); 1422 action = accumulate_nsecs_to_secs(tk);
1403 1423
1404 write_seqcount_begin(&timekeeper_seq); 1424 write_seqcount_begin(&timekeeper_seq);
1405 /* Update clock->cycle_last with the new value */ 1425 /* Update clock->cycle_last with the new value */
@@ -1415,7 +1435,7 @@ static void update_wall_time(void)
1415 * updating. 1435 * updating.
1416 */ 1436 */
1417 memcpy(real_tk, tk, sizeof(*tk)); 1437 memcpy(real_tk, tk, sizeof(*tk));
1418 timekeeping_update(real_tk, false, false); 1438 timekeeping_update(real_tk, action);
1419 write_seqcount_end(&timekeeper_seq); 1439 write_seqcount_end(&timekeeper_seq);
1420out: 1440out:
1421 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1441 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1677,6 +1697,7 @@ int do_adjtimex(struct timex *txc)
1677 1697
1678 if (tai != orig_tai) { 1698 if (tai != orig_tai) {
1679 __timekeeping_set_tai_offset(tk, tai); 1699 __timekeeping_set_tai_offset(tk, tai);
1700 update_pvclock_gtod(tk, true);
1680 clock_was_set_delayed(); 1701 clock_was_set_delayed();
1681 } 1702 }
1682 write_seqcount_end(&timekeeper_seq); 1703 write_seqcount_end(&timekeeper_seq);
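
The timekeeping.c hunks above fold the two boolean parameters of timekeeping_update() into a single action mask built from TK_CLEAR_NTP, TK_MIRROR and TK_CLOCK_WAS_SET. A minimal sketch of how the callers' flag combinations compose (the constants are copied from the hunk; the helper and the printed values are only an illustration of the old-bools-to-new-mask mapping):

#include <stdio.h>

#define TK_CLEAR_NTP		(1 << 0)
#define TK_MIRROR		(1 << 1)
#define TK_CLOCK_WAS_SET	(1 << 2)

/* Hypothetical helper mapping the old (clearntp, mirror) pair, plus the
 * new "clock was set" information, onto the action mask. */
static unsigned int tk_action(int clearntp, int mirror, int clock_was_set)
{
	unsigned int action = 0;

	if (clearntp)
		action |= TK_CLEAR_NTP;
	if (mirror)
		action |= TK_MIRROR;
	if (clock_was_set)
		action |= TK_CLOCK_WAS_SET;
	return action;
}

int main(void)
{
	/* do_settimeofday(): was timekeeping_update(tk, true, true) */
	printf("settimeofday action: 0x%x\n", tk_action(1, 1, 1));
	/* timekeeping_resume(): was timekeeping_update(tk, false, true) */
	printf("resume action:       0x%x\n", tk_action(0, 1, 1));
	return 0;
}
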
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
new file mode 100644
index 000000000000..802433a4f5eb
--- /dev/null
+++ b/kernel/time/timekeeping_debug.c
@@ -0,0 +1,72 @@
1/*
2 * debugfs file to track time spent in suspend
3 *
4 * Copyright (c) 2011, Google, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 */
16
17#include <linux/debugfs.h>
18#include <linux/err.h>
19#include <linux/init.h>
20#include <linux/kernel.h>
21#include <linux/seq_file.h>
22#include <linux/time.h>
23
24static unsigned int sleep_time_bin[32] = {0};
25
26static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
27{
28 unsigned int bin;
29 seq_puts(s, " time (secs) count\n");
30 seq_puts(s, "------------------------------\n");
31 for (bin = 0; bin < 32; bin++) {
32 if (sleep_time_bin[bin] == 0)
33 continue;
34 seq_printf(s, "%10u - %-10u %4u\n",
35 bin ? 1 << (bin - 1) : 0, 1 << bin,
36 sleep_time_bin[bin]);
37 }
38 return 0;
39}
40
41static int tk_debug_sleep_time_open(struct inode *inode, struct file *file)
42{
43 return single_open(file, tk_debug_show_sleep_time, NULL);
44}
45
46static const struct file_operations tk_debug_sleep_time_fops = {
47 .open = tk_debug_sleep_time_open,
48 .read = seq_read,
49 .llseek = seq_lseek,
50 .release = single_release,
51};
52
53static int __init tk_debug_sleep_time_init(void)
54{
55 struct dentry *d;
56
57 d = debugfs_create_file("sleep_time", 0444, NULL, NULL,
58 &tk_debug_sleep_time_fops);
59 if (!d) {
60 pr_err("Failed to create sleep_time debug file\n");
61 return -ENOMEM;
62 }
63
64 return 0;
65}
66late_initcall(tk_debug_sleep_time_init);
67
68void tk_debug_account_sleep_time(struct timespec *t)
69{
70 sleep_time_bin[fls(t->tv_sec)]++;
71}
72
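
tk_debug_account_sleep_time() above buckets each suspend interval by the most significant bit of its length in seconds, so a sleep of s seconds lands in bin fls(s), covering roughly [2^(bin-1), 2^bin); 100 seconds, for instance, gives fls(100) = 7 and is reported in the 64 - 128 row. A small userspace sketch of the same binning, using a __builtin_clz() based stand-in for the kernel's fls():

#include <stdio.h>

/* Userspace stand-in for the kernel's fls(): 1-based index of the highest
 * set bit, with fls(0) == 0. */
static unsigned int fls_u32(unsigned int x)
{
	return x ? 32 - __builtin_clz(x) : 0;
}

int main(void)
{
	unsigned int secs[] = { 0, 3, 100, 7200 };
	unsigned int i;

	for (i = 0; i < sizeof(secs) / sizeof(secs[0]); i++) {
		unsigned int bin = fls_u32(secs[i]);

		/* Same range formatting as tk_debug_show_sleep_time() */
		printf("%5us -> bin %2u (%u - %u)\n", secs[i], bin,
		       bin ? 1u << (bin - 1) : 0, 1u << bin);
	}
	return 0;
}
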
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
new file mode 100644
index 000000000000..13323ea08ffa
--- /dev/null
+++ b/kernel/time/timekeeping_internal.h
@@ -0,0 +1,14 @@
1#ifndef _TIMEKEEPING_INTERNAL_H
2#define _TIMEKEEPING_INTERNAL_H
3/*
4 * timekeeping debug functions
5 */
6#include <linux/time.h>
7
8#ifdef CONFIG_DEBUG_FS
9extern void tk_debug_account_sleep_time(struct timespec *t);
10#else
11#define tk_debug_account_sleep_time(x)
12#endif
13
14#endif /* _TIMEKEEPING_INTERNAL_H */
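
When CONFIG_DEBUG_FS is disabled, the header above turns tk_debug_account_sleep_time() into an empty macro, so timekeeping.c can call it unconditionally without an #ifdef at the call site. A hedged userspace sketch of that stub pattern (the config switch and names below are invented for illustration):

#include <stdio.h>

#define MY_DEBUG_FS 1		/* flip to 0 to emulate CONFIG_DEBUG_FS=n */

#if MY_DEBUG_FS
static void account_sleep_time(long secs)
{
	printf("accounted %ld s of sleep\n", secs);
}
#else
/* Expands to nothing, so callers compile away without their own #ifdefs. */
#define account_sleep_time(x)
#endif

int main(void)
{
	account_sleep_time(42);
	return 0;
}
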