Diffstat (limited to 'kernel/time')
-rw-r--r--   kernel/time/Kconfig            |   6
-rw-r--r--   kernel/time/Makefile           |   6
-rw-r--r--   kernel/time/clockevents.c      | 229
-rw-r--r--   kernel/time/clocksource.c      | 173
-rw-r--r--   kernel/time/hrtimer.c          |   9
-rw-r--r--   kernel/time/jiffies.c          |   7
-rw-r--r--   kernel/time/ntp.c              |  14
-rw-r--r--   kernel/time/sched_clock.c      | 236
-rw-r--r--   kernel/time/tick-broadcast.c   | 179
-rw-r--r--   kernel/time/tick-common.c      |  82
-rw-r--r--   kernel/time/tick-internal.h    | 211
-rw-r--r--   kernel/time/tick-oneshot.c     |   6
-rw-r--r--   kernel/time/tick-sched.c       |   7
-rw-r--r--   kernel/time/tick-sched.h       |  74
-rw-r--r--   kernel/time/timekeeping.c      | 490
-rw-r--r--   kernel/time/timekeeping.h      |   7
-rw-r--r--   kernel/time/timer.c            | 149
-rw-r--r--   kernel/time/timer_list.c       |  34
18 files changed, 1167 insertions, 752 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index d626dc98e8df..579ce1b929af 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -33,12 +33,6 @@ config ARCH_USES_GETTIMEOFFSET
 config GENERIC_CLOCKEVENTS
 	bool
 
-# Migration helper. Builds, but does not invoke
-config GENERIC_CLOCKEVENTS_BUILD
-	bool
-	default y
-	depends on GENERIC_CLOCKEVENTS
-
 # Architecture can handle broadcast in a driver-agnostic way
 config ARCH_HAS_TICK_BROADCAST
 	bool
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index c09c07817d7a..01f0312419b3 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -2,15 +2,13 @@ obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o
 
-obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
-obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
+obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= clockevents.o tick-common.o
 ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
  obj-y						+= tick-broadcast.o
  obj-$(CONFIG_TICK_ONESHOT)			+= tick-broadcast-hrtimer.o
 endif
 obj-$(CONFIG_GENERIC_SCHED_CLOCK)		+= sched_clock.o
-obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o
-obj-$(CONFIG_TICK_ONESHOT)			+= tick-sched.o
+obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o tick-sched.o
 obj-$(CONFIG_TIMER_STATS)			+= timer_stats.o
 obj-$(CONFIG_DEBUG_FS)				+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)			+= test_udelay.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 55449909f114..25d942d1da27 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -94,25 +94,76 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
 }
 EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 
+static int __clockevents_set_state(struct clock_event_device *dev,
+				   enum clock_event_state state)
+{
+	/* Transition with legacy set_mode() callback */
+	if (dev->set_mode) {
+		/* Legacy callback doesn't support new modes */
+		if (state > CLOCK_EVT_STATE_ONESHOT)
+			return -ENOSYS;
+		/*
+		 * 'clock_event_state' and 'clock_event_mode' have 1-to-1
+		 * mapping until *_ONESHOT, and so a simple cast will work.
+		 */
+		dev->set_mode((enum clock_event_mode)state, dev);
+		dev->mode = (enum clock_event_mode)state;
+		return 0;
+	}
+
+	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+		return 0;
+
+	/* Transition with new state-specific callbacks */
+	switch (state) {
+	case CLOCK_EVT_STATE_DETACHED:
+		/*
+		 * This is an internal state, which is guaranteed to go from
+		 * SHUTDOWN to DETACHED. No driver interaction required.
+		 */
+		return 0;
+
+	case CLOCK_EVT_STATE_SHUTDOWN:
+		return dev->set_state_shutdown(dev);
+
+	case CLOCK_EVT_STATE_PERIODIC:
+		/* Core internal bug */
+		if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
+			return -ENOSYS;
+		return dev->set_state_periodic(dev);
+
+	case CLOCK_EVT_STATE_ONESHOT:
+		/* Core internal bug */
+		if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
+			return -ENOSYS;
+		return dev->set_state_oneshot(dev);
+
+	default:
+		return -ENOSYS;
+	}
+}
+
 /**
- * clockevents_set_mode - set the operating mode of a clock event device
+ * clockevents_set_state - set the operating state of a clock event device
  * @dev:	device to modify
- * @mode:	new mode
+ * @state:	new state
  *
  * Must be called with interrupts disabled !
  */
-void clockevents_set_mode(struct clock_event_device *dev,
-			  enum clock_event_mode mode)
+void clockevents_set_state(struct clock_event_device *dev,
+			   enum clock_event_state state)
 {
-	if (dev->mode != mode) {
-		dev->set_mode(mode, dev);
-		dev->mode = mode;
+	if (dev->state != state) {
+		if (__clockevents_set_state(dev, state))
+			return;
+
+		dev->state = state;
 
 		/*
 		 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
 		 * on it, so fix it up and emit a warning:
 		 */
-		if (mode == CLOCK_EVT_MODE_ONESHOT) {
+		if (state == CLOCK_EVT_STATE_ONESHOT) {
 			if (unlikely(!dev->mult)) {
 				dev->mult = 1;
 				WARN_ON(1);
@@ -127,10 +178,28 @@ void clockevents_set_mode(struct clock_event_device *dev,
  */
 void clockevents_shutdown(struct clock_event_device *dev)
 {
-	clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+	clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 	dev->next_event.tv64 = KTIME_MAX;
 }
 
+/**
+ * clockevents_tick_resume - Resume the tick device before using it again
+ * @dev:	device to resume
+ */
+int clockevents_tick_resume(struct clock_event_device *dev)
+{
+	int ret = 0;
+
+	if (dev->set_mode) {
+		dev->set_mode(CLOCK_EVT_MODE_RESUME, dev);
+		dev->mode = CLOCK_EVT_MODE_RESUME;
+	} else if (dev->tick_resume) {
+		ret = dev->tick_resume(dev);
+	}
+
+	return ret;
+}
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
 
 /* Limit min_delta to a jiffie */
@@ -183,7 +252,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
 	delta = dev->min_delta_ns;
 	dev->next_event = ktime_add_ns(ktime_get(), delta);
 
-	if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+	if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
 		return 0;
 
 	dev->retries++;
@@ -220,7 +289,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
 	delta = dev->min_delta_ns;
 	dev->next_event = ktime_add_ns(ktime_get(), delta);
 
-	if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+	if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
 		return 0;
 
 	dev->retries++;
@@ -252,7 +321,7 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
 
 	dev->next_event = expires;
 
-	if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+	if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
 		return 0;
 
 	/* Shortcut for clockevent devices that can deal with ktime. */
@@ -297,7 +366,7 @@ static int clockevents_replace(struct clock_event_device *ced)
 	struct clock_event_device *dev, *newdev = NULL;
 
 	list_for_each_entry(dev, &clockevent_devices, list) {
-		if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
+		if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED)
 			continue;
 
 		if (!tick_check_replacement(newdev, dev))
@@ -323,7 +392,7 @@ static int clockevents_replace(struct clock_event_device *ced)
 static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
 {
 	/* Fast track. Device is unused */
-	if (ced->mode == CLOCK_EVT_MODE_UNUSED) {
+	if (ced->state == CLOCK_EVT_STATE_DETACHED) {
 		list_del_init(&ced->list);
 		return 0;
 	}
@@ -373,6 +442,37 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
 }
 EXPORT_SYMBOL_GPL(clockevents_unbind);
 
+/* Sanity check of state transition callbacks */
+static int clockevents_sanity_check(struct clock_event_device *dev)
+{
+	/* Legacy set_mode() callback */
+	if (dev->set_mode) {
+		/* We shouldn't be supporting new modes now */
+		WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
+			dev->set_state_shutdown || dev->tick_resume);
+
+		BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+		return 0;
+	}
+
+	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+		return 0;
+
+	/* New state-specific callbacks */
+	if (!dev->set_state_shutdown)
+		return -EINVAL;
+
+	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
+	    !dev->set_state_periodic)
+		return -EINVAL;
+
+	if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
+	    !dev->set_state_oneshot)
+		return -EINVAL;
+
+	return 0;
+}
+
 /**
  * clockevents_register_device - register a clock event device
  * @dev:	device to register
@@ -381,7 +481,11 @@ void clockevents_register_device(struct clock_event_device *dev)
 {
 	unsigned long flags;
 
-	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+	BUG_ON(clockevents_sanity_check(dev));
+
+	/* Initialize state to DETACHED */
+	dev->state = CLOCK_EVT_STATE_DETACHED;
+
 	if (!dev->cpumask) {
 		WARN_ON(num_possible_cpus() > 1);
 		dev->cpumask = cpumask_of(smp_processor_id());
@@ -445,11 +549,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
 {
 	clockevents_config(dev, freq);
 
-	if (dev->mode == CLOCK_EVT_MODE_ONESHOT)
+	if (dev->state == CLOCK_EVT_STATE_ONESHOT)
 		return clockevents_program_event(dev, dev->next_event, false);
 
-	if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
-		dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
+	if (dev->state == CLOCK_EVT_STATE_PERIODIC)
+		return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
 
 	return 0;
 }
@@ -491,30 +595,27 @@ void clockevents_handle_noop(struct clock_event_device *dev)
  * @old:	device to release (can be NULL)
  * @new:	device to request (can be NULL)
  *
- * Called from the notifier chain. clockevents_lock is held already
+ * Called from various tick functions with clockevents_lock held and
+ * interrupts disabled.
  */
 void clockevents_exchange_device(struct clock_event_device *old,
 				 struct clock_event_device *new)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
 	/*
 	 * Caller releases a clock event device. We queue it into the
 	 * released list and do a notify add later.
 	 */
 	if (old) {
 		module_put(old->owner);
-		clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
+		clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED);
 		list_del(&old->list);
 		list_add(&old->list, &clockevents_released);
 	}
 
 	if (new) {
-		BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
+		BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED);
 		clockevents_shutdown(new);
 	}
-	local_irq_restore(flags);
 }
 
 /**
@@ -541,74 +642,40 @@ void clockevents_resume(void)
 			dev->resume(dev);
 }
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
+#ifdef CONFIG_HOTPLUG_CPU
 /**
- * clockevents_notify - notification about relevant events
- * Returns 0 on success, any other value on error
+ * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu
  */
-int clockevents_notify(unsigned long reason, void *arg)
+void tick_cleanup_dead_cpu(int cpu)
 {
 	struct clock_event_device *dev, *tmp;
 	unsigned long flags;
-	int cpu, ret = 0;
 
 	raw_spin_lock_irqsave(&clockevents_lock, flags);
 
-	switch (reason) {
-	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
-	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
-		tick_broadcast_on_off(reason, arg);
-		break;
-
-	case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
-	case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
-		ret = tick_broadcast_oneshot_control(reason);
-		break;
-
-	case CLOCK_EVT_NOTIFY_CPU_DYING:
-		tick_handover_do_timer(arg);
-		break;
-
-	case CLOCK_EVT_NOTIFY_SUSPEND:
-		tick_suspend();
-		tick_suspend_broadcast();
-		break;
-
-	case CLOCK_EVT_NOTIFY_RESUME:
-		tick_resume();
-		break;
-
-	case CLOCK_EVT_NOTIFY_CPU_DEAD:
-		tick_shutdown_broadcast_oneshot(arg);
-		tick_shutdown_broadcast(arg);
-		tick_shutdown(arg);
-		/*
-		 * Unregister the clock event devices which were
-		 * released from the users in the notify chain.
-		 */
-		list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+	tick_shutdown_broadcast_oneshot(cpu);
+	tick_shutdown_broadcast(cpu);
+	tick_shutdown(cpu);
+	/*
+	 * Unregister the clock event devices which were
+	 * released from the users in the notify chain.
+	 */
+	list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
 		list_del(&dev->list);
-		/*
-		 * Now check whether the CPU has left unused per cpu devices
-		 */
-		cpu = *((int *)arg);
-		list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
-			if (cpumask_test_cpu(cpu, dev->cpumask) &&
-			    cpumask_weight(dev->cpumask) == 1 &&
-			    !tick_is_broadcast_device(dev)) {
-				BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+	/*
+	 * Now check whether the CPU has left unused per cpu devices
+	 */
+	list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
+		if (cpumask_test_cpu(cpu, dev->cpumask) &&
+		    cpumask_weight(dev->cpumask) == 1 &&
+		    !tick_is_broadcast_device(dev)) {
+			BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED);
 			list_del(&dev->list);
-			}
 		}
-		break;
-	default:
-		break;
 	}
 	raw_spin_unlock_irqrestore(&clockevents_lock, flags);
-	return ret;
 }
-EXPORT_SYMBOL_GPL(clockevents_notify);
+#endif
 
 #ifdef CONFIG_SYSFS
 struct bus_type clockevents_subsys = {
@@ -727,5 +794,3 @@ static int __init clockevents_init_sysfs(void)
 }
 device_initcall(clockevents_init_sysfs);
 #endif /* SYSFS */
-
-#endif /* GENERIC_CLOCK_EVENTS */
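Note: the series above replaces the single legacy ->set_mode() hook with per-state callbacks, and clockevents_sanity_check() now insists that a driver supplies set_state_shutdown() plus whatever matches its advertised features. A minimal, hypothetical driver sketch of the new callback style follows; the foo_* names, the FOO_* register layout and the 24 MHz input clock are illustrative assumptions, not part of this series. Only the clock_event_device fields themselves come from the code above.

#include <linux/clockchips.h>
#include <linux/io.h>

/* Hypothetical MMIO timer block; base address and registers are assumed. */
static void __iomem *foo_base;
#define FOO_LOAD		0x00
#define FOO_CTRL		0x04
#define FOO_CTRL_ENABLE		0x1
#define FOO_CTRL_PERIODIC	0x2

static int foo_timer_shutdown(struct clock_event_device *evt)
{
	writel_relaxed(0, foo_base + FOO_CTRL);		/* stop the counter */
	return 0;
}

static int foo_timer_set_periodic(struct clock_event_device *evt)
{
	writel_relaxed(24000000 / HZ, foo_base + FOO_LOAD);	/* assumed 24 MHz input */
	writel_relaxed(FOO_CTRL_ENABLE | FOO_CTRL_PERIODIC, foo_base + FOO_CTRL);
	return 0;
}

static int foo_timer_set_oneshot(struct clock_event_device *evt)
{
	writel_relaxed(FOO_CTRL_ENABLE, foo_base + FOO_CTRL);
	return 0;
}

static int foo_timer_set_next_event(unsigned long cycles,
				    struct clock_event_device *evt)
{
	writel_relaxed(cycles, foo_base + FOO_LOAD);
	writel_relaxed(FOO_CTRL_ENABLE, foo_base + FOO_CTRL);
	return 0;
}

static struct clock_event_device foo_clockevent = {
	.name			= "foo-timer",
	.features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.rating			= 300,
	/*
	 * New-style callbacks only; ->set_mode is left unset, so the core
	 * takes the state-callback path in __clockevents_set_state().
	 */
	.set_state_shutdown	= foo_timer_shutdown,
	.set_state_periodic	= foo_timer_set_periodic,
	.set_state_oneshot	= foo_timer_set_oneshot,
	.set_next_event		= foo_timer_set_next_event,
	.tick_resume		= foo_timer_shutdown,
};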
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4892352f0e49..15facb1b9c60 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
 	schedule_work(&watchdog_work);
 }
 
-static void clocksource_unstable(struct clocksource *cs, int64_t delta)
-{
-	printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
-	       cs->name, delta);
-	__clocksource_unstable(cs);
-}
-
 /**
  * clocksource_mark_unstable - mark clocksource unstable via watchdog
  * @cs:		clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 static void clocksource_watchdog(unsigned long data)
 {
 	struct clocksource *cs;
-	cycle_t csnow, wdnow, delta;
+	cycle_t csnow, wdnow, cslast, wdlast, delta;
 	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;
 
@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
 
 		delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
 		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+		wdlast = cs->wd_last; /* save these in case we print them */
+		cslast = cs->cs_last;
 		cs->cs_last = csnow;
 		cs->wd_last = wdnow;
 
@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
 
 		/* Check the deviation from the watchdog clocksource. */
 		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
-			clocksource_unstable(cs, cs_nsec - wd_nsec);
+			pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
+			pr_warn("	'%s' wd_now: %llx wd_last: %llx mask: %llx\n",
+				watchdog->name, wdnow, wdlast, watchdog->mask);
+			pr_warn("	'%s' cs_now: %llx cs_last: %llx mask: %llx\n",
+				cs->name, csnow, cslast, cs->mask);
+			__clocksource_unstable(cs);
 			continue;
 		}
 
@@ -469,26 +469,25 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
  * @shift:	cycle to nanosecond divisor (power of two)
  * @maxadj:	maximum adjustment value to mult (~11%)
  * @mask:	bitmask for two's complement subtraction of non 64 bit counters
+ * @max_cyc:	maximum cycle value before potential overflow (does not include
+ *		any safety margin)
+ *
+ * NOTE: This function includes a safety margin of 50%, in other words, we
+ * return half the number of nanoseconds the hardware counter can technically
+ * cover. This is done so that we can potentially detect problems caused by
+ * delayed timers or bad hardware, which might result in time intervals that
+ * are larger then what the math used can handle without overflows.
  */
-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
 	u64 max_nsecs, max_cycles;
 
 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
-	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
-	 * which is equivalent to the below.
-	 * max_cycles < (2^63)/(mult + maxadj)
-	 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
-	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
-	 * max_cycles < 2^(63 - log2(mult + maxadj))
-	 * max_cycles < 1 << (63 - log2(mult + maxadj))
-	 * Please note that we add 1 to the result of the log2 to account for
-	 * any rounding errors, ensure the above inequality is satisfied and
-	 * no overflow will occur.
+	 * cyc2ns() function without overflowing a 64-bit result.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
+	max_cycles = ULLONG_MAX;
+	do_div(max_cycles, mult+maxadj);
 
 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
@@ -499,27 +498,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
 	max_cycles = min(max_cycles, mask);
 	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
 
+	/* return the max_cycles value as well if requested */
+	if (max_cyc)
+		*max_cyc = max_cycles;
+
+	/* Return 50% of the actual maximum, so we can detect bad values */
+	max_nsecs >>= 1;
+
 	return max_nsecs;
 }
 
 /**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
- * @cs:         Pointer to clocksource
+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
+ * @cs:         Pointer to clocksource to be updated
  *
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static inline void clocksource_update_max_deferment(struct clocksource *cs)
 {
-	u64 max_nsecs;
-
-	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
-					  cs->mask);
-	/*
-	 * To ensure that the clocksource does not wrap whilst we are idle,
-	 * limit the time the clocksource can be deferred by 12.5%. Please
-	 * note a margin of 12.5% is used because this can be computed with
-	 * a shift, versus say 10% which would require division.
-	 */
-	return max_nsecs - (max_nsecs >> 3);
+	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+						cs->maxadj, cs->mask,
+						&cs->max_cycles);
 }
 
 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -648,7 +646,7 @@ static void clocksource_enqueue(struct clocksource *cs)
 }
 
 /**
- * __clocksource_updatefreq_scale - Used update clocksource with new freq
+ * __clocksource_update_freq_scale - Used update clocksource with new freq
  * @cs:		clocksource to be registered
  * @scale:	Scale factor multiplied against freq to get clocksource hz
  * @freq:	clocksource frequency (cycles per second) divided by scale
@@ -656,48 +654,64 @@ static void clocksource_enqueue(struct clocksource *cs)
  * This should only be called from the clocksource->enable() method.
  *
  * This *SHOULD NOT* be called directly! Please use the
- * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
+ * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
+ * functions.
  */
-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 	u64 sec;
+
 	/*
-	 * Calc the maximum number of seconds which we can run before
-	 * wrapping around. For clocksources which have a mask > 32bit
-	 * we need to limit the max sleep time to have a good
-	 * conversion precision. 10 minutes is still a reasonable
-	 * amount. That results in a shift value of 24 for a
-	 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
-	 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
-	 * margin as we do in clocksource_max_deferment()
+	 * Default clocksources are *special* and self-define their mult/shift.
+	 * But, you're not special, so you should specify a freq value.
 	 */
-	sec = (cs->mask - (cs->mask >> 3));
-	do_div(sec, freq);
-	do_div(sec, scale);
-	if (!sec)
-		sec = 1;
-	else if (sec > 600 && cs->mask > UINT_MAX)
-		sec = 600;
-
-	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
-			       NSEC_PER_SEC / scale, sec * scale);
-
+	if (freq) {
+		/*
+		 * Calc the maximum number of seconds which we can run before
+		 * wrapping around. For clocksources which have a mask > 32-bit
+		 * we need to limit the max sleep time to have a good
+		 * conversion precision. 10 minutes is still a reasonable
+		 * amount. That results in a shift value of 24 for a
+		 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
+		 * ~ 0.06ppm granularity for NTP.
		 */
+		sec = cs->mask;
+		do_div(sec, freq);
+		do_div(sec, scale);
+		if (!sec)
+			sec = 1;
+		else if (sec > 600 && cs->mask > UINT_MAX)
+			sec = 600;
+
+		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+				       NSEC_PER_SEC / scale, sec * scale);
+	}
 	/*
-	 * for clocksources that have large mults, to avoid overflow.
-	 * Since mult may be adjusted by ntp, add an safety extra margin
-	 *
+	 * Ensure clocksources that have large 'mult' values don't overflow
+	 * when adjusted.
 	 */
 	cs->maxadj = clocksource_max_adjustment(cs);
-	while ((cs->mult + cs->maxadj < cs->mult)
-		|| (cs->mult - cs->maxadj > cs->mult)) {
+	while (freq && ((cs->mult + cs->maxadj < cs->mult)
+		|| (cs->mult - cs->maxadj > cs->mult))) {
 		cs->mult >>= 1;
 		cs->shift--;
 		cs->maxadj = clocksource_max_adjustment(cs);
 	}
 
-	cs->max_idle_ns = clocksource_max_deferment(cs);
+	/*
	 * Only warn for *special* clocksources that self-define
	 * their mult/shift values and don't specify a freq.
	 */
+	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+		"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
+		cs->name);
+
+	clocksource_update_max_deferment(cs);
+
+	pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
+		cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
 }
-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
 
 /**
  * __clocksource_register_scale - Used to install new clocksources
@@ -714,7 +728,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 
 	/* Initialize mult/shift and max_idle_ns */
-	__clocksource_updatefreq_scale(cs, scale, freq);
+	__clocksource_update_freq_scale(cs, scale, freq);
 
 	/* Add clocksource to the clocksource list */
 	mutex_lock(&clocksource_mutex);
@@ -726,33 +740,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 }
 EXPORT_SYMBOL_GPL(__clocksource_register_scale);
 
-
-/**
- * clocksource_register - Used to install new clocksources
- * @cs: clocksource to be registered
- *
- * Returns -EBUSY if registration fails, zero otherwise.
- */
-int clocksource_register(struct clocksource *cs)
-{
-	/* calculate max adjustment for given mult/shift */
-	cs->maxadj = clocksource_max_adjustment(cs);
-	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
-		"Clocksource %s might overflow on 11%% adjustment\n",
-		cs->name);
-
-	/* calculate max idle time permitted for this clocksource */
-	cs->max_idle_ns = clocksource_max_deferment(cs);
-
-	mutex_lock(&clocksource_mutex);
-	clocksource_enqueue(cs);
-	clocksource_enqueue_watchdog(cs);
-	clocksource_select();
-	mutex_unlock(&clocksource_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(clocksource_register);
-
 static void __clocksource_change_rating(struct clocksource *cs, int rating)
 {
 	list_del(&cs->list);
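Note: with clocksource_register() removed, drivers go through the frequency-based registration helpers and the core derives mult/shift, maxadj, max_idle_ns and the new max_cycles itself. A minimal sketch of that path, assuming a hypothetical 32-bit MMIO counter at foo_counter_base running at 24 MHz; clocksource_register_hz() is the existing wrapper around __clocksource_register_scale().

#include <linux/clocksource.h>
#include <linux/io.h>

static void __iomem *foo_counter_base;	/* assumed MMIO counter register */

static cycle_t foo_cs_read(struct clocksource *cs)
{
	return (cycle_t)readl_relaxed(foo_counter_base);
}

static struct clocksource foo_clocksource = {
	.name	= "foo-counter",
	.rating	= 300,
	.read	= foo_cs_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init foo_clocksource_init(void)
{
	/*
	 * mult/shift, maxadj, max_idle_ns and max_cycles are all derived
	 * from the frequency by the core; no manual setup is needed.
	 */
	return clocksource_register_hz(&foo_clocksource, 24000000);
}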
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index bee0c1f78091..76d4bd962b19 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -54,7 +54,7 @@
 
 #include <trace/events/timer.h>
 
-#include "timekeeping.h"
+#include "tick-internal.h"
 
 /*
  * The timer bases:
@@ -1707,17 +1707,10 @@ static int hrtimer_cpu_notify(struct notifier_block *self,
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
-		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-	{
-		clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
 		migrate_hrtimers(scpu);
 		break;
-	}
 #endif
 
 	default:
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a6a5bf53e86d..347fecf86a3f 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include "tick-internal.h"
+#include "timekeeping.h"
 
 /* The Jiffies based clocksource is the lowest common
  * denominator clock source which should function on
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
 	.mask		= 0xffffffff, /*32bits*/
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
 	.shift		= JIFFIES_SHIFT,
+	.max_cycles	= 10,
 };
 
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
 
 static int __init init_jiffies_clocksource(void)
 {
-	return clocksource_register(&clocksource_jiffies);
+	return __clocksource_register(&clocksource_jiffies);
 }
 
 core_initcall(init_jiffies_clocksource);
@@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
 
 	refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
 
-	clocksource_register(&refined_jiffies);
+	__clocksource_register(&refined_jiffies);
 	return 0;
 }
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 0f60b08a4f07..7a681003001c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/rtc.h>
 
-#include "tick-internal.h"
 #include "ntp_internal.h"
 
 /*
@@ -459,6 +458,16 @@ out:
 	return leap;
 }
 
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
+int __weak update_persistent_clock64(struct timespec64 now64)
+{
+	struct timespec now;
+
+	now = timespec64_to_timespec(now64);
+	return update_persistent_clock(now);
+}
+#endif
+
 #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
 static void sync_cmos_clock(struct work_struct *work);
 
@@ -494,8 +503,9 @@ static void sync_cmos_clock(struct work_struct *work)
 		if (persistent_clock_is_local)
 			adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
-		fail = update_persistent_clock(timespec64_to_timespec(adjust));
+		fail = update_persistent_clock64(adjust);
 #endif
+
 #ifdef CONFIG_RTC_SYSTOHC
 		if (fail == -ENODEV)
 			fail = rtc_set_ntp_time(adjust);
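Note: the __weak update_persistent_clock64() added above keeps existing architectures working by converting back to a timespec. An architecture that wants the year-2038-safe path can provide its own definition instead; a hedged sketch, assuming a board-specific helper foo_rtc_set_time() that takes 64-bit seconds (this helper is an illustration, not an existing API):

#include <linux/time64.h>

int foo_rtc_set_time(time64_t secs);	/* assumed board-specific helper */

int update_persistent_clock64(struct timespec64 now)
{
	/* Overrides the __weak fallback, so no timespec truncation occurs. */
	return foo_rtc_set_time(now.tv_sec);
}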
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 01d2d15aa662..a26036d37a38 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -1,5 +1,6 @@
 /*
- * sched_clock.c: support for extending counters to full 64-bit ns counter
+ * sched_clock.c: Generic sched_clock() support, to extend low level
+ *                hardware time counters to full 64-bit ns values.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -18,15 +19,53 @@
 #include <linux/seqlock.h>
 #include <linux/bitops.h>
 
-struct clock_data {
-	ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns:		sched_clock() value at last update
+ * @epoch_cyc:		Clock cycle value at last update.
+ * @sched_clock_mask:	Bitmask for two's complement subtraction of non 64bit
+ *			clocks.
+ * @read_sched_clock:	Current clock source (or dummy source when suspended).
+ * @mult:		Multipler for scaled math conversion.
+ * @shift:		Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
 	u64 epoch_ns;
 	u64 epoch_cyc;
-	seqcount_t seq;
-	unsigned long rate;
+	u64 sched_clock_mask;
+	u64 (*read_sched_clock)(void);
 	u32 mult;
 	u32 shift;
-	bool suspended;
+};
+
+/**
+ * struct clock_data - all data needed for sched_clock() (including
+ *                     registration of a new clock source)
+ *
+ * @seq:		Sequence counter for protecting updates. The lowest
+ *			bit is the index for @read_data.
+ * @read_data:		Data required to read from sched_clock.
+ * @wrap_kt:		Duration for which clock can run before wrapping.
+ * @rate:		Tick rate of the registered clock.
+ * @actual_read_sched_clock: Registered hardware level clock read function.
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
+ * into a single 64-byte cache line.
+ */
+struct clock_data {
+	seqcount_t seq;
+	struct clock_read_data read_data[2];
+	ktime_t wrap_kt;
+	unsigned long rate;
+
+	u64 (*actual_read_sched_clock)(void);
 };
 
 static struct hrtimer sched_clock_timer;
@@ -34,12 +73,6 @@ static int irqtime = -1;
 
 core_param(irqtime, irqtime, int, 0400);
 
-static struct clock_data cd = {
-	.mult	= NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
 static u64 notrace jiffy_sched_clock_read(void)
 {
 	/*
@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
 	return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd ____cacheline_aligned = {
+	.read_data[0] = { .mult = NSEC_PER_SEC / HZ,
+			  .read_sched_clock = jiffy_sched_clock_read, },
+	.actual_read_sched_clock = jiffy_sched_clock_read,
+};
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 
 unsigned long long notrace sched_clock(void)
 {
-	u64 epoch_ns;
-	u64 epoch_cyc;
-	u64 cyc;
+	u64 cyc, res;
 	unsigned long seq;
-
-	if (cd.suspended)
-		return cd.epoch_ns;
+	struct clock_read_data *rd;
 
 	do {
-		seq = raw_read_seqcount_begin(&cd.seq);
-		epoch_cyc = cd.epoch_cyc;
-		epoch_ns = cd.epoch_ns;
+		seq = raw_read_seqcount(&cd.seq);
+		rd = cd.read_data + (seq & 1);
+
+		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
+		      rd->sched_clock_mask;
+		res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
 	} while (read_seqcount_retry(&cd.seq, seq));
 
-	cyc = read_sched_clock();
-	cyc = (cyc - epoch_cyc) & sched_clock_mask;
-	return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+	return res;
+}
+
+/*
+ * Updating the data required to read the clock.
+ *
+ * sched_clock() will never observe mis-matched data even if called from
+ * an NMI. We do this by maintaining an odd/even copy of the data and
+ * steering sched_clock() to one or the other using a sequence counter.
+ * In order to preserve the data cache profile of sched_clock() as much
+ * as possible the system reverts back to the even copy when the update
+ * completes; the odd copy is used *only* during an update.
+ */
+static void update_clock_read_data(struct clock_read_data *rd)
+{
+	/* update the backup (odd) copy with the new data */
+	cd.read_data[1] = *rd;
+
+	/* steer readers towards the odd copy */
+	raw_write_seqcount_latch(&cd.seq);
+
+	/* now its safe for us to update the normal (even) copy */
+	cd.read_data[0] = *rd;
+
+	/* switch readers back to the even copy */
+	raw_write_seqcount_latch(&cd.seq);
 }
 
 /*
- * Atomically update the sched_clock epoch.
+ * Atomically update the sched_clock() epoch.
  */
-static void notrace update_sched_clock(void)
+static void update_sched_clock(void)
 {
-	unsigned long flags;
 	u64 cyc;
 	u64 ns;
+	struct clock_read_data rd;
+
+	rd = cd.read_data[0];
+
+	cyc = cd.actual_read_sched_clock();
+	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+
+	rd.epoch_ns = ns;
+	rd.epoch_cyc = cyc;
 
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns +
-		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
-
-	raw_local_irq_save(flags);
-	raw_write_seqcount_begin(&cd.seq);
-	cd.epoch_ns = ns;
-	cd.epoch_cyc = cyc;
-	raw_write_seqcount_end(&cd.seq);
-	raw_local_irq_restore(flags);
+	update_clock_read_data(&rd);
 }
 
 static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
 {
 	update_sched_clock();
 	hrtimer_forward_now(hrt, cd.wrap_kt);
+
 	return HRTIMER_RESTART;
 }
 
-void __init sched_clock_register(u64 (*read)(void), int bits,
-				 unsigned long rate)
+void __init
+sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 {
 	u64 res, wrap, new_mask, new_epoch, cyc, ns;
 	u32 new_mult, new_shift;
-	ktime_t new_wrap_kt;
 	unsigned long r;
 	char r_unit;
+	struct clock_read_data rd;
 
 	if (cd.rate > rate)
 		return;
 
 	WARN_ON(!irqs_disabled());
 
-	/* calculate the mult/shift to convert counter ticks to ns. */
+	/* Calculate the mult/shift to convert counter ticks to ns. */
 	clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
 
 	new_mask = CLOCKSOURCE_MASK(bits);
+	cd.rate = rate;
+
+	/* Calculate how many nanosecs until we risk wrapping */
+	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
+	cd.wrap_kt = ns_to_ktime(wrap);
 
-	/* calculate how many ns until we wrap */
-	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
-	new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
+	rd = cd.read_data[0];
 
-	/* update epoch for new counter and update epoch_ns from old counter*/
+	/* Update epoch for new counter and update 'epoch_ns' from old counter*/
 	new_epoch = read();
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
+	cyc = cd.actual_read_sched_clock();
+	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+	cd.actual_read_sched_clock = read;
 
-	raw_write_seqcount_begin(&cd.seq);
-	read_sched_clock = read;
-	sched_clock_mask = new_mask;
-	cd.rate = rate;
-	cd.wrap_kt = new_wrap_kt;
-	cd.mult = new_mult;
-	cd.shift = new_shift;
-	cd.epoch_cyc = new_epoch;
-	cd.epoch_ns = ns;
-	raw_write_seqcount_end(&cd.seq);
+	rd.read_sched_clock = read;
+	rd.sched_clock_mask = new_mask;
+	rd.mult = new_mult;
+	rd.shift = new_shift;
+	rd.epoch_cyc = new_epoch;
+	rd.epoch_ns = ns;
+
+	update_clock_read_data(&rd);
 
 	r = rate;
 	if (r >= 4000000) {
 		r /= 1000000;
 		r_unit = 'M';
-	} else if (r >= 1000) {
-		r /= 1000;
-		r_unit = 'k';
-	} else
-		r_unit = ' ';
-
-	/* calculate the ns resolution of this counter */
+	} else {
+		if (r >= 1000) {
+			r /= 1000;
+			r_unit = 'k';
+		} else {
+			r_unit = ' ';
+		}
+	}
+
+	/* Calculate the ns resolution of this counter */
 	res = cyc_to_ns(1ULL, new_mult, new_shift);
 
 	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
 		bits, r, r_unit, res, wrap);
 
-	/* Enable IRQ time accounting if we have a fast enough sched_clock */
+	/* Enable IRQ time accounting if we have a fast enough sched_clock() */
 	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
 		enable_sched_clock_irqtime();
 
@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 void __init sched_clock_postinit(void)
 {
 	/*
-	 * If no sched_clock function has been provided at that point,
+	 * If no sched_clock() function has been provided at that point,
 	 * make it the final one one.
 	 */
-	if (read_sched_clock == jiffy_sched_clock_read)
+	if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
 		sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
 	update_sched_clock();
@@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
 }
 
+/*
+ * Clock read function for use when the clock is suspended.
+ *
+ * This function makes it appear to sched_clock() as if the clock
+ * stopped counting at its last update.
+ *
+ * This function must only be called from the critical
+ * section in sched_clock(). It relies on the read_seqcount_retry()
+ * at the end of the critical section to be sure we observe the
+ * correct copy of 'epoch_cyc'.
+ */
+static u64 notrace suspended_sched_clock_read(void)
+{
+	unsigned long seq = raw_read_seqcount(&cd.seq);
+
+	return cd.read_data[seq & 1].epoch_cyc;
+}
+
 static int sched_clock_suspend(void)
 {
+	struct clock_read_data *rd = &cd.read_data[0];
+
 	update_sched_clock();
 	hrtimer_cancel(&sched_clock_timer);
-	cd.suspended = true;
+	rd->read_sched_clock = suspended_sched_clock_read;
+
 	return 0;
 }
 
 static void sched_clock_resume(void)
 {
-	cd.epoch_cyc = read_sched_clock();
+	struct clock_read_data *rd = &cd.read_data[0];
+
+	rd->epoch_cyc = cd.actual_read_sched_clock();
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
-	cd.suspended = false;
+	rd->read_sched_clock = cd.actual_read_sched_clock;
 }
 
 static struct syscore_ops sched_clock_ops = {
 	.suspend = sched_clock_suspend,
 	.resume = sched_clock_resume,
 };
 
 static int __init sched_clock_syscore_init(void)
 {
 	register_syscore_ops(&sched_clock_ops);
+
 	return 0;
 }
 device_initcall(sched_clock_syscore_init);
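Note: the registration interface is unchanged by the rework above; only the internals now keep two latched copies of clock_read_data so that sched_clock() stays consistent even from NMI context. A minimal sketch of a platform hooking up its free-running counter, assuming a hypothetical 32-bit register at foo_sched_base clocked at 24 MHz (both names and the rate are illustrative):

#include <linux/init.h>
#include <linux/io.h>
#include <linux/sched_clock.h>

static void __iomem *foo_sched_base;	/* assumed MMIO counter register */

static u64 notrace foo_sched_read(void)
{
	return readl_relaxed(foo_sched_base);
}

static void __init foo_sched_clock_init(void)
{
	/* 32 bits wide, 24 MHz; wrap handling and mult/shift come from the core */
	sched_clock_register(foo_sched_read, 32, 24000000);
}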
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 066f0ec05e48..7e8ca4f448a8 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -33,12 +33,14 @@ static cpumask_var_t tick_broadcast_mask;
 static cpumask_var_t tick_broadcast_on;
 static cpumask_var_t tmpmask;
 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
-static int tick_broadcast_force;
+static int tick_broadcast_forced;
 
 #ifdef CONFIG_TICK_ONESHOT
 static void tick_broadcast_clear_oneshot(int cpu);
+static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 #else
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
+static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
 #endif
 
 /*
@@ -303,7 +305,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
 	/*
 	 * The device is in periodic mode. No reprogramming necessary:
 	 */
-	if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
+	if (dev->state == CLOCK_EVT_STATE_PERIODIC)
 		goto unlock;
 
 	/*
@@ -324,49 +326,54 @@ unlock:
 	raw_spin_unlock(&tick_broadcast_lock);
 }
 
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop
+/**
+ * tick_broadcast_control - Enable/disable or force broadcast mode
+ * @mode:	The selected broadcast mode
+ *
+ * Called when the system enters a state where affected tick devices
+ * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
+ *
+ * Called with interrupts disabled, so clockevents_lock is not
+ * required here because the local clock event device cannot go away
+ * under us.
  */
-static void tick_do_broadcast_on_off(unsigned long *reason)
+void tick_broadcast_control(enum tick_broadcast_mode mode)
 {
 	struct clock_event_device *bc, *dev;
 	struct tick_device *td;
-	unsigned long flags;
 	int cpu, bc_stopped;
 
-	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-
-	cpu = smp_processor_id();
-	td = &per_cpu(tick_cpu_device, cpu);
+	td = this_cpu_ptr(&tick_cpu_device);
 	dev = td->evtdev;
-	bc = tick_broadcast_device.evtdev;
 
 	/*
 	 * Is the device not affected by the powerstate ?
 	 */
 	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
-		goto out;
+		return;
 
 	if (!tick_device_is_functional(dev))
-		goto out;
+		return;
 
+	raw_spin_lock(&tick_broadcast_lock);
+	cpu = smp_processor_id();
+	bc = tick_broadcast_device.evtdev;
 	bc_stopped = cpumask_empty(tick_broadcast_mask);
 
-	switch (*reason) {
-	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
-	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
+	switch (mode) {
+	case TICK_BROADCAST_FORCE:
+		tick_broadcast_forced = 1;
+	case TICK_BROADCAST_ON:
 		cpumask_set_cpu(cpu, tick_broadcast_on);
 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
 			if (tick_broadcast_device.mode ==
 			    TICKDEV_MODE_PERIODIC)
 				clockevents_shutdown(dev);
 		}
-		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
-			tick_broadcast_force = 1;
 		break;
-	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-		if (tick_broadcast_force)
+
+	case TICK_BROADCAST_OFF:
+		if (tick_broadcast_forced)
 			break;
 		cpumask_clear_cpu(cpu, tick_broadcast_on);
 		if (!tick_device_is_functional(dev))
@@ -388,22 +395,9 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
 		else
 			tick_broadcast_setup_oneshot(bc);
 	}
-out:
-	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
-}
-
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop.
- */
-void tick_broadcast_on_off(unsigned long reason, int *oncpu)
-{
-	if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
-		printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
-		       "offline CPU #%d\n", *oncpu);
-	else
-		tick_do_broadcast_on_off(&reason);
+	raw_spin_unlock(&tick_broadcast_lock);
 }
+EXPORT_SYMBOL_GPL(tick_broadcast_control);
 
 /*
  * Set the periodic handler depending on broadcast on/off
@@ -416,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
 		dev->event_handler = tick_handle_periodic_broadcast;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Remove a CPU from broadcasting
  */
-void tick_shutdown_broadcast(unsigned int *cpup)
+void tick_shutdown_broadcast(unsigned int cpu)
 {
 	struct clock_event_device *bc;
 	unsigned long flags;
-	unsigned int cpu = *cpup;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -438,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 void tick_suspend_broadcast(void)
 {
@@ -453,38 +448,48 @@ void tick_suspend_broadcast(void)
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
-int tick_resume_broadcast(void)
+/*
+ * This is called from tick_resume_local() on a resuming CPU. That's
+ * called from the core resume function, tick_unfreeze() and the magic XEN
+ * resume hackery.
+ *
+ * In none of these cases the broadcast device mode can change and the
+ * bit of the resuming CPU in the broadcast mask is safe as well.
+ */
+bool tick_resume_check_broadcast(void)
+{
+	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
+		return false;
+	else
+		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
+}
+
+void tick_resume_broadcast(void)
 {
 	struct clock_event_device *bc;
 	unsigned long flags;
-	int broadcast = 0;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
 	bc = tick_broadcast_device.evtdev;
 
 	if (bc) {
-		clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
+		clockevents_tick_resume(bc);
 
 		switch (tick_broadcast_device.mode) {
 		case TICKDEV_MODE_PERIODIC:
 			if (!cpumask_empty(tick_broadcast_mask))
 				tick_broadcast_start_periodic(bc);
-			broadcast = cpumask_test_cpu(smp_processor_id(),
-						     tick_broadcast_mask);
 			break;
 		case TICKDEV_MODE_ONESHOT:
 			if (!cpumask_empty(tick_broadcast_mask))
-				broadcast = tick_resume_broadcast_oneshot(bc);
+				tick_resume_broadcast_oneshot(bc);
 			break;
 		}
 	}
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
-
-	return broadcast;
 }
 
-
 #ifdef CONFIG_TICK_ONESHOT
 
 static cpumask_var_t tick_broadcast_oneshot_mask;
@@ -532,8 +537,8 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
 {
 	int ret;
 
-	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
-		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+	if (bc->state != CLOCK_EVT_STATE_ONESHOT)
+		clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
 
 	ret = clockevents_program_event(bc, expires, force);
 	if (!ret)
@@ -541,10 +546,9 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
541 return ret; 546 return ret;
542} 547}
543 548
544int tick_resume_broadcast_oneshot(struct clock_event_device *bc) 549static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
545{ 550{
546 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); 551 clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
547 return 0;
548} 552}
549 553
550/* 554/*
@@ -562,8 +566,8 @@ void tick_check_oneshot_broadcast_this_cpu(void)
562 * switched over, leave the device alone. 566 * switched over, leave the device alone.
563 */ 567 */
564 if (td->mode == TICKDEV_MODE_ONESHOT) { 568 if (td->mode == TICKDEV_MODE_ONESHOT) {
565 clockevents_set_mode(td->evtdev, 569 clockevents_set_state(td->evtdev,
566 CLOCK_EVT_MODE_ONESHOT); 570 CLOCK_EVT_STATE_ONESHOT);
567 } 571 }
568 } 572 }
569} 573}
@@ -666,31 +670,26 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
666 if (dev->next_event.tv64 < bc->next_event.tv64) 670 if (dev->next_event.tv64 < bc->next_event.tv64)
667 return; 671 return;
668 } 672 }
669 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); 673 clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
670} 674}
671 675
672static void broadcast_move_bc(int deadcpu) 676/**
673{ 677 * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
674 struct clock_event_device *bc = tick_broadcast_device.evtdev; 678 * @state: The target state (enter/exit)
675 679 *
676 if (!bc || !broadcast_needs_cpu(bc, deadcpu)) 680 * The system enters/leaves a state, where affected devices might stop
677 return;
678 /* This moves the broadcast assignment to this cpu */
679 clockevents_program_event(bc, bc->next_event, 1);
680}
681
682/*
683 * Powerstate information: The system enters/leaves a state, where
684 * affected devices might stop
685 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. 681 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
682 *
683 * Called with interrupts disabled, so clockevents_lock is not
684 * required here because the local clock event device cannot go away
685 * under us.
686 */ 686 */
687int tick_broadcast_oneshot_control(unsigned long reason) 687int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
688{ 688{
689 struct clock_event_device *bc, *dev; 689 struct clock_event_device *bc, *dev;
690 struct tick_device *td; 690 struct tick_device *td;
691 unsigned long flags;
692 ktime_t now;
693 int cpu, ret = 0; 691 int cpu, ret = 0;
692 ktime_t now;
694 693
695 /* 694 /*
696 * Periodic mode does not care about the enter/exit of power 695 * Periodic mode does not care about the enter/exit of power
@@ -703,17 +702,17 @@ int tick_broadcast_oneshot_control(unsigned long reason)
703 * We are called with preemption disabled from the depth of the 702 * We are called with preemption disabled from the depth of the
704 * idle code, so we can't be moved away. 703 * idle code, so we can't be moved away.
705 */ 704 */
706 cpu = smp_processor_id(); 705 td = this_cpu_ptr(&tick_cpu_device);
707 td = &per_cpu(tick_cpu_device, cpu);
708 dev = td->evtdev; 706 dev = td->evtdev;
709 707
710 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) 708 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
711 return 0; 709 return 0;
712 710
711 raw_spin_lock(&tick_broadcast_lock);
713 bc = tick_broadcast_device.evtdev; 712 bc = tick_broadcast_device.evtdev;
713 cpu = smp_processor_id();
714 714
715 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 715 if (state == TICK_BROADCAST_ENTER) {
716 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
717 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { 716 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
718 WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); 717 WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
719 broadcast_shutdown_local(bc, dev); 718 broadcast_shutdown_local(bc, dev);
@@ -741,7 +740,7 @@ int tick_broadcast_oneshot_control(unsigned long reason)
741 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); 740 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
742 } else { 741 } else {
743 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { 742 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
744 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); 743 clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
745 /* 744 /*
746 * The cpu which was handling the broadcast 745 * The cpu which was handling the broadcast
747 * timer marked this cpu in the broadcast 746 * timer marked this cpu in the broadcast
@@ -805,9 +804,10 @@ int tick_broadcast_oneshot_control(unsigned long reason)
805 } 804 }
806 } 805 }
807out: 806out:
808 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 807 raw_spin_unlock(&tick_broadcast_lock);
809 return ret; 808 return ret;
810} 809}
810EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
811 811
812/* 812/*
813 * Reset the one shot broadcast for a cpu 813 * Reset the one shot broadcast for a cpu
@@ -842,7 +842,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
842 842
843 /* Set it up only once ! */ 843 /* Set it up only once ! */
844 if (bc->event_handler != tick_handle_oneshot_broadcast) { 844 if (bc->event_handler != tick_handle_oneshot_broadcast) {
845 int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; 845 int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC;
846 846
847 bc->event_handler = tick_handle_oneshot_broadcast; 847 bc->event_handler = tick_handle_oneshot_broadcast;
848 848
@@ -858,7 +858,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
858 tick_broadcast_oneshot_mask, tmpmask); 858 tick_broadcast_oneshot_mask, tmpmask);
859 859
860 if (was_periodic && !cpumask_empty(tmpmask)) { 860 if (was_periodic && !cpumask_empty(tmpmask)) {
861 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); 861 clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
862 tick_broadcast_init_next_event(tmpmask, 862 tick_broadcast_init_next_event(tmpmask,
863 tick_next_period); 863 tick_next_period);
864 tick_broadcast_set_event(bc, cpu, tick_next_period, 1); 864 tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
@@ -894,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void)
894 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 894 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
895} 895}
896 896
897#ifdef CONFIG_HOTPLUG_CPU
898void hotplug_cpu__broadcast_tick_pull(int deadcpu)
899{
900 struct clock_event_device *bc;
901 unsigned long flags;
902
903 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
904 bc = tick_broadcast_device.evtdev;
905
906 if (bc && broadcast_needs_cpu(bc, deadcpu)) {
907 /* This moves the broadcast assignment to this CPU: */
908 clockevents_program_event(bc, bc->next_event, 1);
909 }
910 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
911}
897 912
898/* 913/*
899 * Remove a dead CPU from broadcasting 914 * Remove a dead CPU from broadcasting
900 */ 915 */
901void tick_shutdown_broadcast_oneshot(unsigned int *cpup) 916void tick_shutdown_broadcast_oneshot(unsigned int cpu)
902{ 917{
903 unsigned long flags; 918 unsigned long flags;
904 unsigned int cpu = *cpup;
905 919
906 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 920 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
907 921
@@ -913,10 +927,9 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
913 cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); 927 cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
914 cpumask_clear_cpu(cpu, tick_broadcast_force_mask); 928 cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
915 929
916 broadcast_move_bc(cpu);
917
918 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 930 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
919} 931}
932#endif
920 933
921/* 934/*
922 * Check, whether the broadcast device is in one shot mode 935 * Check, whether the broadcast device is in one shot mode
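
The oneshot control path above reduces to cpumask bookkeeping: a CPU entering deep idle sets its bit in tick_broadcast_oneshot_mask and shuts its local device down, and clears the bit again on exit. Below is a minimal user-space sketch of that enter/exit accounting; the uint64_t mask, the function name and the printed messages are stand-ins for the demo, not the kernel API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for tick_broadcast_oneshot_mask: one bit per CPU. */
static uint64_t oneshot_mask;

enum bc_state { BC_ENTER, BC_EXIT };	/* models TICK_BROADCAST_ENTER/EXIT */

/* Returns true while the broadcast device covers this CPU's tick. */
static bool broadcast_oneshot_control(unsigned int cpu, enum bc_state state)
{
	uint64_t bit = 1ULL << cpu;

	if (state == BC_ENTER) {
		if (!(oneshot_mask & bit)) {
			oneshot_mask |= bit;	/* hand the tick to the broadcast device */
			printf("cpu%u: local tick shut down, broadcast armed\n", cpu);
		}
		return true;
	}

	if (oneshot_mask & bit) {
		oneshot_mask &= ~bit;		/* take the tick back on idle exit */
		printf("cpu%u: local tick reprogrammed\n", cpu);
	}
	return false;
}

int main(void)
{
	broadcast_oneshot_control(1, BC_ENTER);
	broadcast_oneshot_control(2, BC_ENTER);
	broadcast_oneshot_control(1, BC_EXIT);
	printf("oneshot mask now %#llx\n", (unsigned long long)oneshot_mask);
	return 0;
}
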
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index f7c515595b42..3ae6afa1eb98 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -102,7 +102,7 @@ void tick_handle_periodic(struct clock_event_device *dev)
102 102
103 tick_periodic(cpu); 103 tick_periodic(cpu);
104 104
105 if (dev->mode != CLOCK_EVT_MODE_ONESHOT) 105 if (dev->state != CLOCK_EVT_STATE_ONESHOT)
106 return; 106 return;
107 for (;;) { 107 for (;;) {
108 /* 108 /*
@@ -140,7 +140,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
140 140
141 if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && 141 if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
142 !tick_broadcast_oneshot_active()) { 142 !tick_broadcast_oneshot_active()) {
143 clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); 143 clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
144 } else { 144 } else {
145 unsigned long seq; 145 unsigned long seq;
146 ktime_t next; 146 ktime_t next;
@@ -150,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
150 next = tick_next_period; 150 next = tick_next_period;
151 } while (read_seqretry(&jiffies_lock, seq)); 151 } while (read_seqretry(&jiffies_lock, seq));
152 152
153 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); 153 clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
154 154
155 for (;;) { 155 for (;;) {
156 if (!clockevents_program_event(dev, next, false)) 156 if (!clockevents_program_event(dev, next, false))
@@ -332,14 +332,16 @@ out_bc:
332 tick_install_broadcast_device(newdev); 332 tick_install_broadcast_device(newdev);
333} 333}
334 334
335#ifdef CONFIG_HOTPLUG_CPU
335/* 336/*
336 * Transfer the do_timer job away from a dying cpu. 337 * Transfer the do_timer job away from a dying cpu.
337 * 338 *
338 * Called with interrupts disabled. 339 * Called with interrupts disabled. No locking required. If
340 * tick_do_timer_cpu is owned by this cpu, nothing can change it.
339 */ 341 */
340void tick_handover_do_timer(int *cpup) 342void tick_handover_do_timer(void)
341{ 343{
342 if (*cpup == tick_do_timer_cpu) { 344 if (tick_do_timer_cpu == smp_processor_id()) {
343 int cpu = cpumask_first(cpu_online_mask); 345 int cpu = cpumask_first(cpu_online_mask);
344 346
345 tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : 347 tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
@@ -354,9 +356,9 @@ void tick_handover_do_timer(int *cpup)
354 * access the hardware device itself. 356 * access the hardware device itself.
355 * We just set the mode and remove it from the lists. 357 * We just set the mode and remove it from the lists.
356 */ 358 */
357void tick_shutdown(unsigned int *cpup) 359void tick_shutdown(unsigned int cpu)
358{ 360{
359 struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); 361 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
360 struct clock_event_device *dev = td->evtdev; 362 struct clock_event_device *dev = td->evtdev;
361 363
362 td->mode = TICKDEV_MODE_PERIODIC; 364 td->mode = TICKDEV_MODE_PERIODIC;
@@ -365,27 +367,42 @@ void tick_shutdown(unsigned int *cpup)
365 * Prevent that the clock events layer tries to call 367 * Prevent that the clock events layer tries to call
366 * the set mode function! 368 * the set mode function!
367 */ 369 */
370 dev->state = CLOCK_EVT_STATE_DETACHED;
368 dev->mode = CLOCK_EVT_MODE_UNUSED; 371 dev->mode = CLOCK_EVT_MODE_UNUSED;
369 clockevents_exchange_device(dev, NULL); 372 clockevents_exchange_device(dev, NULL);
370 dev->event_handler = clockevents_handle_noop; 373 dev->event_handler = clockevents_handle_noop;
371 td->evtdev = NULL; 374 td->evtdev = NULL;
372 } 375 }
373} 376}
377#endif
374 378
375void tick_suspend(void) 379/**
380 * tick_suspend_local - Suspend the local tick device
381 *
382 * Called from the local cpu for freeze with interrupts disabled.
383 *
384 * No locks required. Nothing can change the per cpu device.
385 */
386void tick_suspend_local(void)
376{ 387{
377 struct tick_device *td = this_cpu_ptr(&tick_cpu_device); 388 struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
378 389
379 clockevents_shutdown(td->evtdev); 390 clockevents_shutdown(td->evtdev);
380} 391}
381 392
382void tick_resume(void) 393/**
394 * tick_resume_local - Resume the local tick device
395 *
396 * Called from the local CPU for unfreeze or XEN resume magic.
397 *
398 * No locks required. Nothing can change the per cpu device.
399 */
400void tick_resume_local(void)
383{ 401{
384 struct tick_device *td = this_cpu_ptr(&tick_cpu_device); 402 struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
385 int broadcast = tick_resume_broadcast(); 403 bool broadcast = tick_resume_check_broadcast();
386
387 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
388 404
405 clockevents_tick_resume(td->evtdev);
389 if (!broadcast) { 406 if (!broadcast) {
390 if (td->mode == TICKDEV_MODE_PERIODIC) 407 if (td->mode == TICKDEV_MODE_PERIODIC)
391 tick_setup_periodic(td->evtdev, 0); 408 tick_setup_periodic(td->evtdev, 0);
@@ -394,6 +411,35 @@ void tick_resume(void)
394 } 411 }
395} 412}
396 413
414/**
415 * tick_suspend - Suspend the tick and the broadcast device
416 *
417 * Called from syscore_suspend() via timekeeping_suspend with only one
418 * CPU online and interrupts disabled or from tick_unfreeze() under
419 * tick_freeze_lock.
420 *
421 * No locks required. Nothing can change the per cpu device.
422 */
423void tick_suspend(void)
424{
425 tick_suspend_local();
426 tick_suspend_broadcast();
427}
428
429/**
430 * tick_resume - Resume the tick and the broadcast device
431 *
432 * Called from syscore_resume() via timekeeping_resume with only one
433 * CPU online and interrupts disabled.
434 *
435 * No locks required. Nothing can change the per cpu device.
436 */
437void tick_resume(void)
438{
439 tick_resume_broadcast();
440 tick_resume_local();
441}
442
397static DEFINE_RAW_SPINLOCK(tick_freeze_lock); 443static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
398static unsigned int tick_freeze_depth; 444static unsigned int tick_freeze_depth;
399 445
@@ -411,12 +457,10 @@ void tick_freeze(void)
411 raw_spin_lock(&tick_freeze_lock); 457 raw_spin_lock(&tick_freeze_lock);
412 458
413 tick_freeze_depth++; 459 tick_freeze_depth++;
414 if (tick_freeze_depth == num_online_cpus()) { 460 if (tick_freeze_depth == num_online_cpus())
415 timekeeping_suspend(); 461 timekeeping_suspend();
416 } else { 462 else
417 tick_suspend(); 463 tick_suspend_local();
418 tick_suspend_broadcast();
419 }
420 464
421 raw_spin_unlock(&tick_freeze_lock); 465 raw_spin_unlock(&tick_freeze_lock);
422} 466}
@@ -437,7 +481,7 @@ void tick_unfreeze(void)
437 if (tick_freeze_depth == num_online_cpus()) 481 if (tick_freeze_depth == num_online_cpus())
438 timekeeping_resume(); 482 timekeeping_resume();
439 else 483 else
440 tick_resume(); 484 tick_resume_local();
441 485
442 tick_freeze_depth--; 486 tick_freeze_depth--;
443 487
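
tick_freeze() and tick_unfreeze() count freezing CPUs under tick_freeze_lock: only the last CPU to freeze suspends timekeeping (and with it the broadcast device), the others merely suspend their local tick, and the first CPU to unfreeze resumes timekeeping. A simplified single-threaded model of that depth counting, with the lock omitted and invented print statements standing in for the real calls:

#include <stdio.h>

/* Simplified model of the tick_freeze()/tick_unfreeze() bookkeeping.
 * num_online stands in for num_online_cpus(); the raw spinlock is left
 * out because this single-threaded demo only shows the counting. */
static unsigned int freeze_depth;
static const unsigned int num_online = 4;

static void tick_freeze_model(unsigned int cpu)
{
	freeze_depth++;
	if (freeze_depth == num_online)
		printf("cpu%u: last one in -> timekeeping_suspend()\n", cpu);
	else
		printf("cpu%u: tick_suspend_local()\n", cpu);
}

static void tick_unfreeze_model(unsigned int cpu)
{
	if (freeze_depth == num_online)
		printf("cpu%u: first one out -> timekeeping_resume()\n", cpu);
	else
		printf("cpu%u: tick_resume_local()\n", cpu);
	freeze_depth--;
}

int main(void)
{
	for (unsigned int cpu = 0; cpu < num_online; cpu++)
		tick_freeze_model(cpu);
	for (unsigned int cpu = num_online; cpu-- > 0; )
		tick_unfreeze_model(cpu);
	return 0;
}
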
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 366aeb4f2c66..b64fdd8054c5 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -5,15 +5,12 @@
5#include <linux/tick.h> 5#include <linux/tick.h>
6 6
7#include "timekeeping.h" 7#include "timekeeping.h"
8#include "tick-sched.h"
8 9
9extern seqlock_t jiffies_lock; 10#ifdef CONFIG_GENERIC_CLOCKEVENTS
10 11
11#define CS_NAME_LEN 32 12# define TICK_DO_TIMER_NONE -1
12 13# define TICK_DO_TIMER_BOOT -2
13#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
14
15#define TICK_DO_TIMER_NONE -1
16#define TICK_DO_TIMER_BOOT -2
17 14
18DECLARE_PER_CPU(struct tick_device, tick_cpu_device); 15DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
19extern ktime_t tick_next_period; 16extern ktime_t tick_next_period;
@@ -23,21 +20,72 @@ extern int tick_do_timer_cpu __read_mostly;
23extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); 20extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
24extern void tick_handle_periodic(struct clock_event_device *dev); 21extern void tick_handle_periodic(struct clock_event_device *dev);
25extern void tick_check_new_device(struct clock_event_device *dev); 22extern void tick_check_new_device(struct clock_event_device *dev);
26extern void tick_handover_do_timer(int *cpup); 23extern void tick_shutdown(unsigned int cpu);
27extern void tick_shutdown(unsigned int *cpup);
28extern void tick_suspend(void); 24extern void tick_suspend(void);
29extern void tick_resume(void); 25extern void tick_resume(void);
30extern bool tick_check_replacement(struct clock_event_device *curdev, 26extern bool tick_check_replacement(struct clock_event_device *curdev,
31 struct clock_event_device *newdev); 27 struct clock_event_device *newdev);
32extern void tick_install_replacement(struct clock_event_device *dev); 28extern void tick_install_replacement(struct clock_event_device *dev);
29extern int tick_is_oneshot_available(void);
30extern struct tick_device *tick_get_device(int cpu);
33 31
34extern void clockevents_shutdown(struct clock_event_device *dev); 32extern int clockevents_tick_resume(struct clock_event_device *dev);
33/* Check, if the device is functional or a dummy for broadcast */
34static inline int tick_device_is_functional(struct clock_event_device *dev)
35{
36 return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
37}
35 38
39extern void clockevents_shutdown(struct clock_event_device *dev);
40extern void clockevents_exchange_device(struct clock_event_device *old,
41 struct clock_event_device *new);
42extern void clockevents_set_state(struct clock_event_device *dev,
43 enum clock_event_state state);
44extern int clockevents_program_event(struct clock_event_device *dev,
45 ktime_t expires, bool force);
46extern void clockevents_handle_noop(struct clock_event_device *dev);
47extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
36extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); 48extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
37 49
38/* 50/* Broadcasting support */
39 * NO_HZ / high resolution timer shared code 51# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
40 */ 52extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
53extern void tick_install_broadcast_device(struct clock_event_device *dev);
54extern int tick_is_broadcast_device(struct clock_event_device *dev);
55extern void tick_shutdown_broadcast(unsigned int cpu);
56extern void tick_suspend_broadcast(void);
57extern void tick_resume_broadcast(void);
58extern bool tick_resume_check_broadcast(void);
59extern void tick_broadcast_init(void);
60extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
61extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
62extern struct tick_device *tick_get_broadcast_device(void);
63extern struct cpumask *tick_get_broadcast_mask(void);
64# else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */
65static inline void tick_install_broadcast_device(struct clock_event_device *dev) { }
66static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
67static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
68static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
69static inline void tick_shutdown_broadcast(unsigned int cpu) { }
70static inline void tick_suspend_broadcast(void) { }
71static inline void tick_resume_broadcast(void) { }
72static inline bool tick_resume_check_broadcast(void) { return false; }
73static inline void tick_broadcast_init(void) { }
74static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; }
75
76/* Set the periodic handler in non broadcast mode */
77static inline void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
78{
79 dev->event_handler = tick_handle_periodic;
80}
81# endif /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
82
83#else /* !GENERIC_CLOCKEVENTS: */
84static inline void tick_suspend(void) { }
85static inline void tick_resume(void) { }
86#endif /* !GENERIC_CLOCKEVENTS */
87
88/* Oneshot related functions */
41#ifdef CONFIG_TICK_ONESHOT 89#ifdef CONFIG_TICK_ONESHOT
42extern void tick_setup_oneshot(struct clock_event_device *newdev, 90extern void tick_setup_oneshot(struct clock_event_device *newdev,
43 void (*handler)(struct clock_event_device *), 91 void (*handler)(struct clock_event_device *),
@@ -46,58 +94,42 @@ extern int tick_program_event(ktime_t expires, int force);
46extern void tick_oneshot_notify(void); 94extern void tick_oneshot_notify(void);
47extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); 95extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
48extern void tick_resume_oneshot(void); 96extern void tick_resume_oneshot(void);
49# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 97static inline bool tick_oneshot_possible(void) { return true; }
98extern int tick_oneshot_mode_active(void);
99extern void tick_clock_notify(void);
100extern int tick_check_oneshot_change(int allow_nohz);
101extern int tick_init_highres(void);
102#else /* !CONFIG_TICK_ONESHOT: */
103static inline
104void tick_setup_oneshot(struct clock_event_device *newdev,
105 void (*handler)(struct clock_event_device *),
106 ktime_t nextevt) { BUG(); }
107static inline void tick_resume_oneshot(void) { BUG(); }
108static inline int tick_program_event(ktime_t expires, int force) { return 0; }
109static inline void tick_oneshot_notify(void) { }
110static inline bool tick_oneshot_possible(void) { return false; }
111static inline int tick_oneshot_mode_active(void) { return 0; }
112static inline void tick_clock_notify(void) { }
113static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
114#endif /* !CONFIG_TICK_ONESHOT */
115
116/* Functions related to oneshot broadcasting */
117#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
50extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); 118extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
51extern int tick_broadcast_oneshot_control(unsigned long reason);
52extern void tick_broadcast_switch_to_oneshot(void); 119extern void tick_broadcast_switch_to_oneshot(void);
53extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); 120extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
54extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
55extern int tick_broadcast_oneshot_active(void); 121extern int tick_broadcast_oneshot_active(void);
56extern void tick_check_oneshot_broadcast_this_cpu(void); 122extern void tick_check_oneshot_broadcast_this_cpu(void);
57bool tick_broadcast_oneshot_available(void); 123bool tick_broadcast_oneshot_available(void);
58# else /* BROADCAST */ 124extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
59static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) 125#else /* !(BROADCAST && ONESHOT): */
60{ 126static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
61 BUG();
62}
63static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
64static inline void tick_broadcast_switch_to_oneshot(void) { } 127static inline void tick_broadcast_switch_to_oneshot(void) { }
65static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } 128static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
66static inline int tick_broadcast_oneshot_active(void) { return 0; } 129static inline int tick_broadcast_oneshot_active(void) { return 0; }
67static inline void tick_check_oneshot_broadcast_this_cpu(void) { } 130static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
68static inline bool tick_broadcast_oneshot_available(void) { return true; } 131static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
69# endif /* !BROADCAST */ 132#endif /* !(BROADCAST && ONESHOT) */
70
71#else /* !ONESHOT */
72static inline
73void tick_setup_oneshot(struct clock_event_device *newdev,
74 void (*handler)(struct clock_event_device *),
75 ktime_t nextevt)
76{
77 BUG();
78}
79static inline void tick_resume_oneshot(void)
80{
81 BUG();
82}
83static inline int tick_program_event(ktime_t expires, int force)
84{
85 return 0;
86}
87static inline void tick_oneshot_notify(void) { }
88static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
89{
90 BUG();
91}
92static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
93static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
94static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
95{
96 return 0;
97}
98static inline int tick_broadcast_oneshot_active(void) { return 0; }
99static inline bool tick_broadcast_oneshot_available(void) { return false; }
100#endif /* !TICK_ONESHOT */
101 133
102/* NO_HZ_FULL internal */ 134/* NO_HZ_FULL internal */
103#ifdef CONFIG_NO_HZ_FULL 135#ifdef CONFIG_NO_HZ_FULL
@@ -105,68 +137,3 @@ extern void tick_nohz_init(void);
105# else 137# else
106static inline void tick_nohz_init(void) { } 138static inline void tick_nohz_init(void) { }
107#endif 139#endif
108
109/*
110 * Broadcasting support
111 */
112#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
113extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
114extern void tick_install_broadcast_device(struct clock_event_device *dev);
115extern int tick_is_broadcast_device(struct clock_event_device *dev);
116extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
117extern void tick_shutdown_broadcast(unsigned int *cpup);
118extern void tick_suspend_broadcast(void);
119extern int tick_resume_broadcast(void);
120extern void tick_broadcast_init(void);
121extern void
122tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
123int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
124
125#else /* !BROADCAST */
126
127static inline void tick_install_broadcast_device(struct clock_event_device *dev)
128{
129}
130
131static inline int tick_is_broadcast_device(struct clock_event_device *dev)
132{
133 return 0;
134}
135static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
136 int cpu)
137{
138 return 0;
139}
140static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
141static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
142static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
143static inline void tick_suspend_broadcast(void) { }
144static inline int tick_resume_broadcast(void) { return 0; }
145static inline void tick_broadcast_init(void) { }
146static inline int tick_broadcast_update_freq(struct clock_event_device *dev,
147 u32 freq) { return -ENODEV; }
148
149/*
150 * Set the periodic handler in non broadcast mode
151 */
152static inline void tick_set_periodic_handler(struct clock_event_device *dev,
153 int broadcast)
154{
155 dev->event_handler = tick_handle_periodic;
156}
157#endif /* !BROADCAST */
158
159/*
160 * Check, if the device is functional or a dummy for broadcast
161 */
162static inline int tick_device_is_functional(struct clock_event_device *dev)
163{
164 return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
165}
166
167int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
168
169#endif
170
171extern void do_timer(unsigned long ticks);
172extern void update_wall_time(void);
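
As before the reshuffle, every function that only exists under a given config option is paired with a static inline stub in the header, so call sites stay free of #ifdefs. A toy illustration of that pattern, not taken from the kernel:

#include <stdio.h>

/* Toggle this to mimic CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y/n. */
#define CONFIG_BROADCAST 1

#if CONFIG_BROADCAST
static void tick_suspend_broadcast(void)
{
	printf("broadcast device suspended\n");
}
#else
/* Stub: compiles away entirely when the feature is not configured. */
static inline void tick_suspend_broadcast(void) { }
#endif

int main(void)
{
	/* The caller is identical in both configurations. */
	tick_suspend_broadcast();
	return 0;
}
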
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 7ce740e78e1b..67a64b1670bf 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -38,7 +38,7 @@ void tick_resume_oneshot(void)
38{ 38{
39 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 39 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
40 40
41 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); 41 clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
42 clockevents_program_event(dev, ktime_get(), true); 42 clockevents_program_event(dev, ktime_get(), true);
43} 43}
44 44
@@ -50,7 +50,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
50 ktime_t next_event) 50 ktime_t next_event)
51{ 51{
52 newdev->event_handler = handler; 52 newdev->event_handler = handler;
53 clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); 53 clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT);
54 clockevents_program_event(newdev, next_event, true); 54 clockevents_program_event(newdev, next_event, true);
55} 55}
56 56
@@ -81,7 +81,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
81 81
82 td->mode = TICKDEV_MODE_ONESHOT; 82 td->mode = TICKDEV_MODE_ONESHOT;
83 dev->event_handler = handler; 83 dev->event_handler = handler;
84 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); 84 clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
85 tick_broadcast_switch_to_oneshot(); 85 tick_broadcast_switch_to_oneshot();
86 return 0; 86 return 0;
87} 87}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a4c4edac4528..914259128145 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -34,7 +34,7 @@
34/* 34/*
35 * Per cpu nohz control structure 35 * Per cpu nohz control structure
36 */ 36 */
37DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); 37static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
38 38
39/* 39/*
40 * The time, when the last jiffy update happened. Protected by jiffies_lock. 40 * The time, when the last jiffy update happened. Protected by jiffies_lock.
@@ -416,6 +416,11 @@ static int __init setup_tick_nohz(char *str)
416 416
417__setup("nohz=", setup_tick_nohz); 417__setup("nohz=", setup_tick_nohz);
418 418
419int tick_nohz_tick_stopped(void)
420{
421 return __this_cpu_read(tick_cpu_sched.tick_stopped);
422}
423
419/** 424/**
420 * tick_nohz_update_jiffies - update jiffies when idle was interrupted 425 * tick_nohz_update_jiffies - update jiffies when idle was interrupted
421 * 426 *
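
With tick_cpu_sched now static to this file, other code reads the tick-stopped flag through the new tick_nohz_tick_stopped() accessor rather than poking the per-CPU structure directly. A minimal sketch of that encapsulation, using an array and a fixed "current CPU" as stand-ins for the kernel's per-CPU machinery:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

/* File-scope ("static") state: not visible to other translation units. */
static struct { bool tick_stopped; } tick_cpu_sched[NR_CPUS];
static unsigned int this_cpu;		/* stand-in for smp_processor_id() */

/* The only way other code reads the flag. */
bool tick_nohz_tick_stopped(void)
{
	return tick_cpu_sched[this_cpu].tick_stopped;
}

int main(void)
{
	tick_cpu_sched[this_cpu].tick_stopped = true;	/* set by the idle path */
	printf("tick stopped on cpu%u: %d\n", this_cpu, tick_nohz_tick_stopped());
	return 0;
}
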
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
new file mode 100644
index 000000000000..28b5da3e1a17
--- /dev/null
+++ b/kernel/time/tick-sched.h
@@ -0,0 +1,74 @@
1#ifndef _TICK_SCHED_H
2#define _TICK_SCHED_H
3
4#include <linux/hrtimer.h>
5
6enum tick_device_mode {
7 TICKDEV_MODE_PERIODIC,
8 TICKDEV_MODE_ONESHOT,
9};
10
11struct tick_device {
12 struct clock_event_device *evtdev;
13 enum tick_device_mode mode;
14};
15
16enum tick_nohz_mode {
17 NOHZ_MODE_INACTIVE,
18 NOHZ_MODE_LOWRES,
19 NOHZ_MODE_HIGHRES,
20};
21
22/**
23 * struct tick_sched - sched tick emulation and no idle tick control/stats
24 * @sched_timer: hrtimer to schedule the periodic tick in high
25 * resolution mode
26 * @last_tick: Store the last tick expiry time when the tick
27 * timer is modified for nohz sleeps. This is necessary
28 * to resume the tick timer operation in the timeline
29 * when the CPU returns from nohz sleep.
30 * @tick_stopped: Indicator that the idle tick has been stopped
31 * @idle_jiffies: jiffies at the entry to idle for idle time accounting
32 * @idle_calls: Total number of idle calls
33 * @idle_sleeps: Number of idle calls, where the sched tick was stopped
34 * @idle_entrytime: Time when the idle call was entered
35 * @idle_waketime: Time when the idle was interrupted
36 * @idle_exittime: Time when the idle state was left
37 * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
38 * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding
39 * @sleep_length: Duration of the current idle sleep
40 * @do_timer_last: CPU was the last one doing do_timer before going idle
41 */
42struct tick_sched {
43 struct hrtimer sched_timer;
44 unsigned long check_clocks;
45 enum tick_nohz_mode nohz_mode;
46 ktime_t last_tick;
47 int inidle;
48 int tick_stopped;
49 unsigned long idle_jiffies;
50 unsigned long idle_calls;
51 unsigned long idle_sleeps;
52 int idle_active;
53 ktime_t idle_entrytime;
54 ktime_t idle_waketime;
55 ktime_t idle_exittime;
56 ktime_t idle_sleeptime;
57 ktime_t iowait_sleeptime;
58 ktime_t sleep_length;
59 unsigned long last_jiffies;
60 unsigned long next_jiffies;
61 ktime_t idle_expires;
62 int do_timer_last;
63};
64
65extern struct tick_sched *tick_get_tick_sched(int cpu);
66
67extern void tick_setup_sched_timer(void);
68#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
69extern void tick_cancel_sched_timer(int cpu);
70#else
71static inline void tick_cancel_sched_timer(int cpu) { }
72#endif
73
74#endif
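
The kernel-doc above describes the idle accounting fields of struct tick_sched. The sketch below models how idle_calls, idle_sleeps and idle_sleeptime relate across one idle enter/exit cycle; plain nanosecond integers replace ktime_t and the helper names are invented, so this illustrates the documented semantics rather than the kernel code.

#include <stdint.h>
#include <stdio.h>

struct tick_sched_model {
	unsigned long idle_calls;	/* total idle entries */
	unsigned long idle_sleeps;	/* idle entries with the tick stopped */
	uint64_t idle_entrytime;	/* ns, recorded on idle entry */
	uint64_t idle_sleeptime;	/* ns, accumulated with the tick stopped */
	int tick_stopped;
};

static void idle_enter(struct tick_sched_model *ts, uint64_t now, int stop_tick)
{
	ts->idle_calls++;
	ts->idle_entrytime = now;
	ts->tick_stopped = stop_tick;
	if (stop_tick)
		ts->idle_sleeps++;
}

static void idle_exit(struct tick_sched_model *ts, uint64_t now)
{
	if (ts->tick_stopped) {
		ts->idle_sleeptime += now - ts->idle_entrytime;
		ts->tick_stopped = 0;
	}
}

int main(void)
{
	struct tick_sched_model ts = { 0 };

	idle_enter(&ts, 1000, 1);
	idle_exit(&ts, 6000);
	printf("calls=%lu sleeps=%lu sleeptime=%llu ns\n",
	       ts.idle_calls, ts.idle_sleeps,
	       (unsigned long long)ts.idle_sleeptime);
	return 0;
}
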
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 91db94136c10..946acb72179f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -59,17 +59,15 @@ struct tk_fast {
59}; 59};
60 60
61static struct tk_fast tk_fast_mono ____cacheline_aligned; 61static struct tk_fast tk_fast_mono ____cacheline_aligned;
62static struct tk_fast tk_fast_raw ____cacheline_aligned;
62 63
63/* flag for if timekeeping is suspended */ 64/* flag for if timekeeping is suspended */
64int __read_mostly timekeeping_suspended; 65int __read_mostly timekeeping_suspended;
65 66
66/* Flag for if there is a persistent clock on this platform */
67bool __read_mostly persistent_clock_exist = false;
68
69static inline void tk_normalize_xtime(struct timekeeper *tk) 67static inline void tk_normalize_xtime(struct timekeeper *tk)
70{ 68{
71 while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { 69 while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
72 tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; 70 tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
73 tk->xtime_sec++; 71 tk->xtime_sec++;
74 } 72 }
75} 73}
@@ -79,20 +77,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
79 struct timespec64 ts; 77 struct timespec64 ts;
80 78
81 ts.tv_sec = tk->xtime_sec; 79 ts.tv_sec = tk->xtime_sec;
82 ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); 80 ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
83 return ts; 81 return ts;
84} 82}
85 83
86static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) 84static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
87{ 85{
88 tk->xtime_sec = ts->tv_sec; 86 tk->xtime_sec = ts->tv_sec;
89 tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; 87 tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
90} 88}
91 89
92static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) 90static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
93{ 91{
94 tk->xtime_sec += ts->tv_sec; 92 tk->xtime_sec += ts->tv_sec;
95 tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; 93 tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
96 tk_normalize_xtime(tk); 94 tk_normalize_xtime(tk);
97} 95}
98 96
@@ -118,6 +116,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
118 tk->offs_boot = ktime_add(tk->offs_boot, delta); 116 tk->offs_boot = ktime_add(tk->offs_boot, delta);
119} 117}
120 118
119#ifdef CONFIG_DEBUG_TIMEKEEPING
120#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
121/*
122 * These simple flag variables are managed
123 * without locks, which is racy, but ok since
124 * we don't really care about being super
125 * precise about how many events were seen,
126 * just that a problem was observed.
127 */
128static int timekeeping_underflow_seen;
129static int timekeeping_overflow_seen;
130
131/* last_warning is only modified under the timekeeping lock */
132static long timekeeping_last_warning;
133
134static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
135{
136
137 cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
138 const char *name = tk->tkr_mono.clock->name;
139
140 if (offset > max_cycles) {
141 printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
142 offset, name, max_cycles);
143 printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
144 } else {
145 if (offset > (max_cycles >> 1)) {
146 printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",
147 offset, name, max_cycles >> 1);
148 printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
149 }
150 }
151
152 if (timekeeping_underflow_seen) {
153 if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
154 printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
155 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
156 printk_deferred(" Your kernel is probably still fine.\n");
157 timekeeping_last_warning = jiffies;
158 }
159 timekeeping_underflow_seen = 0;
160 }
161
162 if (timekeeping_overflow_seen) {
163 if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
164 printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
165 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
166 printk_deferred(" Your kernel is probably still fine.\n");
167 timekeeping_last_warning = jiffies;
168 }
169 timekeeping_overflow_seen = 0;
170 }
171}
172
173static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
174{
175 cycle_t now, last, mask, max, delta;
176 unsigned int seq;
177
178 /*
179 * Since we're called holding a seqlock, the data may shift
180 * under us while we're doing the calculation. This can cause
181 * false positives, since we'd note a problem but throw the
182 * results away. So nest another seqlock here to atomically
183 * grab the points we are checking with.
184 */
185 do {
186 seq = read_seqcount_begin(&tk_core.seq);
187 now = tkr->read(tkr->clock);
188 last = tkr->cycle_last;
189 mask = tkr->mask;
190 max = tkr->clock->max_cycles;
191 } while (read_seqcount_retry(&tk_core.seq, seq));
192
193 delta = clocksource_delta(now, last, mask);
194
195 /*
196 * Try to catch underflows by checking if we are seeing small
197 * mask-relative negative values.
198 */
199 if (unlikely((~delta & mask) < (mask >> 3))) {
200 timekeeping_underflow_seen = 1;
201 delta = 0;
202 }
203
204 /* Cap delta value to the max_cycles values to avoid mult overflows */
205 if (unlikely(delta > max)) {
206 timekeeping_overflow_seen = 1;
207 delta = tkr->clock->max_cycles;
208 }
209
210 return delta;
211}
212#else
213static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
214{
215}
216static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
217{
218 cycle_t cycle_now, delta;
219
220 /* read clocksource */
221 cycle_now = tkr->read(tkr->clock);
222
223 /* calculate the delta since the last update_wall_time */
224 delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
225
226 return delta;
227}
228#endif
229
121/** 230/**
122 * tk_setup_internals - Set up internals to use clocksource clock. 231 * tk_setup_internals - Set up internals to use clocksource clock.
123 * 232 *
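
The CONFIG_DEBUG_TIMEKEEPING helpers above catch two failure modes: a delta that wraps to a small mask-relative negative value (treated as an underflow and forced to 0) and a delta larger than max_cycles (capped so the later delta * mult cannot overflow). A self-contained sketch of those two checks on a plain 64-bit counter; the mask, max_cycles and test values are made up for the demo, only the checks mirror the kernel logic.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified model of timekeeping_get_delta()'s sanity checks. */
static uint64_t checked_delta(uint64_t now, uint64_t last,
			      uint64_t mask, uint64_t max_cycles,
			      bool *underflow, bool *overflow)
{
	uint64_t delta = (now - last) & mask;

	/* A "small negative" delta wraps to just below the mask: underflow. */
	if ((~delta & mask) < (mask >> 3)) {
		*underflow = true;
		return 0;
	}
	/* Cap the delta so delta * mult cannot overflow 64 bits later. */
	if (delta > max_cycles) {
		*overflow = true;
		return max_cycles;
	}
	return delta;
}

int main(void)
{
	uint64_t mask = 0xffffffffULL;	/* 32-bit clocksource */
	uint64_t max  = 0x10000000ULL;
	bool uf = false, of = false;

	/* 'now' slightly behind 'last': wrapped, tiny-negative delta. */
	printf("underflow case: %llu\n",
	       (unsigned long long)checked_delta(99, 100, mask, max, &uf, &of));
	printf("flags: underflow=%d overflow=%d\n", uf, of);

	uf = of = false;
	/* Huge forward jump: capped to max_cycles. */
	printf("overflow case: %#llx\n",
	       (unsigned long long)checked_delta(0x80000000ULL, 0, mask, max, &uf, &of));
	printf("flags: underflow=%d overflow=%d\n", uf, of);
	return 0;
}
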
@@ -135,11 +244,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
135 u64 tmp, ntpinterval; 244 u64 tmp, ntpinterval;
136 struct clocksource *old_clock; 245 struct clocksource *old_clock;
137 246
138 old_clock = tk->tkr.clock; 247 old_clock = tk->tkr_mono.clock;
139 tk->tkr.clock = clock; 248 tk->tkr_mono.clock = clock;
140 tk->tkr.read = clock->read; 249 tk->tkr_mono.read = clock->read;
141 tk->tkr.mask = clock->mask; 250 tk->tkr_mono.mask = clock->mask;
142 tk->tkr.cycle_last = tk->tkr.read(clock); 251 tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
252
253 tk->tkr_raw.clock = clock;
254 tk->tkr_raw.read = clock->read;
255 tk->tkr_raw.mask = clock->mask;
256 tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
143 257
144 /* Do the ns -> cycle conversion first, using original mult */ 258 /* Do the ns -> cycle conversion first, using original mult */
145 tmp = NTP_INTERVAL_LENGTH; 259 tmp = NTP_INTERVAL_LENGTH;
@@ -163,11 +277,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
163 if (old_clock) { 277 if (old_clock) {
164 int shift_change = clock->shift - old_clock->shift; 278 int shift_change = clock->shift - old_clock->shift;
165 if (shift_change < 0) 279 if (shift_change < 0)
166 tk->tkr.xtime_nsec >>= -shift_change; 280 tk->tkr_mono.xtime_nsec >>= -shift_change;
167 else 281 else
168 tk->tkr.xtime_nsec <<= shift_change; 282 tk->tkr_mono.xtime_nsec <<= shift_change;
169 } 283 }
170 tk->tkr.shift = clock->shift; 284 tk->tkr_raw.xtime_nsec = 0;
285
286 tk->tkr_mono.shift = clock->shift;
287 tk->tkr_raw.shift = clock->shift;
171 288
172 tk->ntp_error = 0; 289 tk->ntp_error = 0;
173 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; 290 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
@@ -178,7 +295,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
178 * active clocksource. These value will be adjusted via NTP 295 * active clocksource. These value will be adjusted via NTP
179 * to counteract clock drifting. 296 * to counteract clock drifting.
180 */ 297 */
181 tk->tkr.mult = clock->mult; 298 tk->tkr_mono.mult = clock->mult;
299 tk->tkr_raw.mult = clock->mult;
182 tk->ntp_err_mult = 0; 300 tk->ntp_err_mult = 0;
183} 301}
184 302
@@ -193,14 +311,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
193 311
194static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) 312static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
195{ 313{
196 cycle_t cycle_now, delta; 314 cycle_t delta;
197 s64 nsec; 315 s64 nsec;
198 316
199 /* read clocksource: */ 317 delta = timekeeping_get_delta(tkr);
200 cycle_now = tkr->read(tkr->clock);
201
202 /* calculate the delta since the last update_wall_time: */
203 delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
204 318
205 nsec = delta * tkr->mult + tkr->xtime_nsec; 319 nsec = delta * tkr->mult + tkr->xtime_nsec;
206 nsec >>= tkr->shift; 320 nsec >>= tkr->shift;
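
timekeeping_get_ns() converts the cycle delta with the usual fixed-point arithmetic: nsec = (delta * mult + xtime_nsec) >> shift. A tiny standalone illustration, using invented mult/shift values for a hypothetical 24 MHz counter:

#include <stdint.h>
#include <stdio.h>

/* cycles -> ns with the mult/shift scheme used by struct tk_read_base.
 * mult and shift below are illustrative values for a 24 MHz counter
 * (mult / 2^shift ~= 1e9 / 24e6 ns per cycle), not taken from any driver. */
static uint64_t cyc2ns(uint64_t delta, uint32_t mult, uint32_t shift,
		       uint64_t xtime_nsec)
{
	return (delta * mult + xtime_nsec) >> shift;
}

int main(void)
{
	uint32_t mult = 699050667;	/* ~41.667 ns/cycle scaled by 2^24 */
	uint32_t shift = 24;

	/* One second worth of cycles at 24 MHz comes out as ~1e9 ns. */
	printf("%llu ns\n",
	       (unsigned long long)cyc2ns(24000000ULL, mult, shift, 0));
	return 0;
}
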
@@ -209,25 +323,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
209 return nsec + arch_gettimeoffset(); 323 return nsec + arch_gettimeoffset();
210} 324}
211 325
212static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
213{
214 struct clocksource *clock = tk->tkr.clock;
215 cycle_t cycle_now, delta;
216 s64 nsec;
217
218 /* read clocksource: */
219 cycle_now = tk->tkr.read(clock);
220
221 /* calculate the delta since the last update_wall_time: */
222 delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
223
224 /* convert delta to nanoseconds. */
225 nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
226
227 /* If arch requires, add in get_arch_timeoffset() */
228 return nsec + arch_gettimeoffset();
229}
230
231/** 326/**
232 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. 327 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
233 * @tkr: Timekeeping readout base from which we take the update 328 * @tkr: Timekeeping readout base from which we take the update
@@ -267,18 +362,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
267 * slightly wrong timestamp (a few nanoseconds). See 362 * slightly wrong timestamp (a few nanoseconds). See
268 * @ktime_get_mono_fast_ns. 363 * @ktime_get_mono_fast_ns.
269 */ 364 */
270static void update_fast_timekeeper(struct tk_read_base *tkr) 365static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
271{ 366{
272 struct tk_read_base *base = tk_fast_mono.base; 367 struct tk_read_base *base = tkf->base;
273 368
274 /* Force readers off to base[1] */ 369 /* Force readers off to base[1] */
275 raw_write_seqcount_latch(&tk_fast_mono.seq); 370 raw_write_seqcount_latch(&tkf->seq);
276 371
277 /* Update base[0] */ 372 /* Update base[0] */
278 memcpy(base, tkr, sizeof(*base)); 373 memcpy(base, tkr, sizeof(*base));
279 374
280 /* Force readers back to base[0] */ 375 /* Force readers back to base[0] */
281 raw_write_seqcount_latch(&tk_fast_mono.seq); 376 raw_write_seqcount_latch(&tkf->seq);
282 377
283 /* Update base[1] */ 378 /* Update base[1] */
284 memcpy(base + 1, base, sizeof(*base)); 379 memcpy(base + 1, base, sizeof(*base));
@@ -316,22 +411,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
316 * of the following timestamps. Callers need to be aware of that and 411 * of the following timestamps. Callers need to be aware of that and
317 * deal with it. 412 * deal with it.
318 */ 413 */
319u64 notrace ktime_get_mono_fast_ns(void) 414static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
320{ 415{
321 struct tk_read_base *tkr; 416 struct tk_read_base *tkr;
322 unsigned int seq; 417 unsigned int seq;
323 u64 now; 418 u64 now;
324 419
325 do { 420 do {
326 seq = raw_read_seqcount(&tk_fast_mono.seq); 421 seq = raw_read_seqcount(&tkf->seq);
327 tkr = tk_fast_mono.base + (seq & 0x01); 422 tkr = tkf->base + (seq & 0x01);
328 now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); 423 now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
424 } while (read_seqcount_retry(&tkf->seq, seq));
329 425
330 } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
331 return now; 426 return now;
332} 427}
428
429u64 ktime_get_mono_fast_ns(void)
430{
431 return __ktime_get_fast_ns(&tk_fast_mono);
432}
333EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); 433EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
334 434
435u64 ktime_get_raw_fast_ns(void)
436{
437 return __ktime_get_fast_ns(&tk_fast_raw);
438}
439EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
440
335/* Suspend-time cycles value for halted fast timekeeper. */ 441/* Suspend-time cycles value for halted fast timekeeper. */
336static cycle_t cycles_at_suspend; 442static cycle_t cycles_at_suspend;
337 443
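
update_fast_timekeeper() and __ktime_get_fast_ns() implement the two-copy latch: the writer bumps the sequence count before and after copying, and lock-free readers pick base[seq & 1] and retry if the count changed underneath them. A compact user-space rendition of the same idea with C11 atomics; the struct, names and payload are invented and the memory ordering is simplified compared with the kernel's seqcount latch.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Two copies of the data plus a sequence counter: the latch. */
struct fast_clock {
	atomic_uint seq;
	uint64_t    base[2];	/* payload: a monotonic base value */
};

static struct fast_clock fc;

/* Writer side, modelled after update_fast_timekeeper().
 * Barriers are simplified; the kernel uses raw_write_seqcount_latch(). */
static void fast_clock_update(uint64_t new_base)
{
	atomic_fetch_add_explicit(&fc.seq, 1, memory_order_release);	/* readers -> base[1] */
	fc.base[0] = new_base;
	atomic_fetch_add_explicit(&fc.seq, 1, memory_order_release);	/* readers -> base[0] */
	fc.base[1] = new_base;
}

/* Reader side, modelled after __ktime_get_fast_ns(): lock-free, retries. */
static uint64_t fast_clock_read(void)
{
	unsigned int seq;
	uint64_t val;

	do {
		seq = atomic_load_explicit(&fc.seq, memory_order_acquire);
		val = fc.base[seq & 1];
	} while (atomic_load_explicit(&fc.seq, memory_order_acquire) != seq);

	return val;
}

int main(void)
{
	fast_clock_update(1000);
	fast_clock_update(2000);
	printf("fast read: %llu\n", (unsigned long long)fast_clock_read());
	return 0;
}
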
@@ -353,12 +459,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
353static void halt_fast_timekeeper(struct timekeeper *tk) 459static void halt_fast_timekeeper(struct timekeeper *tk)
354{ 460{
355 static struct tk_read_base tkr_dummy; 461 static struct tk_read_base tkr_dummy;
356 struct tk_read_base *tkr = &tk->tkr; 462 struct tk_read_base *tkr = &tk->tkr_mono;
357 463
358 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 464 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
359 cycles_at_suspend = tkr->read(tkr->clock); 465 cycles_at_suspend = tkr->read(tkr->clock);
360 tkr_dummy.read = dummy_clock_read; 466 tkr_dummy.read = dummy_clock_read;
361 update_fast_timekeeper(&tkr_dummy); 467 update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
468
469 tkr = &tk->tkr_raw;
470 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
471 tkr_dummy.read = dummy_clock_read;
472 update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
362} 473}
363 474
364#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD 475#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@@ -369,8 +480,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
369 480
370 xt = timespec64_to_timespec(tk_xtime(tk)); 481 xt = timespec64_to_timespec(tk_xtime(tk));
371 wm = timespec64_to_timespec(tk->wall_to_monotonic); 482 wm = timespec64_to_timespec(tk->wall_to_monotonic);
372 update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult, 483 update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
373 tk->tkr.cycle_last); 484 tk->tkr_mono.cycle_last);
374} 485}
375 486
376static inline void old_vsyscall_fixup(struct timekeeper *tk) 487static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -387,11 +498,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
387 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD 498 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
388 * users are removed, this can be killed. 499 * users are removed, this can be killed.
389 */ 500 */
390 remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); 501 remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
391 tk->tkr.xtime_nsec -= remainder; 502 tk->tkr_mono.xtime_nsec -= remainder;
392 tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; 503 tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
393 tk->ntp_error += remainder << tk->ntp_error_shift; 504 tk->ntp_error += remainder << tk->ntp_error_shift;
394 tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; 505 tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
395} 506}
396#else 507#else
397#define old_vsyscall_fixup(tk) 508#define old_vsyscall_fixup(tk)
@@ -456,17 +567,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
456 */ 567 */
457 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); 568 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
458 nsec = (u32) tk->wall_to_monotonic.tv_nsec; 569 nsec = (u32) tk->wall_to_monotonic.tv_nsec;
459 tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); 570 tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
460 571
461 /* Update the monotonic raw base */ 572 /* Update the monotonic raw base */
462 tk->base_raw = timespec64_to_ktime(tk->raw_time); 573 tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
463 574
464 /* 575 /*
465 * The sum of the nanoseconds portions of xtime and 576 * The sum of the nanoseconds portions of xtime and
466 * wall_to_monotonic can be greater/equal one second. Take 577 * wall_to_monotonic can be greater/equal one second. Take
467 * this into account before updating tk->ktime_sec. 578 * this into account before updating tk->ktime_sec.
468 */ 579 */
469 nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); 580 nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
470 if (nsec >= NSEC_PER_SEC) 581 if (nsec >= NSEC_PER_SEC)
471 seconds++; 582 seconds++;
472 tk->ktime_sec = seconds; 583 tk->ktime_sec = seconds;
@@ -489,7 +600,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
489 memcpy(&shadow_timekeeper, &tk_core.timekeeper, 600 memcpy(&shadow_timekeeper, &tk_core.timekeeper,
490 sizeof(tk_core.timekeeper)); 601 sizeof(tk_core.timekeeper));
491 602
492 update_fast_timekeeper(&tk->tkr); 603 update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
604 update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
493} 605}
494 606
495/** 607/**
@@ -501,22 +613,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
501 */ 613 */
502static void timekeeping_forward_now(struct timekeeper *tk) 614static void timekeeping_forward_now(struct timekeeper *tk)
503{ 615{
504 struct clocksource *clock = tk->tkr.clock; 616 struct clocksource *clock = tk->tkr_mono.clock;
505 cycle_t cycle_now, delta; 617 cycle_t cycle_now, delta;
506 s64 nsec; 618 s64 nsec;
507 619
508 cycle_now = tk->tkr.read(clock); 620 cycle_now = tk->tkr_mono.read(clock);
509 delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); 621 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
510 tk->tkr.cycle_last = cycle_now; 622 tk->tkr_mono.cycle_last = cycle_now;
623 tk->tkr_raw.cycle_last = cycle_now;
511 624
512 tk->tkr.xtime_nsec += delta * tk->tkr.mult; 625 tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
513 626
514 /* If arch requires, add in get_arch_timeoffset() */ 627 /* If arch requires, add in get_arch_timeoffset() */
515 tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; 628 tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
516 629
517 tk_normalize_xtime(tk); 630 tk_normalize_xtime(tk);
518 631
519 nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); 632 nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
520 timespec64_add_ns(&tk->raw_time, nsec); 633 timespec64_add_ns(&tk->raw_time, nsec);
521} 634}
522 635
@@ -537,7 +650,7 @@ int __getnstimeofday64(struct timespec64 *ts)
537 seq = read_seqcount_begin(&tk_core.seq); 650 seq = read_seqcount_begin(&tk_core.seq);
538 651
539 ts->tv_sec = tk->xtime_sec; 652 ts->tv_sec = tk->xtime_sec;
540 nsecs = timekeeping_get_ns(&tk->tkr); 653 nsecs = timekeeping_get_ns(&tk->tkr_mono);
541 654
542 } while (read_seqcount_retry(&tk_core.seq, seq)); 655 } while (read_seqcount_retry(&tk_core.seq, seq));
543 656
@@ -577,8 +690,8 @@ ktime_t ktime_get(void)
577 690
578 do { 691 do {
579 seq = read_seqcount_begin(&tk_core.seq); 692 seq = read_seqcount_begin(&tk_core.seq);
580 base = tk->tkr.base_mono; 693 base = tk->tkr_mono.base;
581 nsecs = timekeeping_get_ns(&tk->tkr); 694 nsecs = timekeeping_get_ns(&tk->tkr_mono);
582 695
583 } while (read_seqcount_retry(&tk_core.seq, seq)); 696 } while (read_seqcount_retry(&tk_core.seq, seq));
584 697
@@ -603,8 +716,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
603 716
604 do { 717 do {
605 seq = read_seqcount_begin(&tk_core.seq); 718 seq = read_seqcount_begin(&tk_core.seq);
606 base = ktime_add(tk->tkr.base_mono, *offset); 719 base = ktime_add(tk->tkr_mono.base, *offset);
607 nsecs = timekeeping_get_ns(&tk->tkr); 720 nsecs = timekeeping_get_ns(&tk->tkr_mono);
608 721
609 } while (read_seqcount_retry(&tk_core.seq, seq)); 722 } while (read_seqcount_retry(&tk_core.seq, seq));
610 723
@@ -645,8 +758,8 @@ ktime_t ktime_get_raw(void)
645 758
646 do { 759 do {
647 seq = read_seqcount_begin(&tk_core.seq); 760 seq = read_seqcount_begin(&tk_core.seq);
648 base = tk->base_raw; 761 base = tk->tkr_raw.base;
649 nsecs = timekeeping_get_ns_raw(tk); 762 nsecs = timekeeping_get_ns(&tk->tkr_raw);
650 763
651 } while (read_seqcount_retry(&tk_core.seq, seq)); 764 } while (read_seqcount_retry(&tk_core.seq, seq));
652 765
@@ -674,7 +787,7 @@ void ktime_get_ts64(struct timespec64 *ts)
674 do { 787 do {
675 seq = read_seqcount_begin(&tk_core.seq); 788 seq = read_seqcount_begin(&tk_core.seq);
676 ts->tv_sec = tk->xtime_sec; 789 ts->tv_sec = tk->xtime_sec;
677 nsec = timekeeping_get_ns(&tk->tkr); 790 nsec = timekeeping_get_ns(&tk->tkr_mono);
678 tomono = tk->wall_to_monotonic; 791 tomono = tk->wall_to_monotonic;
679 792
680 } while (read_seqcount_retry(&tk_core.seq, seq)); 793 } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -759,8 +872,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
759 ts_real->tv_sec = tk->xtime_sec; 872 ts_real->tv_sec = tk->xtime_sec;
760 ts_real->tv_nsec = 0; 873 ts_real->tv_nsec = 0;
761 874
762 nsecs_raw = timekeeping_get_ns_raw(tk); 875 nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
763 nsecs_real = timekeeping_get_ns(&tk->tkr); 876 nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
764 877
765 } while (read_seqcount_retry(&tk_core.seq, seq)); 878 } while (read_seqcount_retry(&tk_core.seq, seq));
766 879
@@ -943,7 +1056,7 @@ static int change_clocksource(void *data)
943 */ 1056 */
944 if (try_module_get(new->owner)) { 1057 if (try_module_get(new->owner)) {
945 if (!new->enable || new->enable(new) == 0) { 1058 if (!new->enable || new->enable(new) == 0) {
946 old = tk->tkr.clock; 1059 old = tk->tkr_mono.clock;
947 tk_setup_internals(tk, new); 1060 tk_setup_internals(tk, new);
948 if (old->disable) 1061 if (old->disable)
949 old->disable(old); 1062 old->disable(old);
@@ -971,11 +1084,11 @@ int timekeeping_notify(struct clocksource *clock)
971{ 1084{
972 struct timekeeper *tk = &tk_core.timekeeper; 1085 struct timekeeper *tk = &tk_core.timekeeper;
973 1086
974 if (tk->tkr.clock == clock) 1087 if (tk->tkr_mono.clock == clock)
975 return 0; 1088 return 0;
976 stop_machine(change_clocksource, clock, NULL); 1089 stop_machine(change_clocksource, clock, NULL);
977 tick_clock_notify(); 1090 tick_clock_notify();
978 return tk->tkr.clock == clock ? 0 : -1; 1091 return tk->tkr_mono.clock == clock ? 0 : -1;
979} 1092}
980 1093
981/** 1094/**
@@ -993,7 +1106,7 @@ void getrawmonotonic64(struct timespec64 *ts)
993 1106
994 do { 1107 do {
995 seq = read_seqcount_begin(&tk_core.seq); 1108 seq = read_seqcount_begin(&tk_core.seq);
996 nsecs = timekeeping_get_ns_raw(tk); 1109 nsecs = timekeeping_get_ns(&tk->tkr_raw);
997 ts64 = tk->raw_time; 1110 ts64 = tk->raw_time;
998 1111
999 } while (read_seqcount_retry(&tk_core.seq, seq)); 1112 } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1016,7 +1129,7 @@ int timekeeping_valid_for_hres(void)
1016 do { 1129 do {
1017 seq = read_seqcount_begin(&tk_core.seq); 1130 seq = read_seqcount_begin(&tk_core.seq);
1018 1131
1019 ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 1132 ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
1020 1133
1021 } while (read_seqcount_retry(&tk_core.seq, seq)); 1134 } while (read_seqcount_retry(&tk_core.seq, seq));
1022 1135
@@ -1035,7 +1148,7 @@ u64 timekeeping_max_deferment(void)
1035 do { 1148 do {
1036 seq = read_seqcount_begin(&tk_core.seq); 1149 seq = read_seqcount_begin(&tk_core.seq);
1037 1150
1038 ret = tk->tkr.clock->max_idle_ns; 1151 ret = tk->tkr_mono.clock->max_idle_ns;
1039 1152
1040 } while (read_seqcount_retry(&tk_core.seq, seq)); 1153 } while (read_seqcount_retry(&tk_core.seq, seq));
1041 1154
@@ -1057,6 +1170,14 @@ void __weak read_persistent_clock(struct timespec *ts)
1057 ts->tv_nsec = 0; 1170 ts->tv_nsec = 0;
1058} 1171}
1059 1172
1173void __weak read_persistent_clock64(struct timespec64 *ts64)
1174{
1175 struct timespec ts;
1176
1177 read_persistent_clock(&ts);
1178 *ts64 = timespec_to_timespec64(ts);
1179}
1180
1060/** 1181/**
1061 * read_boot_clock - Return time of the system start. 1182 * read_boot_clock - Return time of the system start.
1062 * 1183 *
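
The hunk above supplies a __weak read_persistent_clock64() that simply wraps the legacy 32-bit read_persistent_clock(). An architecture with a native 64-bit persistent clock can override it directly. The following is only a sketch of such an override, assuming a hypothetical memory-mapped RTC; the example_rtc_base symbol, register offset, and include choices are illustrative, not taken from any real platform.

/* Hypothetical architecture override of the __weak wrapper above. */
#include <linux/io.h>
#include <linux/timekeeping.h>

static void __iomem *example_rtc_base;	/* mapped elsewhere by the platform */

void read_persistent_clock64(struct timespec64 *ts)
{
	/* Assume one 32-bit register holding seconds since the epoch. */
	ts->tv_sec  = readl(example_rtc_base + 0x00);
	ts->tv_nsec = 0;	/* this imaginary RTC has no sub-second field */
}
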
@@ -1072,6 +1193,20 @@ void __weak read_boot_clock(struct timespec *ts)
1072 ts->tv_nsec = 0; 1193 ts->tv_nsec = 0;
1073} 1194}
1074 1195
1196void __weak read_boot_clock64(struct timespec64 *ts64)
1197{
1198 struct timespec ts;
1199
1200 read_boot_clock(&ts);
1201 *ts64 = timespec_to_timespec64(ts);
1202}
1203
 1204/* Flag set when timekeeping_resume() has injected sleeptime */
1205static bool sleeptime_injected;
1206
 1207/* Flag set when there is a persistent clock on this platform */
1208static bool persistent_clock_exists;
1209
1075/* 1210/*
1076 * timekeeping_init - Initializes the clocksource and common timekeeping values 1211 * timekeeping_init - Initializes the clocksource and common timekeeping values
1077 */ 1212 */
@@ -1081,20 +1216,17 @@ void __init timekeeping_init(void)
1081 struct clocksource *clock; 1216 struct clocksource *clock;
1082 unsigned long flags; 1217 unsigned long flags;
1083 struct timespec64 now, boot, tmp; 1218 struct timespec64 now, boot, tmp;
1084 struct timespec ts;
1085 1219
1086 read_persistent_clock(&ts); 1220 read_persistent_clock64(&now);
1087 now = timespec_to_timespec64(ts);
1088 if (!timespec64_valid_strict(&now)) { 1221 if (!timespec64_valid_strict(&now)) {
1089 pr_warn("WARNING: Persistent clock returned invalid value!\n" 1222 pr_warn("WARNING: Persistent clock returned invalid value!\n"
1090 " Check your CMOS/BIOS settings.\n"); 1223 " Check your CMOS/BIOS settings.\n");
1091 now.tv_sec = 0; 1224 now.tv_sec = 0;
1092 now.tv_nsec = 0; 1225 now.tv_nsec = 0;
1093 } else if (now.tv_sec || now.tv_nsec) 1226 } else if (now.tv_sec || now.tv_nsec)
1094 persistent_clock_exist = true; 1227 persistent_clock_exists = true;
1095 1228
1096 read_boot_clock(&ts); 1229 read_boot_clock64(&boot);
1097 boot = timespec_to_timespec64(ts);
1098 if (!timespec64_valid_strict(&boot)) { 1230 if (!timespec64_valid_strict(&boot)) {
1099 pr_warn("WARNING: Boot clock returned invalid value!\n" 1231 pr_warn("WARNING: Boot clock returned invalid value!\n"
1100 " Check your CMOS/BIOS settings.\n"); 1232 " Check your CMOS/BIOS settings.\n");
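
timespec64_valid_strict() above filters out obviously bogus persistent/boot clock readings before they reach the timekeeper. Roughly, it demands a non-negative tv_sec, a tv_nsec inside [0, NSEC_PER_SEC), and seconds small enough that the nanosecond representation still fits a signed 64-bit value. A standalone sketch of those rules; the kernel's exact upper bound may differ slightly.

/* ts_valid.c - roughly the sanity rules applied to the clock readings above.
 * Build with: gcc ts_valid.c -o ts_valid */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000LL
/* Keep tv_sec small enough that tv_sec * NSEC_PER_SEC still fits in s64;
 * the kernel's KTIME_SEC_MAX bound is derived similarly. */
#define SEC_MAX		(INT64_MAX / NSEC_PER_SEC)

struct ts64 {
	int64_t tv_sec;
	long tv_nsec;
};

static int ts_valid_strict(const struct ts64 *ts)
{
	if (ts->tv_sec < 0 || ts->tv_sec > SEC_MAX)
		return 0;
	if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
		return 0;
	return 1;
}

int main(void)
{
	struct ts64 good = { 1428000000, 500000000 };	/* plausible RTC value */
	struct ts64 bad  = { -1, 0 };			/* pre-epoch: rejected  */

	printf("good=%d bad=%d\n", ts_valid_strict(&good), ts_valid_strict(&bad));
	return 0;
}
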
@@ -1114,7 +1246,6 @@ void __init timekeeping_init(void)
1114 tk_set_xtime(tk, &now); 1246 tk_set_xtime(tk, &now);
1115 tk->raw_time.tv_sec = 0; 1247 tk->raw_time.tv_sec = 0;
1116 tk->raw_time.tv_nsec = 0; 1248 tk->raw_time.tv_nsec = 0;
1117 tk->base_raw.tv64 = 0;
1118 if (boot.tv_sec == 0 && boot.tv_nsec == 0) 1249 if (boot.tv_sec == 0 && boot.tv_nsec == 0)
1119 boot = tk_xtime(tk); 1250 boot = tk_xtime(tk);
1120 1251
@@ -1127,7 +1258,7 @@ void __init timekeeping_init(void)
1127 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1258 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1128} 1259}
1129 1260
1130/* time in seconds when suspend began */ 1261/* time in seconds when suspend began for persistent clock */
1131static struct timespec64 timekeeping_suspend_time; 1262static struct timespec64 timekeeping_suspend_time;
1132 1263
1133/** 1264/**
@@ -1152,12 +1283,49 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
1152 tk_debug_account_sleep_time(delta); 1283 tk_debug_account_sleep_time(delta);
1153} 1284}
1154 1285
1286#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
1287/**
1288 * We have three kinds of time sources to use for sleep time
 1289 * injection; the preference order is:
1290 * 1) non-stop clocksource
1291 * 2) persistent clock (ie: RTC accessible when irqs are off)
1292 * 3) RTC
1293 *
1294 * 1) and 2) are used by timekeeping, 3) by RTC subsystem.
 1295 * If the system has neither 1) nor 2), 3) is used as the final fallback.
 1296 *
 1297 *
 1298 * If timekeeping has already injected the sleep time via 1) or 2),
 1299 * 3) is unnecessary, so rtc_resume() does not need to be called;
 1300 * this is what timekeeping_rtc_skipresume()
 1301 * indicates.
1302 */
1303bool timekeeping_rtc_skipresume(void)
1304{
1305 return sleeptime_injected;
1306}
1307
1308/**
 1309 * Whether 1) can be used is only known in timekeeping_resume(),
 1310 * which is invoked after rtc_suspend(), so rtc_suspend() cannot
 1311 * be skipped merely because the system has 1).
 1312 *
 1313 * But if the system has 2), 2) will definitely be used, so in
 1314 * that case rtc_suspend() is not needed, and this is what
 1315 * timekeeping_rtc_skipsuspend() indicates.
1316 */
1317bool timekeeping_rtc_skipsuspend(void)
1318{
1319 return persistent_clock_exists;
1320}
1321
1155/** 1322/**
 1156 * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values 1323 * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values
1157 * @delta: pointer to a timespec64 delta value 1324 * @delta: pointer to a timespec64 delta value
1158 * 1325 *
1159 * This hook is for architectures that cannot support read_persistent_clock 1326 * This hook is for architectures that cannot support read_persistent_clock64
1160 * because their RTC/persistent clock is only accessible when irqs are enabled. 1327 * because their RTC/persistent clock is only accessible when irqs are enabled.
 1328 * Such platforms also lack an effective nonstop clocksource.
1161 * 1329 *
1162 * This function should only be called by rtc_resume(), and allows 1330 * This function should only be called by rtc_resume(), and allows
1163 * a suspend offset to be injected into the timekeeping values. 1331 * a suspend offset to be injected into the timekeeping values.
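
The timekeeping_rtc_skipresume()/timekeeping_rtc_skipsuspend() helpers added above let the RTC core tell whether its own sleep-time bookkeeping is redundant. Below is only a sketch of how a resume path could consult them; the real code lives in drivers/rtc/class.c and may differ, and every example_* symbol is a placeholder invented for this illustration.

/* Sketch only: an RTC-core resume path consulting the helpers above. */
#include <linux/device.h>
#include <linux/time64.h>
#include <linux/timekeeping.h>

/* Placeholder: sampled by the matching example suspend path. */
static struct timespec64 example_suspend_time;

/* Placeholder: read the wall time from the RTC hardware. */
static void example_read_rtc(struct device *dev, struct timespec64 *ts);

static int example_rtc_resume(struct device *dev)
{
	struct timespec64 now, slept;

	/* Timekeeping already injected the sleep time from a nonstop
	 * clocksource or a persistent clock, so the RTC path backs off. */
	if (timekeeping_rtc_skipresume())
		return 0;

	example_read_rtc(dev, &now);
	slept = timespec64_sub(now, example_suspend_time);
	timekeeping_inject_sleeptime64(&slept);
	return 0;
}
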
@@ -1167,13 +1335,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
1167 struct timekeeper *tk = &tk_core.timekeeper; 1335 struct timekeeper *tk = &tk_core.timekeeper;
1168 unsigned long flags; 1336 unsigned long flags;
1169 1337
1170 /*
1171 * Make sure we don't set the clock twice, as timekeeping_resume()
1172 * already did it
1173 */
1174 if (has_persistent_clock())
1175 return;
1176
1177 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1338 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1178 write_seqcount_begin(&tk_core.seq); 1339 write_seqcount_begin(&tk_core.seq);
1179 1340
@@ -1189,26 +1350,21 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
1189 /* signal hrtimers about time change */ 1350 /* signal hrtimers about time change */
1190 clock_was_set(); 1351 clock_was_set();
1191} 1352}
1353#endif
1192 1354
1193/** 1355/**
1194 * timekeeping_resume - Resumes the generic timekeeping subsystem. 1356 * timekeeping_resume - Resumes the generic timekeeping subsystem.
1195 *
1196 * This is for the generic clocksource timekeeping.
1197 * xtime/wall_to_monotonic/jiffies/etc are
1198 * still managed by arch specific suspend/resume code.
1199 */ 1357 */
1200void timekeeping_resume(void) 1358void timekeeping_resume(void)
1201{ 1359{
1202 struct timekeeper *tk = &tk_core.timekeeper; 1360 struct timekeeper *tk = &tk_core.timekeeper;
1203 struct clocksource *clock = tk->tkr.clock; 1361 struct clocksource *clock = tk->tkr_mono.clock;
1204 unsigned long flags; 1362 unsigned long flags;
1205 struct timespec64 ts_new, ts_delta; 1363 struct timespec64 ts_new, ts_delta;
1206 struct timespec tmp;
1207 cycle_t cycle_now, cycle_delta; 1364 cycle_t cycle_now, cycle_delta;
1208 bool suspendtime_found = false;
1209 1365
1210 read_persistent_clock(&tmp); 1366 sleeptime_injected = false;
1211 ts_new = timespec_to_timespec64(tmp); 1367 read_persistent_clock64(&ts_new);
1212 1368
1213 clockevents_resume(); 1369 clockevents_resume();
1214 clocksource_resume(); 1370 clocksource_resume();
@@ -1228,16 +1384,16 @@ void timekeeping_resume(void)
1228 * The less preferred source will only be tried if there is no better 1384 * The less preferred source will only be tried if there is no better
1229 * usable source. The rtc part is handled separately in rtc core code. 1385 * usable source. The rtc part is handled separately in rtc core code.
1230 */ 1386 */
1231 cycle_now = tk->tkr.read(clock); 1387 cycle_now = tk->tkr_mono.read(clock);
1232 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && 1388 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
1233 cycle_now > tk->tkr.cycle_last) { 1389 cycle_now > tk->tkr_mono.cycle_last) {
1234 u64 num, max = ULLONG_MAX; 1390 u64 num, max = ULLONG_MAX;
1235 u32 mult = clock->mult; 1391 u32 mult = clock->mult;
1236 u32 shift = clock->shift; 1392 u32 shift = clock->shift;
1237 s64 nsec = 0; 1393 s64 nsec = 0;
1238 1394
1239 cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, 1395 cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
1240 tk->tkr.mask); 1396 tk->tkr_mono.mask);
1241 1397
1242 /* 1398 /*
 1243	 * "cycle_delta * mult" may cause a 64-bit overflow if the 1399	 * "cycle_delta * mult" may cause a 64-bit overflow if the
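
The comment above is about converting a potentially huge suspended-cycle count into nanoseconds as (cycle_delta * mult) >> shift without overflowing 64-bit arithmetic. A standalone sketch of the chunking idea, with invented mult/shift values rather than the kernel's exact code:

/* chunked_mult.c - overflow-safe (cycles * mult) >> shift conversion.
 * Build with: gcc chunked_mult.c -o chunked_mult */
#include <stdint.h>
#include <stdio.h>

static uint64_t cycles_to_ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	uint64_t max = UINT64_MAX / mult;	/* largest safe cycle chunk */
	uint64_t ns = 0;

	if (cycles > max) {
		uint64_t num = cycles / max;	/* number of whole chunks */

		ns = ((max * mult) >> shift) * num;
		cycles -= num * max;
	}
	ns += (cycles * mult) >> shift;		/* remainder now fits easily */
	return ns;
}

int main(void)
{
	uint32_t mult = 0x80000000u, shift = 31;	/* i.e. 1 cycle == 1 ns */
	uint64_t slept = 7ULL * 24 * 3600 * 1000000000ULL;	/* a week of cycles */

	printf("%llu ns\n", (unsigned long long)cycles_to_ns(slept, mult, shift));
	return 0;
}
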
@@ -1253,17 +1409,19 @@ void timekeeping_resume(void)
1253 nsec += ((u64) cycle_delta * mult) >> shift; 1409 nsec += ((u64) cycle_delta * mult) >> shift;
1254 1410
1255 ts_delta = ns_to_timespec64(nsec); 1411 ts_delta = ns_to_timespec64(nsec);
1256 suspendtime_found = true; 1412 sleeptime_injected = true;
1257 } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { 1413 } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
1258 ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); 1414 ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
1259 suspendtime_found = true; 1415 sleeptime_injected = true;
1260 } 1416 }
1261 1417
1262 if (suspendtime_found) 1418 if (sleeptime_injected)
1263 __timekeeping_inject_sleeptime(tk, &ts_delta); 1419 __timekeeping_inject_sleeptime(tk, &ts_delta);
1264 1420
1265 /* Re-base the last cycle value */ 1421 /* Re-base the last cycle value */
1266 tk->tkr.cycle_last = cycle_now; 1422 tk->tkr_mono.cycle_last = cycle_now;
1423 tk->tkr_raw.cycle_last = cycle_now;
1424
1267 tk->ntp_error = 0; 1425 tk->ntp_error = 0;
1268 timekeeping_suspended = 0; 1426 timekeeping_suspended = 0;
1269 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); 1427 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1272,9 +1430,7 @@ void timekeeping_resume(void)
1272 1430
1273 touch_softlockup_watchdog(); 1431 touch_softlockup_watchdog();
1274 1432
1275 clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); 1433 tick_resume();
1276
1277 /* Resume hrtimers */
1278 hrtimers_resume(); 1434 hrtimers_resume();
1279} 1435}
1280 1436
@@ -1284,10 +1440,8 @@ int timekeeping_suspend(void)
1284 unsigned long flags; 1440 unsigned long flags;
1285 struct timespec64 delta, delta_delta; 1441 struct timespec64 delta, delta_delta;
1286 static struct timespec64 old_delta; 1442 static struct timespec64 old_delta;
1287 struct timespec tmp;
1288 1443
1289 read_persistent_clock(&tmp); 1444 read_persistent_clock64(&timekeeping_suspend_time);
1290 timekeeping_suspend_time = timespec_to_timespec64(tmp);
1291 1445
1292 /* 1446 /*
1293 * On some systems the persistent_clock can not be detected at 1447 * On some systems the persistent_clock can not be detected at
@@ -1295,31 +1449,33 @@ int timekeeping_suspend(void)
1295 * value returned, update the persistent_clock_exists flag. 1449 * value returned, update the persistent_clock_exists flag.
1296 */ 1450 */
1297 if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) 1451 if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
1298 persistent_clock_exist = true; 1452 persistent_clock_exists = true;
1299 1453
1300 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1454 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1301 write_seqcount_begin(&tk_core.seq); 1455 write_seqcount_begin(&tk_core.seq);
1302 timekeeping_forward_now(tk); 1456 timekeeping_forward_now(tk);
1303 timekeeping_suspended = 1; 1457 timekeeping_suspended = 1;
1304 1458
1305 /* 1459 if (persistent_clock_exists) {
1306 * To avoid drift caused by repeated suspend/resumes,
1307 * which each can add ~1 second drift error,
1308 * try to compensate so the difference in system time
1309 * and persistent_clock time stays close to constant.
1310 */
1311 delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
1312 delta_delta = timespec64_sub(delta, old_delta);
1313 if (abs(delta_delta.tv_sec) >= 2) {
1314 /* 1460 /*
1315 * if delta_delta is too large, assume time correction 1461 * To avoid drift caused by repeated suspend/resumes,
1316 * has occured and set old_delta to the current delta. 1462 * which each can add ~1 second drift error,
1463 * try to compensate so the difference in system time
1464 * and persistent_clock time stays close to constant.
1317 */ 1465 */
1318 old_delta = delta; 1466 delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
1319 } else { 1467 delta_delta = timespec64_sub(delta, old_delta);
1320 /* Otherwise try to adjust old_system to compensate */ 1468 if (abs(delta_delta.tv_sec) >= 2) {
1321 timekeeping_suspend_time = 1469 /*
1322 timespec64_add(timekeeping_suspend_time, delta_delta); 1470 * if delta_delta is too large, assume time correction
1471 * has occurred and set old_delta to the current delta.
1472 */
1473 old_delta = delta;
1474 } else {
1475 /* Otherwise try to adjust old_system to compensate */
1476 timekeeping_suspend_time =
1477 timespec64_add(timekeeping_suspend_time, delta_delta);
1478 }
1323 } 1479 }
1324 1480
1325 timekeeping_update(tk, TK_MIRROR); 1481 timekeeping_update(tk, TK_MIRROR);
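
The compensation block above keeps the offset between system time and the whole-second persistent clock roughly constant across repeated suspend cycles. A toy standalone illustration of the delta/delta_delta decision, with invented numbers:

/* drift_comp.c - toy model of the delta/delta_delta compensation above.
 * All numbers are invented.  Build with: gcc drift_comp.c -o drift_comp */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Gap (ns) between system time and the whole-second persistent clock,
	 * as it might be sampled at two consecutive suspends. */
	long long old_delta = 300000000;	/* 0.3 s at the first suspend  */
	long long delta     = 700000000;	/* 0.7 s at the second suspend */
	long long delta_delta = delta - old_delta;

	if (llabs(delta_delta) >= 2000000000LL) {
		/* A jump of 2 s or more looks like a real clock step:
		 * just remember the new gap. */
		old_delta = delta;
	} else {
		/* Sub-second rounding noise: fold it back into the saved
		 * suspend timestamp so it cannot accumulate over many
		 * suspend/resume cycles. */
		printf("nudge suspend time by %lld ns\n", delta_delta);
	}
	return 0;
}
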
@@ -1327,7 +1483,7 @@ int timekeeping_suspend(void)
1327 write_seqcount_end(&tk_core.seq); 1483 write_seqcount_end(&tk_core.seq);
1328 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1484 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1329 1485
1330 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); 1486 tick_suspend();
1331 clocksource_suspend(); 1487 clocksource_suspend();
1332 clockevents_suspend(); 1488 clockevents_suspend();
1333 1489
@@ -1416,15 +1572,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
1416 * 1572 *
1417 * XXX - TODO: Doc ntp_error calculation. 1573 * XXX - TODO: Doc ntp_error calculation.
1418 */ 1574 */
1419 if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { 1575 if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
1420 /* NTP adjustment caused clocksource mult overflow */ 1576 /* NTP adjustment caused clocksource mult overflow */
1421 WARN_ON_ONCE(1); 1577 WARN_ON_ONCE(1);
1422 return; 1578 return;
1423 } 1579 }
1424 1580
1425 tk->tkr.mult += mult_adj; 1581 tk->tkr_mono.mult += mult_adj;
1426 tk->xtime_interval += interval; 1582 tk->xtime_interval += interval;
1427 tk->tkr.xtime_nsec -= offset; 1583 tk->tkr_mono.xtime_nsec -= offset;
1428 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; 1584 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
1429} 1585}
1430 1586
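
The guard above is the standard unsigned wraparound test: for unsigned values, mult + mult_adj overflows exactly when the truncated sum comes out smaller than either operand. A minimal standalone version of that check, with values chosen only for illustration:

/* wrap_check.c - the unsigned-overflow test used in the hunk above.
 * Build with: gcc wrap_check.c -o wrap_check */
#include <stdint.h>
#include <stdio.h>

static int add_would_wrap(uint32_t mult, uint32_t adj)
{
	/* For unsigned arithmetic, (mult + adj) wraps iff the truncated
	 * sum ends up smaller than one of the operands. */
	return (uint32_t)(mult + adj) < adj;
}

int main(void)
{
	printf("%d\n", add_would_wrap(0xfffffff0u, 0x20));	/* 1: would wrap */
	printf("%d\n", add_would_wrap(0x10000000u, 0x20));	/* 0: safe to add */
	return 0;
}
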
@@ -1486,13 +1642,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1486 tk->ntp_err_mult = 0; 1642 tk->ntp_err_mult = 0;
1487 } 1643 }
1488 1644
1489 if (unlikely(tk->tkr.clock->maxadj && 1645 if (unlikely(tk->tkr_mono.clock->maxadj &&
1490 (abs(tk->tkr.mult - tk->tkr.clock->mult) 1646 (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
1491 > tk->tkr.clock->maxadj))) { 1647 > tk->tkr_mono.clock->maxadj))) {
1492 printk_once(KERN_WARNING 1648 printk_once(KERN_WARNING
1493 "Adjusting %s more than 11%% (%ld vs %ld)\n", 1649 "Adjusting %s more than 11%% (%ld vs %ld)\n",
1494 tk->tkr.clock->name, (long)tk->tkr.mult, 1650 tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
1495 (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); 1651 (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
1496 } 1652 }
1497 1653
1498 /* 1654 /*
@@ -1509,9 +1665,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1509 * We'll correct this error next time through this function, when 1665 * We'll correct this error next time through this function, when
1510 * xtime_nsec is not as small. 1666 * xtime_nsec is not as small.
1511 */ 1667 */
1512 if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { 1668 if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
1513 s64 neg = -(s64)tk->tkr.xtime_nsec; 1669 s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
1514 tk->tkr.xtime_nsec = 0; 1670 tk->tkr_mono.xtime_nsec = 0;
1515 tk->ntp_error += neg << tk->ntp_error_shift; 1671 tk->ntp_error += neg << tk->ntp_error_shift;
1516 } 1672 }
1517} 1673}
@@ -1526,13 +1682,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1526 */ 1682 */
1527static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) 1683static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
1528{ 1684{
1529 u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; 1685 u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
1530 unsigned int clock_set = 0; 1686 unsigned int clock_set = 0;
1531 1687
1532 while (tk->tkr.xtime_nsec >= nsecps) { 1688 while (tk->tkr_mono.xtime_nsec >= nsecps) {
1533 int leap; 1689 int leap;
1534 1690
1535 tk->tkr.xtime_nsec -= nsecps; 1691 tk->tkr_mono.xtime_nsec -= nsecps;
1536 tk->xtime_sec++; 1692 tk->xtime_sec++;
1537 1693
 1538		/* Figure out if it's a leap sec and apply if needed */ 1694		/* Figure out if it's a leap sec and apply if needed */
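
xtime_nsec above is stored left-shifted by the clocksource shift, so "one second" in that fixed-point unit is NSEC_PER_SEC << shift and whole seconds are peeled off against it. A standalone sketch of that accumulation, without the leap-second handling and with an invented shift value:

/* shifted_nsec.c - shifted-nanosecond accumulation as in the hunk above.
 * Build with: gcc shifted_nsec.c -o shifted_nsec */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint32_t shift = 24;			/* illustrative clocksource shift */
	uint64_t nsecps = NSEC_PER_SEC << shift;	/* "one second", shifted */
	uint64_t xtime_nsec = 0;
	uint64_t xtime_sec = 0;

	/* Pretend three ticks of ~400 ms each were accumulated (shifted units). */
	for (int i = 0; i < 3; i++)
		xtime_nsec += (400 * NSEC_PER_SEC / 1000) << shift;

	/* Roll whole seconds out of the shifted remainder, as the kernel does. */
	while (xtime_nsec >= nsecps) {
		xtime_nsec -= nsecps;
		xtime_sec++;
	}

	printf("%llu s + %llu ns\n",
	       (unsigned long long)xtime_sec,
	       (unsigned long long)(xtime_nsec >> shift));
	return 0;
}
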
@@ -1577,9 +1733,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1577 1733
1578 /* Accumulate one shifted interval */ 1734 /* Accumulate one shifted interval */
1579 offset -= interval; 1735 offset -= interval;
1580 tk->tkr.cycle_last += interval; 1736 tk->tkr_mono.cycle_last += interval;
1737 tk->tkr_raw.cycle_last += interval;
1581 1738
1582 tk->tkr.xtime_nsec += tk->xtime_interval << shift; 1739 tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
1583 *clock_set |= accumulate_nsecs_to_secs(tk); 1740 *clock_set |= accumulate_nsecs_to_secs(tk);
1584 1741
1585 /* Accumulate raw time */ 1742 /* Accumulate raw time */
@@ -1622,14 +1779,17 @@ void update_wall_time(void)
1622#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 1779#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1623 offset = real_tk->cycle_interval; 1780 offset = real_tk->cycle_interval;
1624#else 1781#else
1625 offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), 1782 offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
1626 tk->tkr.cycle_last, tk->tkr.mask); 1783 tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
1627#endif 1784#endif
1628 1785
1629 /* Check if there's really nothing to do */ 1786 /* Check if there's really nothing to do */
1630 if (offset < real_tk->cycle_interval) 1787 if (offset < real_tk->cycle_interval)
1631 goto out; 1788 goto out;
1632 1789
1790 /* Do some additional sanity checking */
1791 timekeeping_check_update(real_tk, offset);
1792
1633 /* 1793 /*
1634 * With NO_HZ we may have to accumulate many cycle_intervals 1794 * With NO_HZ we may have to accumulate many cycle_intervals
1635 * (think "ticks") worth of time at once. To do this efficiently, 1795 * (think "ticks") worth of time at once. To do this efficiently,
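
clocksource_delta() above measures how far the counter moved since cycle_last, masked to the clocksource's width so a counter that wrapped between reads still produces the right difference, as long as less than one full period elapsed. A standalone sketch, using a deliberately narrow 16-bit counter so the wrap is visible:

/* masked_delta.c - wrap-safe counter delta, as clocksource_delta() does above.
 * Build with: gcc masked_delta.c -o masked_delta */
#include <stdint.h>
#include <stdio.h>

static uint64_t masked_delta(uint64_t now, uint64_t last, uint64_t mask)
{
	/* Unsigned subtraction plus the mask handles a counter that wrapped
	 * between the two reads. */
	return (now - last) & mask;
}

int main(void)
{
	uint64_t mask = 0xffff;		/* pretend the counter is 16 bits wide */

	/* Counter read 0xfff0 last tick, wrapped, and now reads 0x0010. */
	printf("delta = %llu cycles\n",
	       (unsigned long long)masked_delta(0x0010, 0xfff0, mask));
	return 0;
}
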
@@ -1784,8 +1944,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
1784 do { 1944 do {
1785 seq = read_seqcount_begin(&tk_core.seq); 1945 seq = read_seqcount_begin(&tk_core.seq);
1786 1946
1787 base = tk->tkr.base_mono; 1947 base = tk->tkr_mono.base;
1788 nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; 1948 nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
1789 1949
1790 *offs_real = tk->offs_real; 1950 *offs_real = tk->offs_real;
1791 *offs_boot = tk->offs_boot; 1951 *offs_boot = tk->offs_boot;
@@ -1816,8 +1976,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
1816 do { 1976 do {
1817 seq = read_seqcount_begin(&tk_core.seq); 1977 seq = read_seqcount_begin(&tk_core.seq);
1818 1978
1819 base = tk->tkr.base_mono; 1979 base = tk->tkr_mono.base;
1820 nsecs = timekeeping_get_ns(&tk->tkr); 1980 nsecs = timekeeping_get_ns(&tk->tkr_mono);
1821 1981
1822 *offs_real = tk->offs_real; 1982 *offs_real = tk->offs_real;
1823 *offs_boot = tk->offs_boot; 1983 *offs_boot = tk->offs_boot;
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 1d91416055d5..ead8794b9a4e 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -19,4 +19,11 @@ extern void timekeeping_clocktai(struct timespec *ts);
19extern int timekeeping_suspend(void); 19extern int timekeeping_suspend(void);
20extern void timekeeping_resume(void); 20extern void timekeeping_resume(void);
21 21
22extern void do_timer(unsigned long ticks);
23extern void update_wall_time(void);
24
25extern seqlock_t jiffies_lock;
26
27#define CS_NAME_LEN 32
28
22#endif 29#endif
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2d3f5c504939..2ece3aa5069c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -90,8 +90,18 @@ struct tvec_base {
90 struct tvec tv5; 90 struct tvec tv5;
91} ____cacheline_aligned; 91} ____cacheline_aligned;
92 92
93/*
94 * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
95 * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
96 * pointer to per-cpu entries because we don't know where we'll map the section,
97 * even for the boot cpu.
98 *
99 * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
100 * rest of them.
101 */
93struct tvec_base boot_tvec_bases; 102struct tvec_base boot_tvec_bases;
94EXPORT_SYMBOL(boot_tvec_bases); 103EXPORT_SYMBOL(boot_tvec_bases);
104
95static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; 105static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
96 106
97/* Functions below help us manage 'deferrable' flag */ 107/* Functions below help us manage 'deferrable' flag */
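
The comment above explains why a statically allocated boot_tvec_bases sits next to the per-CPU pointers: per-CPU addresses are not compile-time constants, so static initializers can only reference the boot object. A rough sketch of that pattern with invented names; only DEFINE_PER_CPU()/per_cpu() are assumed from the per-CPU API.

/* Sketch of the bootstrap pattern described above (illustrative names). */
#include <linux/percpu.h>

struct example_base {
	int dummy;
};

/* Statically allocated, so its address is a compile-time constant and can
 * be used before the per-CPU areas (and allocators) are set up. */
static struct example_base example_boot_base;

/* Per-CPU pointers: their addresses are only known at runtime, so all we
 * can do statically is make every CPU's pointer reference the boot object. */
static DEFINE_PER_CPU(struct example_base *, example_bases) = &example_boot_base;

static struct example_base *example_get_base(int cpu)
{
	return per_cpu(example_bases, cpu);
}
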
@@ -1027,6 +1037,8 @@ int try_to_del_timer_sync(struct timer_list *timer)
1027EXPORT_SYMBOL(try_to_del_timer_sync); 1037EXPORT_SYMBOL(try_to_del_timer_sync);
1028 1038
1029#ifdef CONFIG_SMP 1039#ifdef CONFIG_SMP
1040static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
1041
1030/** 1042/**
1031 * del_timer_sync - deactivate a timer and wait for the handler to finish. 1043 * del_timer_sync - deactivate a timer and wait for the handler to finish.
1032 * @timer: the timer to be deactivated 1044 * @timer: the timer to be deactivated
@@ -1532,64 +1544,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1532} 1544}
1533EXPORT_SYMBOL(schedule_timeout_uninterruptible); 1545EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1534 1546
1535static int init_timers_cpu(int cpu)
1536{
1537 int j;
1538 struct tvec_base *base;
1539 static char tvec_base_done[NR_CPUS];
1540
1541 if (!tvec_base_done[cpu]) {
1542 static char boot_done;
1543
1544 if (boot_done) {
1545 /*
1546 * The APs use this path later in boot
1547 */
1548 base = kzalloc_node(sizeof(*base), GFP_KERNEL,
1549 cpu_to_node(cpu));
1550 if (!base)
1551 return -ENOMEM;
1552
1553 /* Make sure tvec_base has TIMER_FLAG_MASK bits free */
1554 if (WARN_ON(base != tbase_get_base(base))) {
1555 kfree(base);
1556 return -ENOMEM;
1557 }
1558 per_cpu(tvec_bases, cpu) = base;
1559 } else {
1560 /*
1561 * This is for the boot CPU - we use compile-time
1562 * static initialisation because per-cpu memory isn't
1563 * ready yet and because the memory allocators are not
1564 * initialised either.
1565 */
1566 boot_done = 1;
1567 base = &boot_tvec_bases;
1568 }
1569 spin_lock_init(&base->lock);
1570 tvec_base_done[cpu] = 1;
1571 base->cpu = cpu;
1572 } else {
1573 base = per_cpu(tvec_bases, cpu);
1574 }
1575
1576
1577 for (j = 0; j < TVN_SIZE; j++) {
1578 INIT_LIST_HEAD(base->tv5.vec + j);
1579 INIT_LIST_HEAD(base->tv4.vec + j);
1580 INIT_LIST_HEAD(base->tv3.vec + j);
1581 INIT_LIST_HEAD(base->tv2.vec + j);
1582 }
1583 for (j = 0; j < TVR_SIZE; j++)
1584 INIT_LIST_HEAD(base->tv1.vec + j);
1585
1586 base->timer_jiffies = jiffies;
1587 base->next_timer = base->timer_jiffies;
1588 base->active_timers = 0;
1589 base->all_timers = 0;
1590 return 0;
1591}
1592
1593#ifdef CONFIG_HOTPLUG_CPU 1547#ifdef CONFIG_HOTPLUG_CPU
1594static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) 1548static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
1595{ 1549{
@@ -1631,55 +1585,86 @@ static void migrate_timers(int cpu)
1631 migrate_timer_list(new_base, old_base->tv5.vec + i); 1585 migrate_timer_list(new_base, old_base->tv5.vec + i);
1632 } 1586 }
1633 1587
1588 old_base->active_timers = 0;
1589 old_base->all_timers = 0;
1590
1634 spin_unlock(&old_base->lock); 1591 spin_unlock(&old_base->lock);
1635 spin_unlock_irq(&new_base->lock); 1592 spin_unlock_irq(&new_base->lock);
1636 put_cpu_var(tvec_bases); 1593 put_cpu_var(tvec_bases);
1637} 1594}
1638#endif /* CONFIG_HOTPLUG_CPU */
1639 1595
1640static int timer_cpu_notify(struct notifier_block *self, 1596static int timer_cpu_notify(struct notifier_block *self,
1641 unsigned long action, void *hcpu) 1597 unsigned long action, void *hcpu)
1642{ 1598{
1643 long cpu = (long)hcpu; 1599 switch (action) {
1644 int err;
1645
1646 switch(action) {
1647 case CPU_UP_PREPARE:
1648 case CPU_UP_PREPARE_FROZEN:
1649 err = init_timers_cpu(cpu);
1650 if (err < 0)
1651 return notifier_from_errno(err);
1652 break;
1653#ifdef CONFIG_HOTPLUG_CPU
1654 case CPU_DEAD: 1600 case CPU_DEAD:
1655 case CPU_DEAD_FROZEN: 1601 case CPU_DEAD_FROZEN:
1656 migrate_timers(cpu); 1602 migrate_timers((long)hcpu);
1657 break; 1603 break;
1658#endif
1659 default: 1604 default:
1660 break; 1605 break;
1661 } 1606 }
1607
1662 return NOTIFY_OK; 1608 return NOTIFY_OK;
1663} 1609}
1664 1610
1665static struct notifier_block timers_nb = { 1611static inline void timer_register_cpu_notifier(void)
1666 .notifier_call = timer_cpu_notify, 1612{
1667}; 1613 cpu_notifier(timer_cpu_notify, 0);
1614}
1615#else
1616static inline void timer_register_cpu_notifier(void) { }
1617#endif /* CONFIG_HOTPLUG_CPU */
1668 1618
1619static void __init init_timer_cpu(struct tvec_base *base, int cpu)
1620{
1621 int j;
1669 1622
1670void __init init_timers(void) 1623 BUG_ON(base != tbase_get_base(base));
1624
1625 base->cpu = cpu;
1626 per_cpu(tvec_bases, cpu) = base;
1627 spin_lock_init(&base->lock);
1628
1629 for (j = 0; j < TVN_SIZE; j++) {
1630 INIT_LIST_HEAD(base->tv5.vec + j);
1631 INIT_LIST_HEAD(base->tv4.vec + j);
1632 INIT_LIST_HEAD(base->tv3.vec + j);
1633 INIT_LIST_HEAD(base->tv2.vec + j);
1634 }
1635 for (j = 0; j < TVR_SIZE; j++)
1636 INIT_LIST_HEAD(base->tv1.vec + j);
1637
1638 base->timer_jiffies = jiffies;
1639 base->next_timer = base->timer_jiffies;
1640}
1641
1642static void __init init_timer_cpus(void)
1671{ 1643{
1672 int err; 1644 struct tvec_base *base;
1645 int local_cpu = smp_processor_id();
1646 int cpu;
1673 1647
1648 for_each_possible_cpu(cpu) {
1649 if (cpu == local_cpu)
1650 base = &boot_tvec_bases;
1651#ifdef CONFIG_SMP
1652 else
1653 base = per_cpu_ptr(&__tvec_bases, cpu);
1654#endif
1655
1656 init_timer_cpu(base, cpu);
1657 }
1658}
1659
1660void __init init_timers(void)
1661{
1674 /* ensure there are enough low bits for flags in timer->base pointer */ 1662 /* ensure there are enough low bits for flags in timer->base pointer */
1675 BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK); 1663 BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
1676 1664
1677 err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, 1665 init_timer_cpus();
1678 (void *)(long)smp_processor_id());
1679 BUG_ON(err != NOTIFY_OK);
1680
1681 init_timer_stats(); 1666 init_timer_stats();
1682 register_cpu_notifier(&timers_nb); 1667 timer_register_cpu_notifier();
1683 open_softirq(TIMER_SOFTIRQ, run_timer_softirq); 1668 open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1684} 1669}
1685 1670
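
The BUILD_BUG_ON() and the base != tbase_get_base(base) check above both protect the trick of storing per-timer flags in the low bits of an aligned tvec_base pointer. A standalone sketch of that pointer-tagging technique; the struct, mask, and flag names here are invented for illustration.

/* tagged_ptr.c - flags packed into the low bits of an aligned pointer.
 * Build with: gcc tagged_ptr.c -o tagged_ptr */
#include <stdio.h>

#define FLAG_MASK	0x3UL		/* two low address bits carry flags */
#define FLAG_DEFERRABLE	0x1UL

struct base {
	int dummy;
} __attribute__((aligned(4)));		/* guarantees the two low bits are 0 */

static struct base *get_base(unsigned long tagged)
{
	/* Mask the flag bits off to recover the real pointer. */
	return (struct base *)(tagged & ~FLAG_MASK);
}

int main(void)
{
	static struct base b;
	unsigned long tagged = (unsigned long)&b | FLAG_DEFERRABLE;

	printf("pointer recovered: %d, deferrable flag: %lu\n",
	       get_base(tagged) == &b, tagged & FLAG_DEFERRABLE);
	return 0;
}
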
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 61ed862cdd37..e878c2e0ba45 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -16,10 +16,10 @@
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/seq_file.h> 17#include <linux/seq_file.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
19#include <linux/tick.h>
20 19
21#include <asm/uaccess.h> 20#include <asm/uaccess.h>
22 21
22#include "tick-internal.h"
23 23
24struct timer_list_iter { 24struct timer_list_iter {
25 int cpu; 25 int cpu;
@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
228 print_name_offset(m, dev->set_next_event); 228 print_name_offset(m, dev->set_next_event);
229 SEQ_printf(m, "\n"); 229 SEQ_printf(m, "\n");
230 230
231 SEQ_printf(m, " set_mode: "); 231 if (dev->set_mode) {
232 print_name_offset(m, dev->set_mode); 232 SEQ_printf(m, " set_mode: ");
233 SEQ_printf(m, "\n"); 233 print_name_offset(m, dev->set_mode);
234 SEQ_printf(m, "\n");
235 } else {
236 if (dev->set_state_shutdown) {
237 SEQ_printf(m, " shutdown: ");
238 print_name_offset(m, dev->set_state_shutdown);
239 SEQ_printf(m, "\n");
240 }
241
242 if (dev->set_state_periodic) {
243 SEQ_printf(m, " periodic: ");
244 print_name_offset(m, dev->set_state_periodic);
245 SEQ_printf(m, "\n");
246 }
247
248 if (dev->set_state_oneshot) {
249 SEQ_printf(m, " oneshot: ");
250 print_name_offset(m, dev->set_state_oneshot);
251 SEQ_printf(m, "\n");
252 }
253
254 if (dev->tick_resume) {
255 SEQ_printf(m, " resume: ");
256 print_name_offset(m, dev->tick_resume);
257 SEQ_printf(m, "\n");
258 }
259 }
234 260
235 SEQ_printf(m, " event_handler: "); 261 SEQ_printf(m, " event_handler: ");
236 print_name_offset(m, dev->event_handler); 262 print_name_offset(m, dev->event_handler);