aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-22 14:35:46 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-22 14:35:46 -0400
commit: 3992c0321258bdff3666cbaf5225f538ad61a548 (patch)
tree: 42c98bcf601237b07ceac34b5bdb0b37558280dc /kernel
parent: 55acdddbac1725b80df0c41970505e8a41c84956 (diff)
parent: eec19d1a0d04c80e66eef634f7b8f460f2ca5643 (diff)
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer core changes from Ingo Molnar:
 "Continued cleanups of the core time and NTP code, plus more nohz work
  preparing for tick-less userspace execution."

* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  time: Rework timekeeping functions to take timekeeper ptr as argument
  time: Move xtime_nsec adjustment underflow handling timekeeping_adjust
  time: Move arch_gettimeoffset() usage into timekeeping_get_ns()
  time: Refactor accumulation of nsecs to secs
  time: Condense timekeeper.xtime into xtime_sec
  time: Explicitly use u32 instead of int for shift values
  time: Whitespace cleanups per Ingo's requests
  nohz: Move next idle expiry time record into idle logic area
  nohz: Move ts->idle_calls incrementation into strict idle logic
  nohz: Rename ts->idle_tick to ts->last_tick
  nohz: Make nohz API agnostic against idle ticks cputime accounting
  nohz: Separate idle sleeping time accounting from nohz logic
  timers: Improve get_next_timer_interrupt()
  timers: Add accounting of non deferrable timers
  timers: Consolidate base->next_timer update
  timers: Create detach_if_pending() and use it
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/time/tick-sched.c   192
-rw-r--r--  kernel/time/timekeeping.c  487
-rw-r--r--  kernel/time/timer_list.c     4
-rw-r--r--  kernel/timer.c             110
4 files changed, 432 insertions, 361 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 45b17aea79ef..024540f97f74 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -271,50 +271,15 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
271} 271}
272EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); 272EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
273 273
274static void tick_nohz_stop_sched_tick(struct tick_sched *ts) 274static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
275 ktime_t now, int cpu)
275{ 276{
276 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; 277 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
278 ktime_t last_update, expires, ret = { .tv64 = 0 };
277 unsigned long rcu_delta_jiffies; 279 unsigned long rcu_delta_jiffies;
278 ktime_t last_update, expires, now;
279 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 280 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
280 u64 time_delta; 281 u64 time_delta;
281 int cpu;
282
283 cpu = smp_processor_id();
284 ts = &per_cpu(tick_cpu_sched, cpu);
285
286 now = tick_nohz_start_idle(cpu, ts);
287
288 /*
289 * If this cpu is offline and it is the one which updates
290 * jiffies, then give up the assignment and let it be taken by
291 * the cpu which runs the tick timer next. If we don't drop
292 * this here the jiffies might be stale and do_timer() never
293 * invoked.
294 */
295 if (unlikely(!cpu_online(cpu))) {
296 if (cpu == tick_do_timer_cpu)
297 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
298 }
299
300 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
301 return;
302 282
303 if (need_resched())
304 return;
305
306 if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
307 static int ratelimit;
308
309 if (ratelimit < 10) {
310 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
311 (unsigned int) local_softirq_pending());
312 ratelimit++;
313 }
314 return;
315 }
316
317 ts->idle_calls++;
318 /* Read jiffies and the time when jiffies were updated last */ 283 /* Read jiffies and the time when jiffies were updated last */
319 do { 284 do {
320 seq = read_seqbegin(&xtime_lock); 285 seq = read_seqbegin(&xtime_lock);
@@ -397,6 +362,8 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
397 if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) 362 if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
398 goto out; 363 goto out;
399 364
365 ret = expires;
366
400 /* 367 /*
401 * nohz_stop_sched_tick can be called several times before 368 * nohz_stop_sched_tick can be called several times before
402 * the nohz_restart_sched_tick is called. This happens when 369 * the nohz_restart_sched_tick is called. This happens when
@@ -408,16 +375,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
408 select_nohz_load_balancer(1); 375 select_nohz_load_balancer(1);
409 calc_load_enter_idle(); 376 calc_load_enter_idle();
410 377
411 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); 378 ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
412 ts->tick_stopped = 1; 379 ts->tick_stopped = 1;
413 ts->idle_jiffies = last_jiffies;
414 } 380 }
415 381
416 ts->idle_sleeps++;
417
418 /* Mark expires */
419 ts->idle_expires = expires;
420
421 /* 382 /*
422 * If the expiration time == KTIME_MAX, then 383 * If the expiration time == KTIME_MAX, then
423 * in this case we simply stop the tick timer. 384 * in this case we simply stop the tick timer.
@@ -448,6 +409,65 @@ out:
448 ts->next_jiffies = next_jiffies; 409 ts->next_jiffies = next_jiffies;
449 ts->last_jiffies = last_jiffies; 410 ts->last_jiffies = last_jiffies;
450 ts->sleep_length = ktime_sub(dev->next_event, now); 411 ts->sleep_length = ktime_sub(dev->next_event, now);
412
413 return ret;
414}
415
416static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
417{
418 /*
419 * If this cpu is offline and it is the one which updates
420 * jiffies, then give up the assignment and let it be taken by
421 * the cpu which runs the tick timer next. If we don't drop
422 * this here the jiffies might be stale and do_timer() never
423 * invoked.
424 */
425 if (unlikely(!cpu_online(cpu))) {
426 if (cpu == tick_do_timer_cpu)
427 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
428 }
429
430 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
431 return false;
432
433 if (need_resched())
434 return false;
435
436 if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
437 static int ratelimit;
438
439 if (ratelimit < 10) {
440 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
441 (unsigned int) local_softirq_pending());
442 ratelimit++;
443 }
444 return false;
445 }
446
447 return true;
448}
449
450static void __tick_nohz_idle_enter(struct tick_sched *ts)
451{
452 ktime_t now, expires;
453 int cpu = smp_processor_id();
454
455 now = tick_nohz_start_idle(cpu, ts);
456
457 if (can_stop_idle_tick(cpu, ts)) {
458 int was_stopped = ts->tick_stopped;
459
460 ts->idle_calls++;
461
462 expires = tick_nohz_stop_sched_tick(ts, now, cpu);
463 if (expires.tv64 > 0LL) {
464 ts->idle_sleeps++;
465 ts->idle_expires = expires;
466 }
467
468 if (!was_stopped && ts->tick_stopped)
469 ts->idle_jiffies = ts->last_jiffies;
470 }
451} 471}
452 472
453/** 473/**
@@ -485,7 +505,7 @@ void tick_nohz_idle_enter(void)
485 * update of the idle time accounting in tick_nohz_start_idle(). 505 * update of the idle time accounting in tick_nohz_start_idle().
486 */ 506 */
487 ts->inidle = 1; 507 ts->inidle = 1;
488 tick_nohz_stop_sched_tick(ts); 508 __tick_nohz_idle_enter(ts);
489 509
490 local_irq_enable(); 510 local_irq_enable();
491} 511}
@@ -505,7 +525,7 @@ void tick_nohz_irq_exit(void)
505 if (!ts->inidle) 525 if (!ts->inidle)
506 return; 526 return;
507 527
508 tick_nohz_stop_sched_tick(ts); 528 __tick_nohz_idle_enter(ts);
509} 529}
510 530
511/** 531/**
@@ -523,7 +543,7 @@ ktime_t tick_nohz_get_sleep_length(void)
523static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) 543static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
524{ 544{
525 hrtimer_cancel(&ts->sched_timer); 545 hrtimer_cancel(&ts->sched_timer);
526 hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); 546 hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
527 547
528 while (1) { 548 while (1) {
529 /* Forward the time to expire in the future */ 549 /* Forward the time to expire in the future */
@@ -546,6 +566,41 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
546 } 566 }
547} 567}
548 568
569static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
570{
571 /* Update jiffies first */
572 select_nohz_load_balancer(0);
573 tick_do_update_jiffies64(now);
574 update_cpu_load_nohz();
575
576 touch_softlockup_watchdog();
577 /*
578 * Cancel the scheduled timer and restore the tick
579 */
580 ts->tick_stopped = 0;
581 ts->idle_exittime = now;
582
583 tick_nohz_restart(ts, now);
584}
585
586static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
587{
588#ifndef CONFIG_VIRT_CPU_ACCOUNTING
589 unsigned long ticks;
590 /*
591 * We stopped the tick in idle. Update process times would miss the
592 * time we slept as update_process_times does only a 1 tick
593 * accounting. Enforce that this is accounted to idle !
594 */
595 ticks = jiffies - ts->idle_jiffies;
596 /*
597 * We might be one off. Do not randomly account a huge number of ticks!
598 */
599 if (ticks && ticks < LONG_MAX)
600 account_idle_ticks(ticks);
601#endif
602}
603
549/** 604/**
550 * tick_nohz_idle_exit - restart the idle tick from the idle task 605 * tick_nohz_idle_exit - restart the idle tick from the idle task
551 * 606 *
@@ -557,9 +612,6 @@ void tick_nohz_idle_exit(void)
557{ 612{
558 int cpu = smp_processor_id(); 613 int cpu = smp_processor_id();
559 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 614 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
560#ifndef CONFIG_VIRT_CPU_ACCOUNTING
561 unsigned long ticks;
562#endif
563 ktime_t now; 615 ktime_t now;
564 616
565 local_irq_disable(); 617 local_irq_disable();
@@ -574,40 +626,11 @@ void tick_nohz_idle_exit(void)
574 if (ts->idle_active) 626 if (ts->idle_active)
575 tick_nohz_stop_idle(cpu, now); 627 tick_nohz_stop_idle(cpu, now);
576 628
577 if (!ts->tick_stopped) { 629 if (ts->tick_stopped) {
578 local_irq_enable(); 630 tick_nohz_restart_sched_tick(ts, now);
579 return; 631 tick_nohz_account_idle_ticks(ts);
580 } 632 }
581 633
582 /* Update jiffies first */
583 select_nohz_load_balancer(0);
584 tick_do_update_jiffies64(now);
585 update_cpu_load_nohz();
586
587#ifndef CONFIG_VIRT_CPU_ACCOUNTING
588 /*
589 * We stopped the tick in idle. Update process times would miss the
590 * time we slept as update_process_times does only a 1 tick
591 * accounting. Enforce that this is accounted to idle !
592 */
593 ticks = jiffies - ts->idle_jiffies;
594 /*
595 * We might be one off. Do not randomly account a huge number of ticks!
596 */
597 if (ticks && ticks < LONG_MAX)
598 account_idle_ticks(ticks);
599#endif
600
601 calc_load_exit_idle();
602 touch_softlockup_watchdog();
603 /*
604 * Cancel the scheduled timer and restore the tick
605 */
606 ts->tick_stopped = 0;
607 ts->idle_exittime = now;
608
609 tick_nohz_restart(ts, now);
610
611 local_irq_enable(); 634 local_irq_enable();
612} 635}
613 636
@@ -811,7 +834,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
811 */ 834 */
812 if (ts->tick_stopped) { 835 if (ts->tick_stopped) {
813 touch_softlockup_watchdog(); 836 touch_softlockup_watchdog();
814 ts->idle_jiffies++; 837 if (idle_cpu(cpu))
838 ts->idle_jiffies++;
815 } 839 }
816 update_process_times(user_mode(regs)); 840 update_process_times(user_mode(regs));
817 profile_tick(CPU_PROFILING); 841 profile_tick(CPU_PROFILING);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3447cfaf11e7..f045cc50832d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -24,32 +24,32 @@
24/* Structure holding internal timekeeping values. */ 24/* Structure holding internal timekeeping values. */
25struct timekeeper { 25struct timekeeper {
26 /* Current clocksource used for timekeeping. */ 26 /* Current clocksource used for timekeeping. */
27 struct clocksource *clock; 27 struct clocksource *clock;
28 /* NTP adjusted clock multiplier */ 28 /* NTP adjusted clock multiplier */
29 u32 mult; 29 u32 mult;
30 /* The shift value of the current clocksource. */ 30 /* The shift value of the current clocksource. */
31 int shift; 31 u32 shift;
32
33 /* Number of clock cycles in one NTP interval. */ 32 /* Number of clock cycles in one NTP interval. */
34 cycle_t cycle_interval; 33 cycle_t cycle_interval;
35 /* Number of clock shifted nano seconds in one NTP interval. */ 34 /* Number of clock shifted nano seconds in one NTP interval. */
36 u64 xtime_interval; 35 u64 xtime_interval;
37 /* shifted nano seconds left over when rounding cycle_interval */ 36 /* shifted nano seconds left over when rounding cycle_interval */
38 s64 xtime_remainder; 37 s64 xtime_remainder;
39 /* Raw nano seconds accumulated per NTP interval. */ 38 /* Raw nano seconds accumulated per NTP interval. */
40 u32 raw_interval; 39 u32 raw_interval;
40
41 /* Current CLOCK_REALTIME time in seconds */
42 u64 xtime_sec;
43 /* Clock shifted nano seconds */
44 u64 xtime_nsec;
41 45
42 /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */
43 u64 xtime_nsec;
44 /* Difference between accumulated time and NTP time in ntp 46 /* Difference between accumulated time and NTP time in ntp
45 * shifted nano seconds. */ 47 * shifted nano seconds. */
46 s64 ntp_error; 48 s64 ntp_error;
47 /* Shift conversion between clock shifted nano seconds and 49 /* Shift conversion between clock shifted nano seconds and
48 * ntp shifted nano seconds. */ 50 * ntp shifted nano seconds. */
49 int ntp_error_shift; 51 u32 ntp_error_shift;
50 52
51 /* The current time */
52 struct timespec xtime;
53 /* 53 /*
54 * wall_to_monotonic is what we need to add to xtime (or xtime corrected 54 * wall_to_monotonic is what we need to add to xtime (or xtime corrected
55 * for sub jiffie times) to get to monotonic time. Monotonic is pegged 55 * for sub jiffie times) to get to monotonic time. Monotonic is pegged
@@ -64,20 +64,17 @@ struct timekeeper {
64 * - wall_to_monotonic is no longer the boot time, getboottime must be 64 * - wall_to_monotonic is no longer the boot time, getboottime must be
65 * used instead. 65 * used instead.
66 */ 66 */
67 struct timespec wall_to_monotonic; 67 struct timespec wall_to_monotonic;
68 /* time spent in suspend */ 68 /* time spent in suspend */
69 struct timespec total_sleep_time; 69 struct timespec total_sleep_time;
70 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ 70 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
71 struct timespec raw_time; 71 struct timespec raw_time;
72
73 /* Offset clock monotonic -> clock realtime */ 72 /* Offset clock monotonic -> clock realtime */
74 ktime_t offs_real; 73 ktime_t offs_real;
75
76 /* Offset clock monotonic -> clock boottime */ 74 /* Offset clock monotonic -> clock boottime */
77 ktime_t offs_boot; 75 ktime_t offs_boot;
78
79 /* Seqlock for all timekeeper values */ 76 /* Seqlock for all timekeeper values */
80 seqlock_t lock; 77 seqlock_t lock;
81}; 78};
82 79
83static struct timekeeper timekeeper; 80static struct timekeeper timekeeper;
@@ -88,11 +85,37 @@ static struct timekeeper timekeeper;
88 */ 85 */
89__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); 86__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
90 87
91
92/* flag for if timekeeping is suspended */ 88/* flag for if timekeeping is suspended */
93int __read_mostly timekeeping_suspended; 89int __read_mostly timekeeping_suspended;
94 90
91static inline void tk_normalize_xtime(struct timekeeper *tk)
92{
93 while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
94 tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift;
95 tk->xtime_sec++;
96 }
97}
95 98
99static struct timespec tk_xtime(struct timekeeper *tk)
100{
101 struct timespec ts;
102
103 ts.tv_sec = tk->xtime_sec;
104 ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
105 return ts;
106}
107
108static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
109{
110 tk->xtime_sec = ts->tv_sec;
111 tk->xtime_nsec = ts->tv_nsec << tk->shift;
112}
113
114static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
115{
116 tk->xtime_sec += ts->tv_sec;
117 tk->xtime_nsec += ts->tv_nsec << tk->shift;
118}
96 119
97/** 120/**
98 * timekeeper_setup_internals - Set up internals to use clocksource clock. 121 * timekeeper_setup_internals - Set up internals to use clocksource clock.
@@ -104,12 +127,14 @@ int __read_mostly timekeeping_suspended;
104 * 127 *
105 * Unless you're the timekeeping code, you should not be using this! 128 * Unless you're the timekeeping code, you should not be using this!
106 */ 129 */
107static void timekeeper_setup_internals(struct clocksource *clock) 130static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
108{ 131{
109 cycle_t interval; 132 cycle_t interval;
110 u64 tmp, ntpinterval; 133 u64 tmp, ntpinterval;
134 struct clocksource *old_clock;
111 135
112 timekeeper.clock = clock; 136 old_clock = tk->clock;
137 tk->clock = clock;
113 clock->cycle_last = clock->read(clock); 138 clock->cycle_last = clock->read(clock);
114 139
115 /* Do the ns -> cycle conversion first, using original mult */ 140 /* Do the ns -> cycle conversion first, using original mult */
@@ -122,80 +147,96 @@ static void timekeeper_setup_internals(struct clocksource *clock)
122 tmp = 1; 147 tmp = 1;
123 148
124 interval = (cycle_t) tmp; 149 interval = (cycle_t) tmp;
125 timekeeper.cycle_interval = interval; 150 tk->cycle_interval = interval;
126 151
127 /* Go back from cycles -> shifted ns */ 152 /* Go back from cycles -> shifted ns */
128 timekeeper.xtime_interval = (u64) interval * clock->mult; 153 tk->xtime_interval = (u64) interval * clock->mult;
129 timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; 154 tk->xtime_remainder = ntpinterval - tk->xtime_interval;
130 timekeeper.raw_interval = 155 tk->raw_interval =
131 ((u64) interval * clock->mult) >> clock->shift; 156 ((u64) interval * clock->mult) >> clock->shift;
132 157
133 timekeeper.xtime_nsec = 0; 158 /* if changing clocks, convert xtime_nsec shift units */
134 timekeeper.shift = clock->shift; 159 if (old_clock) {
160 int shift_change = clock->shift - old_clock->shift;
161 if (shift_change < 0)
162 tk->xtime_nsec >>= -shift_change;
163 else
164 tk->xtime_nsec <<= shift_change;
165 }
166 tk->shift = clock->shift;
135 167
136 timekeeper.ntp_error = 0; 168 tk->ntp_error = 0;
137 timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; 169 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
138 170
139 /* 171 /*
140 * The timekeeper keeps its own mult values for the currently 172 * The timekeeper keeps its own mult values for the currently
141 * active clocksource. These value will be adjusted via NTP 173 * active clocksource. These value will be adjusted via NTP
142 * to counteract clock drifting. 174 * to counteract clock drifting.
143 */ 175 */
144 timekeeper.mult = clock->mult; 176 tk->mult = clock->mult;
145} 177}
146 178
147/* Timekeeper helper functions. */ 179/* Timekeeper helper functions. */
148static inline s64 timekeeping_get_ns(void) 180static inline s64 timekeeping_get_ns(struct timekeeper *tk)
149{ 181{
150 cycle_t cycle_now, cycle_delta; 182 cycle_t cycle_now, cycle_delta;
151 struct clocksource *clock; 183 struct clocksource *clock;
184 s64 nsec;
152 185
153 /* read clocksource: */ 186 /* read clocksource: */
154 clock = timekeeper.clock; 187 clock = tk->clock;
155 cycle_now = clock->read(clock); 188 cycle_now = clock->read(clock);
156 189
157 /* calculate the delta since the last update_wall_time: */ 190 /* calculate the delta since the last update_wall_time: */
158 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 191 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
159 192
160 /* return delta convert to nanoseconds using ntp adjusted mult. */ 193 nsec = cycle_delta * tk->mult + tk->xtime_nsec;
161 return clocksource_cyc2ns(cycle_delta, timekeeper.mult, 194 nsec >>= tk->shift;
162 timekeeper.shift); 195
196 /* If arch requires, add in gettimeoffset() */
197 return nsec + arch_gettimeoffset();
163} 198}
164 199
165static inline s64 timekeeping_get_ns_raw(void) 200static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
166{ 201{
167 cycle_t cycle_now, cycle_delta; 202 cycle_t cycle_now, cycle_delta;
168 struct clocksource *clock; 203 struct clocksource *clock;
204 s64 nsec;
169 205
170 /* read clocksource: */ 206 /* read clocksource: */
171 clock = timekeeper.clock; 207 clock = tk->clock;
172 cycle_now = clock->read(clock); 208 cycle_now = clock->read(clock);
173 209
174 /* calculate the delta since the last update_wall_time: */ 210 /* calculate the delta since the last update_wall_time: */
175 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 211 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
176 212
177 /* return delta convert to nanoseconds. */ 213 /* convert delta to nanoseconds. */
178 return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); 214 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
215
216 /* If arch requires, add in gettimeoffset() */
217 return nsec + arch_gettimeoffset();
179} 218}
180 219
181static void update_rt_offset(void) 220static void update_rt_offset(struct timekeeper *tk)
182{ 221{
183 struct timespec tmp, *wtm = &timekeeper.wall_to_monotonic; 222 struct timespec tmp, *wtm = &tk->wall_to_monotonic;
184 223
185 set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); 224 set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
186 timekeeper.offs_real = timespec_to_ktime(tmp); 225 tk->offs_real = timespec_to_ktime(tmp);
187} 226}
188 227
189/* must hold write on timekeeper.lock */ 228/* must hold write on timekeeper.lock */
190static void timekeeping_update(bool clearntp) 229static void timekeeping_update(struct timekeeper *tk, bool clearntp)
191{ 230{
231 struct timespec xt;
232
192 if (clearntp) { 233 if (clearntp) {
193 timekeeper.ntp_error = 0; 234 tk->ntp_error = 0;
194 ntp_clear(); 235 ntp_clear();
195 } 236 }
196 update_rt_offset(); 237 update_rt_offset(tk);
197 update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, 238 xt = tk_xtime(tk);
198 timekeeper.clock, timekeeper.mult); 239 update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
199} 240}
200 241
201 242
@@ -206,27 +247,26 @@ static void timekeeping_update(bool clearntp)
206 * update_wall_time(). This is useful before significant clock changes, 247 * update_wall_time(). This is useful before significant clock changes,
207 * as it avoids having to deal with this time offset explicitly. 248 * as it avoids having to deal with this time offset explicitly.
208 */ 249 */
209static void timekeeping_forward_now(void) 250static void timekeeping_forward_now(struct timekeeper *tk)
210{ 251{
211 cycle_t cycle_now, cycle_delta; 252 cycle_t cycle_now, cycle_delta;
212 struct clocksource *clock; 253 struct clocksource *clock;
213 s64 nsec; 254 s64 nsec;
214 255
215 clock = timekeeper.clock; 256 clock = tk->clock;
216 cycle_now = clock->read(clock); 257 cycle_now = clock->read(clock);
217 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 258 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
218 clock->cycle_last = cycle_now; 259 clock->cycle_last = cycle_now;
219 260
220 nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, 261 tk->xtime_nsec += cycle_delta * tk->mult;
221 timekeeper.shift);
222 262
223 /* If arch requires, add in gettimeoffset() */ 263 /* If arch requires, add in gettimeoffset() */
224 nsec += arch_gettimeoffset(); 264 tk->xtime_nsec += arch_gettimeoffset() << tk->shift;
225 265
226 timespec_add_ns(&timekeeper.xtime, nsec); 266 tk_normalize_xtime(tk);
227 267
228 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); 268 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
229 timespec_add_ns(&timekeeper.raw_time, nsec); 269 timespec_add_ns(&tk->raw_time, nsec);
230} 270}
231 271
232/** 272/**
@@ -238,18 +278,15 @@ static void timekeeping_forward_now(void)
238void getnstimeofday(struct timespec *ts) 278void getnstimeofday(struct timespec *ts)
239{ 279{
240 unsigned long seq; 280 unsigned long seq;
241 s64 nsecs; 281 s64 nsecs = 0;
242 282
243 WARN_ON(timekeeping_suspended); 283 WARN_ON(timekeeping_suspended);
244 284
245 do { 285 do {
246 seq = read_seqbegin(&timekeeper.lock); 286 seq = read_seqbegin(&timekeeper.lock);
247 287
248 *ts = timekeeper.xtime; 288 ts->tv_sec = timekeeper.xtime_sec;
249 nsecs = timekeeping_get_ns(); 289 ts->tv_nsec = timekeeping_get_ns(&timekeeper);
250
251 /* If arch requires, add in gettimeoffset() */
252 nsecs += arch_gettimeoffset();
253 290
254 } while (read_seqretry(&timekeeper.lock, seq)); 291 } while (read_seqretry(&timekeeper.lock, seq));
255 292
@@ -266,13 +303,10 @@ ktime_t ktime_get(void)
266 303
267 do { 304 do {
268 seq = read_seqbegin(&timekeeper.lock); 305 seq = read_seqbegin(&timekeeper.lock);
269 secs = timekeeper.xtime.tv_sec + 306 secs = timekeeper.xtime_sec +
270 timekeeper.wall_to_monotonic.tv_sec; 307 timekeeper.wall_to_monotonic.tv_sec;
271 nsecs = timekeeper.xtime.tv_nsec + 308 nsecs = timekeeping_get_ns(&timekeeper) +
272 timekeeper.wall_to_monotonic.tv_nsec; 309 timekeeper.wall_to_monotonic.tv_nsec;
273 nsecs += timekeeping_get_ns();
274 /* If arch requires, add in gettimeoffset() */
275 nsecs += arch_gettimeoffset();
276 310
277 } while (read_seqretry(&timekeeper.lock, seq)); 311 } while (read_seqretry(&timekeeper.lock, seq));
278 /* 312 /*
@@ -295,22 +329,19 @@ void ktime_get_ts(struct timespec *ts)
295{ 329{
296 struct timespec tomono; 330 struct timespec tomono;
297 unsigned int seq; 331 unsigned int seq;
298 s64 nsecs;
299 332
300 WARN_ON(timekeeping_suspended); 333 WARN_ON(timekeeping_suspended);
301 334
302 do { 335 do {
303 seq = read_seqbegin(&timekeeper.lock); 336 seq = read_seqbegin(&timekeeper.lock);
304 *ts = timekeeper.xtime; 337 ts->tv_sec = timekeeper.xtime_sec;
338 ts->tv_nsec = timekeeping_get_ns(&timekeeper);
305 tomono = timekeeper.wall_to_monotonic; 339 tomono = timekeeper.wall_to_monotonic;
306 nsecs = timekeeping_get_ns();
307 /* If arch requires, add in gettimeoffset() */
308 nsecs += arch_gettimeoffset();
309 340
310 } while (read_seqretry(&timekeeper.lock, seq)); 341 } while (read_seqretry(&timekeeper.lock, seq));
311 342
312 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, 343 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
313 ts->tv_nsec + tomono.tv_nsec + nsecs); 344 ts->tv_nsec + tomono.tv_nsec);
314} 345}
315EXPORT_SYMBOL_GPL(ktime_get_ts); 346EXPORT_SYMBOL_GPL(ktime_get_ts);
316 347
@@ -333,20 +364,14 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
333 WARN_ON_ONCE(timekeeping_suspended); 364 WARN_ON_ONCE(timekeeping_suspended);
334 365
335 do { 366 do {
336 u32 arch_offset;
337
338 seq = read_seqbegin(&timekeeper.lock); 367 seq = read_seqbegin(&timekeeper.lock);
339 368
340 *ts_raw = timekeeper.raw_time; 369 *ts_raw = timekeeper.raw_time;
341 *ts_real = timekeeper.xtime; 370 ts_real->tv_sec = timekeeper.xtime_sec;
342 371 ts_real->tv_nsec = 0;
343 nsecs_raw = timekeeping_get_ns_raw();
344 nsecs_real = timekeeping_get_ns();
345 372
346 /* If arch requires, add in gettimeoffset() */ 373 nsecs_raw = timekeeping_get_ns_raw(&timekeeper);
347 arch_offset = arch_gettimeoffset(); 374 nsecs_real = timekeeping_get_ns(&timekeeper);
348 nsecs_raw += arch_offset;
349 nsecs_real += arch_offset;
350 375
351 } while (read_seqretry(&timekeeper.lock, seq)); 376 } while (read_seqretry(&timekeeper.lock, seq));
352 377
@@ -381,7 +406,7 @@ EXPORT_SYMBOL(do_gettimeofday);
381 */ 406 */
382int do_settimeofday(const struct timespec *tv) 407int do_settimeofday(const struct timespec *tv)
383{ 408{
384 struct timespec ts_delta; 409 struct timespec ts_delta, xt;
385 unsigned long flags; 410 unsigned long flags;
386 411
387 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) 412 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
@@ -389,15 +414,18 @@ int do_settimeofday(const struct timespec *tv)
389 414
390 write_seqlock_irqsave(&timekeeper.lock, flags); 415 write_seqlock_irqsave(&timekeeper.lock, flags);
391 416
392 timekeeping_forward_now(); 417 timekeeping_forward_now(&timekeeper);
418
419 xt = tk_xtime(&timekeeper);
420 ts_delta.tv_sec = tv->tv_sec - xt.tv_sec;
421 ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec;
393 422
394 ts_delta.tv_sec = tv->tv_sec - timekeeper.xtime.tv_sec;
395 ts_delta.tv_nsec = tv->tv_nsec - timekeeper.xtime.tv_nsec;
396 timekeeper.wall_to_monotonic = 423 timekeeper.wall_to_monotonic =
397 timespec_sub(timekeeper.wall_to_monotonic, ts_delta); 424 timespec_sub(timekeeper.wall_to_monotonic, ts_delta);
398 425
399 timekeeper.xtime = *tv; 426 tk_set_xtime(&timekeeper, tv);
400 timekeeping_update(true); 427
428 timekeeping_update(&timekeeper, true);
401 429
402 write_sequnlock_irqrestore(&timekeeper.lock, flags); 430 write_sequnlock_irqrestore(&timekeeper.lock, flags);
403 431
@@ -424,13 +452,14 @@ int timekeeping_inject_offset(struct timespec *ts)
424 452
425 write_seqlock_irqsave(&timekeeper.lock, flags); 453 write_seqlock_irqsave(&timekeeper.lock, flags);
426 454
427 timekeeping_forward_now(); 455 timekeeping_forward_now(&timekeeper);
456
428 457
429 timekeeper.xtime = timespec_add(timekeeper.xtime, *ts); 458 tk_xtime_add(&timekeeper, ts);
430 timekeeper.wall_to_monotonic = 459 timekeeper.wall_to_monotonic =
431 timespec_sub(timekeeper.wall_to_monotonic, *ts); 460 timespec_sub(timekeeper.wall_to_monotonic, *ts);
432 461
433 timekeeping_update(true); 462 timekeeping_update(&timekeeper, true);
434 463
435 write_sequnlock_irqrestore(&timekeeper.lock, flags); 464 write_sequnlock_irqrestore(&timekeeper.lock, flags);
436 465
@@ -455,14 +484,14 @@ static int change_clocksource(void *data)
455 484
456 write_seqlock_irqsave(&timekeeper.lock, flags); 485 write_seqlock_irqsave(&timekeeper.lock, flags);
457 486
458 timekeeping_forward_now(); 487 timekeeping_forward_now(&timekeeper);
459 if (!new->enable || new->enable(new) == 0) { 488 if (!new->enable || new->enable(new) == 0) {
460 old = timekeeper.clock; 489 old = timekeeper.clock;
461 timekeeper_setup_internals(new); 490 tk_setup_internals(&timekeeper, new);
462 if (old->disable) 491 if (old->disable)
463 old->disable(old); 492 old->disable(old);
464 } 493 }
465 timekeeping_update(true); 494 timekeeping_update(&timekeeper, true);
466 495
467 write_sequnlock_irqrestore(&timekeeper.lock, flags); 496 write_sequnlock_irqrestore(&timekeeper.lock, flags);
468 497
@@ -512,7 +541,7 @@ void getrawmonotonic(struct timespec *ts)
512 541
513 do { 542 do {
514 seq = read_seqbegin(&timekeeper.lock); 543 seq = read_seqbegin(&timekeeper.lock);
515 nsecs = timekeeping_get_ns_raw(); 544 nsecs = timekeeping_get_ns_raw(&timekeeper);
516 *ts = timekeeper.raw_time; 545 *ts = timekeeper.raw_time;
517 546
518 } while (read_seqretry(&timekeeper.lock, seq)); 547 } while (read_seqretry(&timekeeper.lock, seq));
@@ -547,6 +576,7 @@ u64 timekeeping_max_deferment(void)
547{ 576{
548 unsigned long seq; 577 unsigned long seq;
549 u64 ret; 578 u64 ret;
579
550 do { 580 do {
551 seq = read_seqbegin(&timekeeper.lock); 581 seq = read_seqbegin(&timekeeper.lock);
552 582
@@ -607,19 +637,17 @@ void __init timekeeping_init(void)
607 clock = clocksource_default_clock(); 637 clock = clocksource_default_clock();
608 if (clock->enable) 638 if (clock->enable)
609 clock->enable(clock); 639 clock->enable(clock);
610 timekeeper_setup_internals(clock); 640 tk_setup_internals(&timekeeper, clock);
611 641
612 timekeeper.xtime.tv_sec = now.tv_sec; 642 tk_set_xtime(&timekeeper, &now);
613 timekeeper.xtime.tv_nsec = now.tv_nsec;
614 timekeeper.raw_time.tv_sec = 0; 643 timekeeper.raw_time.tv_sec = 0;
615 timekeeper.raw_time.tv_nsec = 0; 644 timekeeper.raw_time.tv_nsec = 0;
616 if (boot.tv_sec == 0 && boot.tv_nsec == 0) { 645 if (boot.tv_sec == 0 && boot.tv_nsec == 0)
617 boot.tv_sec = timekeeper.xtime.tv_sec; 646 boot = tk_xtime(&timekeeper);
618 boot.tv_nsec = timekeeper.xtime.tv_nsec; 647
619 }
620 set_normalized_timespec(&timekeeper.wall_to_monotonic, 648 set_normalized_timespec(&timekeeper.wall_to_monotonic,
621 -boot.tv_sec, -boot.tv_nsec); 649 -boot.tv_sec, -boot.tv_nsec);
622 update_rt_offset(); 650 update_rt_offset(&timekeeper);
623 timekeeper.total_sleep_time.tv_sec = 0; 651 timekeeper.total_sleep_time.tv_sec = 0;
624 timekeeper.total_sleep_time.tv_nsec = 0; 652 timekeeper.total_sleep_time.tv_nsec = 0;
625 write_sequnlock_irqrestore(&timekeeper.lock, flags); 653 write_sequnlock_irqrestore(&timekeeper.lock, flags);
@@ -641,7 +669,8 @@ static void update_sleep_time(struct timespec t)
641 * Takes a timespec offset measuring a suspend interval and properly 669 * Takes a timespec offset measuring a suspend interval and properly
642 * adds the sleep offset to the timekeeping variables. 670 * adds the sleep offset to the timekeeping variables.
643 */ 671 */
644static void __timekeeping_inject_sleeptime(struct timespec *delta) 672static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
673 struct timespec *delta)
645{ 674{
646 if (!timespec_valid(delta)) { 675 if (!timespec_valid(delta)) {
647 printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " 676 printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
@@ -649,10 +678,9 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta)
649 return; 678 return;
650 } 679 }
651 680
652 timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); 681 tk_xtime_add(tk, delta);
653 timekeeper.wall_to_monotonic = 682 tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta);
654 timespec_sub(timekeeper.wall_to_monotonic, *delta); 683 update_sleep_time(timespec_add(tk->total_sleep_time, *delta));
655 update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta));
656} 684}
657 685
658 686
@@ -678,11 +706,11 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
678 706
679 write_seqlock_irqsave(&timekeeper.lock, flags); 707 write_seqlock_irqsave(&timekeeper.lock, flags);
680 708
681 timekeeping_forward_now(); 709 timekeeping_forward_now(&timekeeper);
682 710
683 __timekeeping_inject_sleeptime(delta); 711 __timekeeping_inject_sleeptime(&timekeeper, delta);
684 712
685 timekeeping_update(true); 713 timekeeping_update(&timekeeper, true);
686 714
687 write_sequnlock_irqrestore(&timekeeper.lock, flags); 715 write_sequnlock_irqrestore(&timekeeper.lock, flags);
688 716
@@ -711,13 +739,13 @@ static void timekeeping_resume(void)
711 739
712 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { 740 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
713 ts = timespec_sub(ts, timekeeping_suspend_time); 741 ts = timespec_sub(ts, timekeeping_suspend_time);
714 __timekeeping_inject_sleeptime(&ts); 742 __timekeeping_inject_sleeptime(&timekeeper, &ts);
715 } 743 }
716 /* re-base the last cycle value */ 744 /* re-base the last cycle value */
717 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); 745 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
718 timekeeper.ntp_error = 0; 746 timekeeper.ntp_error = 0;
719 timekeeping_suspended = 0; 747 timekeeping_suspended = 0;
720 timekeeping_update(false); 748 timekeeping_update(&timekeeper, false);
721 write_sequnlock_irqrestore(&timekeeper.lock, flags); 749 write_sequnlock_irqrestore(&timekeeper.lock, flags);
722 750
723 touch_softlockup_watchdog(); 751 touch_softlockup_watchdog();
@@ -737,7 +765,7 @@ static int timekeeping_suspend(void)
737 read_persistent_clock(&timekeeping_suspend_time); 765 read_persistent_clock(&timekeeping_suspend_time);
738 766
739 write_seqlock_irqsave(&timekeeper.lock, flags); 767 write_seqlock_irqsave(&timekeeper.lock, flags);
740 timekeeping_forward_now(); 768 timekeeping_forward_now(&timekeeper);
741 timekeeping_suspended = 1; 769 timekeeping_suspended = 1;
742 770
743 /* 771 /*
@@ -746,7 +774,7 @@ static int timekeeping_suspend(void)
746 * try to compensate so the difference in system time 774 * try to compensate so the difference in system time
747 * and persistent_clock time stays close to constant. 775 * and persistent_clock time stays close to constant.
748 */ 776 */
749 delta = timespec_sub(timekeeper.xtime, timekeeping_suspend_time); 777 delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time);
750 delta_delta = timespec_sub(delta, old_delta); 778 delta_delta = timespec_sub(delta, old_delta);
751 if (abs(delta_delta.tv_sec) >= 2) { 779 if (abs(delta_delta.tv_sec) >= 2) {
752 /* 780 /*
@@ -785,7 +813,8 @@ device_initcall(timekeeping_init_ops);
785 * If the error is already larger, we look ahead even further 813 * If the error is already larger, we look ahead even further
786 * to compensate for late or lost adjustments. 814 * to compensate for late or lost adjustments.
787 */ 815 */
788static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, 816static __always_inline int timekeeping_bigadjust(struct timekeeper *tk,
817 s64 error, s64 *interval,
789 s64 *offset) 818 s64 *offset)
790{ 819{
791 s64 tick_error, i; 820 s64 tick_error, i;
@@ -801,7 +830,7 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
801 * here. This is tuned so that an error of about 1 msec is adjusted 830 * here. This is tuned so that an error of about 1 msec is adjusted
802 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). 831 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
803 */ 832 */
804 error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); 833 error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
805 error2 = abs(error2); 834 error2 = abs(error2);
806 for (look_ahead = 0; error2 > 0; look_ahead++) 835 for (look_ahead = 0; error2 > 0; look_ahead++)
807 error2 >>= 2; 836 error2 >>= 2;
@@ -810,8 +839,8 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
810 * Now calculate the error in (1 << look_ahead) ticks, but first 839 * Now calculate the error in (1 << look_ahead) ticks, but first
811 * remove the single look ahead already included in the error. 840 * remove the single look ahead already included in the error.
812 */ 841 */
813 tick_error = ntp_tick_length() >> (timekeeper.ntp_error_shift + 1); 842 tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1);
814 tick_error -= timekeeper.xtime_interval >> 1; 843 tick_error -= tk->xtime_interval >> 1;
815 error = ((error - tick_error) >> look_ahead) + tick_error; 844 error = ((error - tick_error) >> look_ahead) + tick_error;
816 845
817 /* Finally calculate the adjustment shift value. */ 846 /* Finally calculate the adjustment shift value. */
@@ -836,9 +865,9 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
836 * this is optimized for the most common adjustments of -1,0,1, 865 * this is optimized for the most common adjustments of -1,0,1,
837 * for other values we can do a bit more work. 866 * for other values we can do a bit more work.
838 */ 867 */
839static void timekeeping_adjust(s64 offset) 868static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
840{ 869{
841 s64 error, interval = timekeeper.cycle_interval; 870 s64 error, interval = tk->cycle_interval;
842 int adj; 871 int adj;
843 872
844 /* 873 /*
@@ -854,7 +883,7 @@ static void timekeeping_adjust(s64 offset)
854 * 883 *
855 * Note: It does not "save" on aggravation when reading the code. 884 * Note: It does not "save" on aggravation when reading the code.
856 */ 885 */
857 error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1); 886 error = tk->ntp_error >> (tk->ntp_error_shift - 1);
858 if (error > interval) { 887 if (error > interval) {
859 /* 888 /*
860 * We now divide error by 4(via shift), which checks if 889 * We now divide error by 4(via shift), which checks if
@@ -876,7 +905,8 @@ static void timekeeping_adjust(s64 offset)
876 if (likely(error <= interval)) 905 if (likely(error <= interval))
877 adj = 1; 906 adj = 1;
878 else 907 else
879 adj = timekeeping_bigadjust(error, &interval, &offset); 908 adj = timekeeping_bigadjust(tk, error, &interval,
909 &offset);
880 } else if (error < -interval) { 910 } else if (error < -interval) {
881 /* See comment above, this is just switched for the negative */ 911 /* See comment above, this is just switched for the negative */
882 error >>= 2; 912 error >>= 2;
@@ -885,18 +915,17 @@ static void timekeeping_adjust(s64 offset)
885 interval = -interval; 915 interval = -interval;
886 offset = -offset; 916 offset = -offset;
887 } else 917 } else
888 adj = timekeeping_bigadjust(error, &interval, &offset); 918 adj = timekeeping_bigadjust(tk, error, &interval,
889 } else /* No adjustment needed */ 919 &offset);
920 } else
890 return; 921 return;
891 922
892 if (unlikely(timekeeper.clock->maxadj && 923 if (unlikely(tk->clock->maxadj &&
893 (timekeeper.mult + adj > 924 (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
894 timekeeper.clock->mult + timekeeper.clock->maxadj))) {
895 printk_once(KERN_WARNING 925 printk_once(KERN_WARNING
896 "Adjusting %s more than 11%% (%ld vs %ld)\n", 926 "Adjusting %s more than 11%% (%ld vs %ld)\n",
897 timekeeper.clock->name, (long)timekeeper.mult + adj, 927 tk->clock->name, (long)tk->mult + adj,
898 (long)timekeeper.clock->mult + 928 (long)tk->clock->mult + tk->clock->maxadj);
899 timekeeper.clock->maxadj);
900 } 929 }
901 /* 930 /*
902 * So the following can be confusing. 931 * So the following can be confusing.
@@ -947,11 +976,60 @@ static void timekeeping_adjust(s64 offset)
947 * 976 *
948 * XXX - TODO: Doc ntp_error calculation. 977 * XXX - TODO: Doc ntp_error calculation.
949 */ 978 */
950 timekeeper.mult += adj; 979 tk->mult += adj;
951 timekeeper.xtime_interval += interval; 980 tk->xtime_interval += interval;
952 timekeeper.xtime_nsec -= offset; 981 tk->xtime_nsec -= offset;
953 timekeeper.ntp_error -= (interval - offset) << 982 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
954 timekeeper.ntp_error_shift; 983
984 /*
985 * It may be possible that when we entered this function, xtime_nsec
986 * was very small. Further, if we're slightly speeding the clocksource
987 * in the code above, it's possible the required corrective factor to
988 * xtime_nsec could cause it to underflow.
989 *
990 * Now, since we already accumulated the second, we cannot simply roll
991 * the accumulated second back, since the NTP subsystem has been
992 * notified via second_overflow. So instead we push xtime_nsec forward
993 * by the amount we underflowed, and add that amount into the error.
994 *
995 * We'll correct this error next time through this function, when
996 * xtime_nsec is not as small.
997 */
998 if (unlikely((s64)tk->xtime_nsec < 0)) {
999 s64 neg = -(s64)tk->xtime_nsec;
1000 tk->xtime_nsec = 0;
1001 tk->ntp_error += neg << tk->ntp_error_shift;
1002 }
1003
1004}
1005
1006
1007/**
1008 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
1009 *
1010 * Helper function that accumulates the nsecs greater than a second
1011 * from the xtime_nsec field to the xtime_sec field.
1012 * It also calls into the NTP code to handle leapsecond processing.
1013 *
1014 */
1015static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
1016{
1017 u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
1018
1019 while (tk->xtime_nsec >= nsecps) {
1020 int leap;
1021
1022 tk->xtime_nsec -= nsecps;
1023 tk->xtime_sec++;
1024
1025 /* Figure out if it's a leap sec and apply if needed */
1026 leap = second_overflow(tk->xtime_sec);
1027 tk->xtime_sec += leap;
1028 tk->wall_to_monotonic.tv_sec -= leap;
1029 if (leap)
1030 clock_was_set_delayed();
1031
1032 }
955} 1033}
956 1034
957 1035
@@ -964,46 +1042,36 @@ static void timekeeping_adjust(s64 offset)
964 * 1042 *
965 * Returns the unconsumed cycles. 1043 * Returns the unconsumed cycles.
966 */ 1044 */
967static cycle_t logarithmic_accumulation(cycle_t offset, int shift) 1045static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1046 u32 shift)
968{ 1047{
969 u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
970 u64 raw_nsecs; 1048 u64 raw_nsecs;
971 1049
972 /* If the offset is smaller than a shifted interval, do nothing */ 1050 /* If the offset is smaller than a shifted interval, do nothing */
973 if (offset < timekeeper.cycle_interval<<shift) 1051 if (offset < tk->cycle_interval<<shift)
974 return offset; 1052 return offset;
975 1053
976 /* Accumulate one shifted interval */ 1054 /* Accumulate one shifted interval */
977 offset -= timekeeper.cycle_interval << shift; 1055 offset -= tk->cycle_interval << shift;
978 timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; 1056 tk->clock->cycle_last += tk->cycle_interval << shift;
979 1057
980 timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; 1058 tk->xtime_nsec += tk->xtime_interval << shift;
981 while (timekeeper.xtime_nsec >= nsecps) { 1059 accumulate_nsecs_to_secs(tk);
982 int leap;
983 timekeeper.xtime_nsec -= nsecps;
984 timekeeper.xtime.tv_sec++;
985 leap = second_overflow(timekeeper.xtime.tv_sec);
986 timekeeper.xtime.tv_sec += leap;
987 timekeeper.wall_to_monotonic.tv_sec -= leap;
988 if (leap)
989 clock_was_set_delayed();
990 }
991 1060
992 /* Accumulate raw time */ 1061 /* Accumulate raw time */
993 raw_nsecs = timekeeper.raw_interval << shift; 1062 raw_nsecs = tk->raw_interval << shift;
994 raw_nsecs += timekeeper.raw_time.tv_nsec; 1063 raw_nsecs += tk->raw_time.tv_nsec;
995 if (raw_nsecs >= NSEC_PER_SEC) { 1064 if (raw_nsecs >= NSEC_PER_SEC) {
996 u64 raw_secs = raw_nsecs; 1065 u64 raw_secs = raw_nsecs;
997 raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); 1066 raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
998 timekeeper.raw_time.tv_sec += raw_secs; 1067 tk->raw_time.tv_sec += raw_secs;
999 } 1068 }
1000 timekeeper.raw_time.tv_nsec = raw_nsecs; 1069 tk->raw_time.tv_nsec = raw_nsecs;
1001 1070
1002 /* Accumulate error between NTP and clock interval */ 1071 /* Accumulate error between NTP and clock interval */
1003 timekeeper.ntp_error += ntp_tick_length() << shift; 1072 tk->ntp_error += ntp_tick_length() << shift;
1004 timekeeper.ntp_error -= 1073 tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
1005 (timekeeper.xtime_interval + timekeeper.xtime_remainder) << 1074 (tk->ntp_error_shift + shift);
1006 (timekeeper.ntp_error_shift + shift);
1007 1075
1008 return offset; 1076 return offset;
1009} 1077}
@@ -1019,6 +1087,7 @@ static void update_wall_time(void)
1019 cycle_t offset; 1087 cycle_t offset;
1020 int shift = 0, maxshift; 1088 int shift = 0, maxshift;
1021 unsigned long flags; 1089 unsigned long flags;
1090 s64 remainder;
1022 1091
1023 write_seqlock_irqsave(&timekeeper.lock, flags); 1092 write_seqlock_irqsave(&timekeeper.lock, flags);
1024 1093
@@ -1033,8 +1102,6 @@ static void update_wall_time(void)
1033#else 1102#else
1034 offset = (clock->read(clock) - clock->cycle_last) & clock->mask; 1103 offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
1035#endif 1104#endif
1036 timekeeper.xtime_nsec = (s64)timekeeper.xtime.tv_nsec <<
1037 timekeeper.shift;
1038 1105
1039 /* 1106 /*
1040 * With NO_HZ we may have to accumulate many cycle_intervals 1107 * With NO_HZ we may have to accumulate many cycle_intervals
@@ -1050,64 +1117,36 @@ static void update_wall_time(void)
1050 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; 1117 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
1051 shift = min(shift, maxshift); 1118 shift = min(shift, maxshift);
1052 while (offset >= timekeeper.cycle_interval) { 1119 while (offset >= timekeeper.cycle_interval) {
1053 offset = logarithmic_accumulation(offset, shift); 1120 offset = logarithmic_accumulation(&timekeeper, offset, shift);
1054 if(offset < timekeeper.cycle_interval<<shift) 1121 if(offset < timekeeper.cycle_interval<<shift)
1055 shift--; 1122 shift--;
1056 } 1123 }
1057 1124
1058 /* correct the clock when NTP error is too big */ 1125 /* correct the clock when NTP error is too big */
1059 timekeeping_adjust(offset); 1126 timekeeping_adjust(&timekeeper, offset);
1060
1061 /*
1062 * Since in the loop above, we accumulate any amount of time
1063 * in xtime_nsec over a second into xtime.tv_sec, its possible for
1064 * xtime_nsec to be fairly small after the loop. Further, if we're
1065 * slightly speeding the clocksource up in timekeeping_adjust(),
1066 * its possible the required corrective factor to xtime_nsec could
1067 * cause it to underflow.
1068 *
1069 * Now, we cannot simply roll the accumulated second back, since
1070 * the NTP subsystem has been notified via second_overflow. So
1071 * instead we push xtime_nsec forward by the amount we underflowed,
1072 * and add that amount into the error.
1073 *
1074 * We'll correct this error next time through this function, when
1075 * xtime_nsec is not as small.
1076 */
1077 if (unlikely((s64)timekeeper.xtime_nsec < 0)) {
1078 s64 neg = -(s64)timekeeper.xtime_nsec;
1079 timekeeper.xtime_nsec = 0;
1080 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
1081 }
1082 1127
1083 1128
1084 /* 1129 /*
1085 * Store full nanoseconds into xtime after rounding it up and 1130 * Store only full nanoseconds into xtime_nsec after rounding
1086 * add the remainder to the error difference. 1131 * it up and add the remainder to the error difference.
1087 */ 1132 * XXX - This is necessary to avoid small 1ns inconsistencies caused
1088 timekeeper.xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> 1133 * by truncating the remainder in vsyscalls. However, it causes
1089 timekeeper.shift) + 1; 1134 * additional work to be done in timekeeping_adjust(). Once
1090 timekeeper.xtime_nsec -= (s64)timekeeper.xtime.tv_nsec << 1135 * the vsyscall implementations are converted to use xtime_nsec
1091 timekeeper.shift; 1136 * (shifted nanoseconds), this can be killed.
1092 timekeeper.ntp_error += timekeeper.xtime_nsec << 1137 */
1093 timekeeper.ntp_error_shift; 1138 remainder = timekeeper.xtime_nsec & ((1 << timekeeper.shift) - 1);
1139 timekeeper.xtime_nsec -= remainder;
1140 timekeeper.xtime_nsec += 1 << timekeeper.shift;
1141 timekeeper.ntp_error += remainder << timekeeper.ntp_error_shift;
1094 1142
1095 /* 1143 /*
1096 * Finally, make sure that after the rounding 1144 * Finally, make sure that after the rounding
1097 * xtime.tv_nsec isn't larger than NSEC_PER_SEC 1145 * xtime_nsec isn't larger than NSEC_PER_SEC
1098 */ 1146 */
1099 if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { 1147 accumulate_nsecs_to_secs(&timekeeper);
1100 int leap;
1101 timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
1102 timekeeper.xtime.tv_sec++;
1103 leap = second_overflow(timekeeper.xtime.tv_sec);
1104 timekeeper.xtime.tv_sec += leap;
1105 timekeeper.wall_to_monotonic.tv_sec -= leap;
1106 if (leap)
1107 clock_was_set_delayed();
1108 }
1109 1148
1110 timekeeping_update(false); 1149 timekeeping_update(&timekeeper, false);
1111 1150
1112out: 1151out:
1113 write_sequnlock_irqrestore(&timekeeper.lock, flags); 1152 write_sequnlock_irqrestore(&timekeeper.lock, flags);
@@ -1152,21 +1191,20 @@ void get_monotonic_boottime(struct timespec *ts)
1152{ 1191{
1153 struct timespec tomono, sleep; 1192 struct timespec tomono, sleep;
1154 unsigned int seq; 1193 unsigned int seq;
1155 s64 nsecs;
1156 1194
1157 WARN_ON(timekeeping_suspended); 1195 WARN_ON(timekeeping_suspended);
1158 1196
1159 do { 1197 do {
1160 seq = read_seqbegin(&timekeeper.lock); 1198 seq = read_seqbegin(&timekeeper.lock);
1161 *ts = timekeeper.xtime; 1199 ts->tv_sec = timekeeper.xtime_sec;
1200 ts->tv_nsec = timekeeping_get_ns(&timekeeper);
1162 tomono = timekeeper.wall_to_monotonic; 1201 tomono = timekeeper.wall_to_monotonic;
1163 sleep = timekeeper.total_sleep_time; 1202 sleep = timekeeper.total_sleep_time;
1164 nsecs = timekeeping_get_ns();
1165 1203
1166 } while (read_seqretry(&timekeeper.lock, seq)); 1204 } while (read_seqretry(&timekeeper.lock, seq));
1167 1205
1168 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, 1206 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
1169 ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs); 1207 ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec);
1170} 1208}
1171EXPORT_SYMBOL_GPL(get_monotonic_boottime); 1209EXPORT_SYMBOL_GPL(get_monotonic_boottime);
1172 1210
@@ -1199,13 +1237,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
1199 1237
1200unsigned long get_seconds(void) 1238unsigned long get_seconds(void)
1201{ 1239{
1202 return timekeeper.xtime.tv_sec; 1240 return timekeeper.xtime_sec;
1203} 1241}
1204EXPORT_SYMBOL(get_seconds); 1242EXPORT_SYMBOL(get_seconds);
1205 1243
1206struct timespec __current_kernel_time(void) 1244struct timespec __current_kernel_time(void)
1207{ 1245{
1208 return timekeeper.xtime; 1246 return tk_xtime(&timekeeper);
1209} 1247}
1210 1248
1211struct timespec current_kernel_time(void) 1249struct timespec current_kernel_time(void)
@@ -1216,7 +1254,7 @@ struct timespec current_kernel_time(void)
1216 do { 1254 do {
1217 seq = read_seqbegin(&timekeeper.lock); 1255 seq = read_seqbegin(&timekeeper.lock);
1218 1256
1219 now = timekeeper.xtime; 1257 now = tk_xtime(&timekeeper);
1220 } while (read_seqretry(&timekeeper.lock, seq)); 1258 } while (read_seqretry(&timekeeper.lock, seq));
1221 1259
1222 return now; 1260 return now;
@@ -1231,7 +1269,7 @@ struct timespec get_monotonic_coarse(void)
1231 do { 1269 do {
1232 seq = read_seqbegin(&timekeeper.lock); 1270 seq = read_seqbegin(&timekeeper.lock);
1233 1271
1234 now = timekeeper.xtime; 1272 now = tk_xtime(&timekeeper);
1235 mono = timekeeper.wall_to_monotonic; 1273 mono = timekeeper.wall_to_monotonic;
1236 } while (read_seqretry(&timekeeper.lock, seq)); 1274 } while (read_seqretry(&timekeeper.lock, seq));
1237 1275
@@ -1266,7 +1304,7 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1266 1304
1267 do { 1305 do {
1268 seq = read_seqbegin(&timekeeper.lock); 1306 seq = read_seqbegin(&timekeeper.lock);
1269 *xtim = timekeeper.xtime; 1307 *xtim = tk_xtime(&timekeeper);
1270 *wtom = timekeeper.wall_to_monotonic; 1308 *wtom = timekeeper.wall_to_monotonic;
1271 *sleep = timekeeper.total_sleep_time; 1309 *sleep = timekeeper.total_sleep_time;
1272 } while (read_seqretry(&timekeeper.lock, seq)); 1310 } while (read_seqretry(&timekeeper.lock, seq));
@@ -1290,11 +1328,8 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1290 do { 1328 do {
1291 seq = read_seqbegin(&timekeeper.lock); 1329 seq = read_seqbegin(&timekeeper.lock);
1292 1330
1293 secs = timekeeper.xtime.tv_sec; 1331 secs = timekeeper.xtime_sec;
1294 nsecs = timekeeper.xtime.tv_nsec; 1332 nsecs = timekeeping_get_ns(&timekeeper);
1295 nsecs += timekeeping_get_ns();
1296 /* If arch requires, add in gettimeoffset() */
1297 nsecs += arch_gettimeoffset();
1298 1333
1299 *offs_real = timekeeper.offs_real; 1334 *offs_real = timekeeper.offs_real;
1300 *offs_boot = timekeeper.offs_boot; 1335 *offs_boot = timekeeper.offs_boot;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 3258455549f4..af5a7e9f164b 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -167,7 +167,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
167 { 167 {
168 struct tick_sched *ts = tick_get_tick_sched(cpu); 168 struct tick_sched *ts = tick_get_tick_sched(cpu);
169 P(nohz_mode); 169 P(nohz_mode);
170 P_ns(idle_tick); 170 P_ns(last_tick);
171 P(tick_stopped); 171 P(tick_stopped);
172 P(idle_jiffies); 172 P(idle_jiffies);
173 P(idle_calls); 173 P(idle_calls);
@@ -259,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v)
259 u64 now = ktime_to_ns(ktime_get()); 259 u64 now = ktime_to_ns(ktime_get());
260 int cpu; 260 int cpu;
261 261
262 SEQ_printf(m, "Timer List Version: v0.6\n"); 262 SEQ_printf(m, "Timer List Version: v0.7\n");
263 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); 263 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
264 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); 264 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
265 265
diff --git a/kernel/timer.c b/kernel/timer.c
index 6ec7e7e0db43..a61c09374eba 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -77,6 +77,7 @@ struct tvec_base {
77 struct timer_list *running_timer; 77 struct timer_list *running_timer;
78 unsigned long timer_jiffies; 78 unsigned long timer_jiffies;
79 unsigned long next_timer; 79 unsigned long next_timer;
80 unsigned long active_timers;
80 struct tvec_root tv1; 81 struct tvec_root tv1;
81 struct tvec tv2; 82 struct tvec tv2;
82 struct tvec tv3; 83 struct tvec tv3;
@@ -330,7 +331,8 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
330} 331}
331EXPORT_SYMBOL_GPL(set_timer_slack); 332EXPORT_SYMBOL_GPL(set_timer_slack);
332 333
333static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) 334static void
335__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
334{ 336{
335 unsigned long expires = timer->expires; 337 unsigned long expires = timer->expires;
336 unsigned long idx = expires - base->timer_jiffies; 338 unsigned long idx = expires - base->timer_jiffies;
@@ -372,6 +374,19 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
372 list_add_tail(&timer->entry, vec); 374 list_add_tail(&timer->entry, vec);
373} 375}
374 376
377static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
378{
379 __internal_add_timer(base, timer);
380 /*
381 * Update base->active_timers and base->next_timer
382 */
383 if (!tbase_get_deferrable(timer->base)) {
384 if (time_before(timer->expires, base->next_timer))
385 base->next_timer = timer->expires;
386 base->active_timers++;
387 }
388}
389
375#ifdef CONFIG_TIMER_STATS 390#ifdef CONFIG_TIMER_STATS
376void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) 391void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
377{ 392{
@@ -654,8 +669,7 @@ void init_timer_deferrable_key(struct timer_list *timer,
654} 669}
655EXPORT_SYMBOL(init_timer_deferrable_key); 670EXPORT_SYMBOL(init_timer_deferrable_key);
656 671
657static inline void detach_timer(struct timer_list *timer, 672static inline void detach_timer(struct timer_list *timer, bool clear_pending)
658 int clear_pending)
659{ 673{
660 struct list_head *entry = &timer->entry; 674 struct list_head *entry = &timer->entry;
661 675
@@ -667,6 +681,29 @@ static inline void detach_timer(struct timer_list *timer,
667 entry->prev = LIST_POISON2; 681 entry->prev = LIST_POISON2;
668} 682}
669 683
684static inline void
685detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
686{
687 detach_timer(timer, true);
688 if (!tbase_get_deferrable(timer->base))
689 timer->base->active_timers--;
690}
691
692static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
693 bool clear_pending)
694{
695 if (!timer_pending(timer))
696 return 0;
697
698 detach_timer(timer, clear_pending);
699 if (!tbase_get_deferrable(timer->base)) {
700 timer->base->active_timers--;
701 if (timer->expires == base->next_timer)
702 base->next_timer = base->timer_jiffies;
703 }
704 return 1;
705}
706
670/* 707/*
671 * We are using hashed locking: holding per_cpu(tvec_bases).lock 708 * We are using hashed locking: holding per_cpu(tvec_bases).lock
672 * means that all timers which are tied to this base via timer->base are 709 * means that all timers which are tied to this base via timer->base are
@@ -712,16 +749,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
712 749
713 base = lock_timer_base(timer, &flags); 750 base = lock_timer_base(timer, &flags);
714 751
715 if (timer_pending(timer)) { 752 ret = detach_if_pending(timer, base, false);
716 detach_timer(timer, 0); 753 if (!ret && pending_only)
717 if (timer->expires == base->next_timer && 754 goto out_unlock;
718 !tbase_get_deferrable(timer->base))
719 base->next_timer = base->timer_jiffies;
720 ret = 1;
721 } else {
722 if (pending_only)
723 goto out_unlock;
724 }
725 755
726 debug_activate(timer, expires); 756 debug_activate(timer, expires);
727 757
@@ -752,9 +782,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
752 } 782 }
753 783
754 timer->expires = expires; 784 timer->expires = expires;
755 if (time_before(timer->expires, base->next_timer) &&
756 !tbase_get_deferrable(timer->base))
757 base->next_timer = timer->expires;
758 internal_add_timer(base, timer); 785 internal_add_timer(base, timer);
759 786
760out_unlock: 787out_unlock:
@@ -920,9 +947,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
920 spin_lock_irqsave(&base->lock, flags); 947 spin_lock_irqsave(&base->lock, flags);
921 timer_set_base(timer, base); 948 timer_set_base(timer, base);
922 debug_activate(timer, timer->expires); 949 debug_activate(timer, timer->expires);
923 if (time_before(timer->expires, base->next_timer) &&
924 !tbase_get_deferrable(timer->base))
925 base->next_timer = timer->expires;
926 internal_add_timer(base, timer); 950 internal_add_timer(base, timer);
927 /* 951 /*
928 * Check whether the other CPU is idle and needs to be 952 * Check whether the other CPU is idle and needs to be
@@ -959,13 +983,7 @@ int del_timer(struct timer_list *timer)
959 timer_stats_timer_clear_start_info(timer); 983 timer_stats_timer_clear_start_info(timer);
960 if (timer_pending(timer)) { 984 if (timer_pending(timer)) {
961 base = lock_timer_base(timer, &flags); 985 base = lock_timer_base(timer, &flags);
962 if (timer_pending(timer)) { 986 ret = detach_if_pending(timer, base, true);
963 detach_timer(timer, 1);
964 if (timer->expires == base->next_timer &&
965 !tbase_get_deferrable(timer->base))
966 base->next_timer = base->timer_jiffies;
967 ret = 1;
968 }
969 spin_unlock_irqrestore(&base->lock, flags); 987 spin_unlock_irqrestore(&base->lock, flags);
970 } 988 }
971 989
@@ -990,19 +1008,10 @@ int try_to_del_timer_sync(struct timer_list *timer)
990 1008
991 base = lock_timer_base(timer, &flags); 1009 base = lock_timer_base(timer, &flags);
992 1010
993 if (base->running_timer == timer) 1011 if (base->running_timer != timer) {
994 goto out; 1012 timer_stats_timer_clear_start_info(timer);
995 1013 ret = detach_if_pending(timer, base, true);
996 timer_stats_timer_clear_start_info(timer);
997 ret = 0;
998 if (timer_pending(timer)) {
999 detach_timer(timer, 1);
1000 if (timer->expires == base->next_timer &&
1001 !tbase_get_deferrable(timer->base))
1002 base->next_timer = base->timer_jiffies;
1003 ret = 1;
1004 } 1014 }
1005out:
1006 spin_unlock_irqrestore(&base->lock, flags); 1015 spin_unlock_irqrestore(&base->lock, flags);
1007 1016
1008 return ret; 1017 return ret;
@@ -1089,7 +1098,8 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
1089 */ 1098 */
1090 list_for_each_entry_safe(timer, tmp, &tv_list, entry) { 1099 list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
1091 BUG_ON(tbase_get_base(timer->base) != base); 1100 BUG_ON(tbase_get_base(timer->base) != base);
1092 internal_add_timer(base, timer); 1101 /* No accounting, while moving them */
1102 __internal_add_timer(base, timer);
1093 } 1103 }
1094 1104
1095 return index; 1105 return index;
@@ -1178,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base)
1178 timer_stats_account_timer(timer); 1188 timer_stats_account_timer(timer);
1179 1189
1180 base->running_timer = timer; 1190 base->running_timer = timer;
1181 detach_timer(timer, 1); 1191 detach_expired_timer(timer, base);
1182 1192
1183 spin_unlock_irq(&base->lock); 1193 spin_unlock_irq(&base->lock);
1184 call_timer_fn(timer, fn, data); 1194 call_timer_fn(timer, fn, data);
@@ -1316,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
1316unsigned long get_next_timer_interrupt(unsigned long now) 1326unsigned long get_next_timer_interrupt(unsigned long now)
1317{ 1327{
1318 struct tvec_base *base = __this_cpu_read(tvec_bases); 1328 struct tvec_base *base = __this_cpu_read(tvec_bases);
1319 unsigned long expires; 1329 unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
1320 1330
1321 /* 1331 /*
1322 * Pretend that there is no timer pending if the cpu is offline. 1332 * Pretend that there is no timer pending if the cpu is offline.
1323 * Possible pending timers will be migrated later to an active cpu. 1333 * Possible pending timers will be migrated later to an active cpu.
1324 */ 1334 */
1325 if (cpu_is_offline(smp_processor_id())) 1335 if (cpu_is_offline(smp_processor_id()))
1326 return now + NEXT_TIMER_MAX_DELTA; 1336 return expires;
1337
1327 spin_lock(&base->lock); 1338 spin_lock(&base->lock);
1328 if (time_before_eq(base->next_timer, base->timer_jiffies)) 1339 if (base->active_timers) {
1329 base->next_timer = __next_timer_interrupt(base); 1340 if (time_before_eq(base->next_timer, base->timer_jiffies))
1330 expires = base->next_timer; 1341 base->next_timer = __next_timer_interrupt(base);
1342 expires = base->next_timer;
1343 }
1331 spin_unlock(&base->lock); 1344 spin_unlock(&base->lock);
1332 1345
1333 if (time_before_eq(expires, now)) 1346 if (time_before_eq(expires, now))
@@ -1704,6 +1717,7 @@ static int __cpuinit init_timers_cpu(int cpu)
1704 1717
1705 base->timer_jiffies = jiffies; 1718 base->timer_jiffies = jiffies;
1706 base->next_timer = base->timer_jiffies; 1719 base->next_timer = base->timer_jiffies;
1720 base->active_timers = 0;
1707 return 0; 1721 return 0;
1708} 1722}
1709 1723
@@ -1714,11 +1728,9 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
1714 1728
1715 while (!list_empty(head)) { 1729 while (!list_empty(head)) {
1716 timer = list_first_entry(head, struct timer_list, entry); 1730 timer = list_first_entry(head, struct timer_list, entry);
1717 detach_timer(timer, 0); 1731 /* We ignore the accounting on the dying cpu */
1732 detach_timer(timer, false);
1718 timer_set_base(timer, new_base); 1733 timer_set_base(timer, new_base);
1719 if (time_before(timer->expires, new_base->next_timer) &&
1720 !tbase_get_deferrable(timer->base))
1721 new_base->next_timer = timer->expires;
1722 internal_add_timer(new_base, timer); 1734 internal_add_timer(new_base, timer);
1723 } 1735 }
1724} 1736}