author     Linus Torvalds <torvalds@linux-foundation.org>  2012-07-22 14:35:46 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-07-22 14:35:46 -0400
commit     3992c0321258bdff3666cbaf5225f538ad61a548 (patch)
tree       42c98bcf601237b07ceac34b5bdb0b37558280dc /kernel
parent     55acdddbac1725b80df0c41970505e8a41c84956 (diff)
parent     eec19d1a0d04c80e66eef634f7b8f460f2ca5643 (diff)
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer core changes from Ingo Molnar:
"Continued cleanups of the core time and NTP code, plus more nohz work
preparing for tick-less userspace execution."
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
time: Rework timekeeping functions to take timekeeper ptr as argument
time: Move xtime_nsec adjustment underflow handling timekeeping_adjust
time: Move arch_gettimeoffset() usage into timekeeping_get_ns()
time: Refactor accumulation of nsecs to secs
time: Condense timekeeper.xtime into xtime_sec
time: Explicitly use u32 instead of int for shift values
time: Whitespace cleanups per Ingo's requests
nohz: Move next idle expiry time record into idle logic area
nohz: Move ts->idle_calls incrementation into strict idle logic
nohz: Rename ts->idle_tick to ts->last_tick
nohz: Make nohz API agnostic against idle ticks cputime accounting
nohz: Separate idle sleeping time accounting from nohz logic
timers: Improve get_next_timer_interrupt()
timers: Add accounting of non deferrable timers
timers: Consolidate base->next_timer update
timers: Create detach_if_pending() and use it
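
For readers skimming the shortlog, the central data-structure change in the timekeeping entries above is that timekeeper.xtime (a struct timespec) is replaced by xtime_sec plus clocksource-shifted nanoseconds. The sketch below is condensed from the timekeeping.c hunks in this merge; it is illustrative only (struct name shortened, unrelated fields and locking omitted), not a drop-in copy of the kernel code.

struct timekeeper_sketch {              /* trimmed; the real struct timekeeper has many more fields */
        u64 xtime_sec;                  /* current CLOCK_REALTIME time, whole seconds */
        u64 xtime_nsec;                 /* clock-shifted nanoseconds, i.e. nsec << shift */
        u32 shift;                      /* shift value of the current clocksource */
};

/* carry whole seconds out of the shifted-nanosecond field */
static inline void tk_normalize_xtime(struct timekeeper_sketch *tk)
{
        while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
                tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift;
                tk->xtime_sec++;
        }
}

/* getters/setters convert between timespec and the shifted representation */
static struct timespec tk_xtime(struct timekeeper_sketch *tk)
{
        struct timespec ts;

        ts.tv_sec = tk->xtime_sec;
        ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
        return ts;
}

static void tk_set_xtime(struct timekeeper_sketch *tk, const struct timespec *ts)
{
        tk->xtime_sec = ts->tv_sec;
        tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift;
}

Keeping the fractional part in shifted units lets accumulation in update_wall_time() stay in the clocksource's native resolution and round to whole nanoseconds only when a timespec is actually handed out (tk_xtime(), the vsyscall update), which is what the underflow-handling and accumulation-refactoring patches in the list rely on.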
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/time/tick-sched.c    192
-rw-r--r--  kernel/time/timekeeping.c   487
-rw-r--r--  kernel/time/timer_list.c      4
-rw-r--r--  kernel/timer.c              110
4 files changed, 432 insertions, 361 deletions
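
On the nohz side, the tick-sched.c changes that follow split the old monolithic tick_nohz_stop_sched_tick() into a "may we stop the tick?" predicate and a stop routine that returns the programmed expiry, with a small __tick_nohz_idle_enter() wrapper driving both. The outline below is condensed from those hunks (the softirq warning, most comments and some statistics are trimmed) and is meant as a reading aid, not a substitute for the patch:

static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
        /*
         * If this cpu is offline and it is the jiffies-update cpu,
         * drop the do_timer() duty so another cpu can pick it up.
         */
        if (unlikely(!cpu_online(cpu))) {
                if (cpu == tick_do_timer_cpu)
                        tick_do_timer_cpu = TICK_DO_TIMER_NONE;
        }

        if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
                return false;

        if (need_resched())
                return false;

        /* the real code rate-limits a printk warning here */
        if (unlikely(local_softirq_pending() && cpu_online(cpu)))
                return false;

        return true;
}

static void __tick_nohz_idle_enter(struct tick_sched *ts)
{
        ktime_t now, expires;
        int cpu = smp_processor_id();

        now = tick_nohz_start_idle(cpu, ts);

        if (can_stop_idle_tick(cpu, ts)) {
                int was_stopped = ts->tick_stopped;

                ts->idle_calls++;

                /* returns the programmed expiry, or .tv64 == 0 if none */
                expires = tick_nohz_stop_sched_tick(ts, now, cpu);
                if (expires.tv64 > 0LL) {
                        ts->idle_sleeps++;
                        ts->idle_expires = expires;
                }

                if (!was_stopped && ts->tick_stopped)
                        ts->idle_jiffies = ts->last_jiffies;
        }
}

tick_nohz_idle_enter() and tick_nohz_irq_exit() both call this wrapper, which lets the idle_calls/idle_sleeps accounting stay in strict idle logic while the stop/restart machinery remains agnostic of it.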
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 45b17aea79ef..024540f97f74 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -271,50 +271,15 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | |||
271 | } | 271 | } |
272 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); | 272 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); |
273 | 273 | ||
274 | static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | 274 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, |
275 | ktime_t now, int cpu) | ||
275 | { | 276 | { |
276 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; | 277 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; |
278 | ktime_t last_update, expires, ret = { .tv64 = 0 }; | ||
277 | unsigned long rcu_delta_jiffies; | 279 | unsigned long rcu_delta_jiffies; |
278 | ktime_t last_update, expires, now; | ||
279 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 280 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
280 | u64 time_delta; | 281 | u64 time_delta; |
281 | int cpu; | ||
282 | |||
283 | cpu = smp_processor_id(); | ||
284 | ts = &per_cpu(tick_cpu_sched, cpu); | ||
285 | |||
286 | now = tick_nohz_start_idle(cpu, ts); | ||
287 | |||
288 | /* | ||
289 | * If this cpu is offline and it is the one which updates | ||
290 | * jiffies, then give up the assignment and let it be taken by | ||
291 | * the cpu which runs the tick timer next. If we don't drop | ||
292 | * this here the jiffies might be stale and do_timer() never | ||
293 | * invoked. | ||
294 | */ | ||
295 | if (unlikely(!cpu_online(cpu))) { | ||
296 | if (cpu == tick_do_timer_cpu) | ||
297 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
298 | } | ||
299 | |||
300 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | ||
301 | return; | ||
302 | 282 | ||
303 | if (need_resched()) | ||
304 | return; | ||
305 | |||
306 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | ||
307 | static int ratelimit; | ||
308 | |||
309 | if (ratelimit < 10) { | ||
310 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | ||
311 | (unsigned int) local_softirq_pending()); | ||
312 | ratelimit++; | ||
313 | } | ||
314 | return; | ||
315 | } | ||
316 | |||
317 | ts->idle_calls++; | ||
318 | /* Read jiffies and the time when jiffies were updated last */ | 283 | /* Read jiffies and the time when jiffies were updated last */ |
319 | do { | 284 | do { |
320 | seq = read_seqbegin(&xtime_lock); | 285 | seq = read_seqbegin(&xtime_lock); |
@@ -397,6 +362,8 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | |||
397 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) | 362 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) |
398 | goto out; | 363 | goto out; |
399 | 364 | ||
365 | ret = expires; | ||
366 | |||
400 | /* | 367 | /* |
401 | * nohz_stop_sched_tick can be called several times before | 368 | * nohz_stop_sched_tick can be called several times before |
402 | * the nohz_restart_sched_tick is called. This happens when | 369 | * the nohz_restart_sched_tick is called. This happens when |
@@ -408,16 +375,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | |||
408 | select_nohz_load_balancer(1); | 375 | select_nohz_load_balancer(1); |
409 | calc_load_enter_idle(); | 376 | calc_load_enter_idle(); |
410 | 377 | ||
411 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); | 378 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); |
412 | ts->tick_stopped = 1; | 379 | ts->tick_stopped = 1; |
413 | ts->idle_jiffies = last_jiffies; | ||
414 | } | 380 | } |
415 | 381 | ||
416 | ts->idle_sleeps++; | ||
417 | |||
418 | /* Mark expires */ | ||
419 | ts->idle_expires = expires; | ||
420 | |||
421 | /* | 382 | /* |
422 | * If the expiration time == KTIME_MAX, then | 383 | * If the expiration time == KTIME_MAX, then |
423 | * in this case we simply stop the tick timer. | 384 | * in this case we simply stop the tick timer. |
@@ -448,6 +409,65 @@ out: | |||
448 | ts->next_jiffies = next_jiffies; | 409 | ts->next_jiffies = next_jiffies; |
449 | ts->last_jiffies = last_jiffies; | 410 | ts->last_jiffies = last_jiffies; |
450 | ts->sleep_length = ktime_sub(dev->next_event, now); | 411 | ts->sleep_length = ktime_sub(dev->next_event, now); |
412 | |||
413 | return ret; | ||
414 | } | ||
415 | |||
416 | static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | ||
417 | { | ||
418 | /* | ||
419 | * If this cpu is offline and it is the one which updates | ||
420 | * jiffies, then give up the assignment and let it be taken by | ||
421 | * the cpu which runs the tick timer next. If we don't drop | ||
422 | * this here the jiffies might be stale and do_timer() never | ||
423 | * invoked. | ||
424 | */ | ||
425 | if (unlikely(!cpu_online(cpu))) { | ||
426 | if (cpu == tick_do_timer_cpu) | ||
427 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
428 | } | ||
429 | |||
430 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | ||
431 | return false; | ||
432 | |||
433 | if (need_resched()) | ||
434 | return false; | ||
435 | |||
436 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | ||
437 | static int ratelimit; | ||
438 | |||
439 | if (ratelimit < 10) { | ||
440 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | ||
441 | (unsigned int) local_softirq_pending()); | ||
442 | ratelimit++; | ||
443 | } | ||
444 | return false; | ||
445 | } | ||
446 | |||
447 | return true; | ||
448 | } | ||
449 | |||
450 | static void __tick_nohz_idle_enter(struct tick_sched *ts) | ||
451 | { | ||
452 | ktime_t now, expires; | ||
453 | int cpu = smp_processor_id(); | ||
454 | |||
455 | now = tick_nohz_start_idle(cpu, ts); | ||
456 | |||
457 | if (can_stop_idle_tick(cpu, ts)) { | ||
458 | int was_stopped = ts->tick_stopped; | ||
459 | |||
460 | ts->idle_calls++; | ||
461 | |||
462 | expires = tick_nohz_stop_sched_tick(ts, now, cpu); | ||
463 | if (expires.tv64 > 0LL) { | ||
464 | ts->idle_sleeps++; | ||
465 | ts->idle_expires = expires; | ||
466 | } | ||
467 | |||
468 | if (!was_stopped && ts->tick_stopped) | ||
469 | ts->idle_jiffies = ts->last_jiffies; | ||
470 | } | ||
451 | } | 471 | } |
452 | 472 | ||
453 | /** | 473 | /** |
@@ -485,7 +505,7 @@ void tick_nohz_idle_enter(void) | |||
485 | * update of the idle time accounting in tick_nohz_start_idle(). | 505 | * update of the idle time accounting in tick_nohz_start_idle(). |
486 | */ | 506 | */ |
487 | ts->inidle = 1; | 507 | ts->inidle = 1; |
488 | tick_nohz_stop_sched_tick(ts); | 508 | __tick_nohz_idle_enter(ts); |
489 | 509 | ||
490 | local_irq_enable(); | 510 | local_irq_enable(); |
491 | } | 511 | } |
@@ -505,7 +525,7 @@ void tick_nohz_irq_exit(void) | |||
505 | if (!ts->inidle) | 525 | if (!ts->inidle) |
506 | return; | 526 | return; |
507 | 527 | ||
508 | tick_nohz_stop_sched_tick(ts); | 528 | __tick_nohz_idle_enter(ts); |
509 | } | 529 | } |
510 | 530 | ||
511 | /** | 531 | /** |
@@ -523,7 +543,7 @@ ktime_t tick_nohz_get_sleep_length(void) | |||
523 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | 543 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) |
524 | { | 544 | { |
525 | hrtimer_cancel(&ts->sched_timer); | 545 | hrtimer_cancel(&ts->sched_timer); |
526 | hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); | 546 | hrtimer_set_expires(&ts->sched_timer, ts->last_tick); |
527 | 547 | ||
528 | while (1) { | 548 | while (1) { |
529 | /* Forward the time to expire in the future */ | 549 | /* Forward the time to expire in the future */ |
@@ -546,6 +566,41 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
546 | } | 566 | } |
547 | } | 567 | } |
548 | 568 | ||
569 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | ||
570 | { | ||
571 | /* Update jiffies first */ | ||
572 | select_nohz_load_balancer(0); | ||
573 | tick_do_update_jiffies64(now); | ||
574 | update_cpu_load_nohz(); | ||
575 | |||
576 | touch_softlockup_watchdog(); | ||
577 | /* | ||
578 | * Cancel the scheduled timer and restore the tick | ||
579 | */ | ||
580 | ts->tick_stopped = 0; | ||
581 | ts->idle_exittime = now; | ||
582 | |||
583 | tick_nohz_restart(ts, now); | ||
584 | } | ||
585 | |||
586 | static void tick_nohz_account_idle_ticks(struct tick_sched *ts) | ||
587 | { | ||
588 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
589 | unsigned long ticks; | ||
590 | /* | ||
591 | * We stopped the tick in idle. Update process times would miss the | ||
592 | * time we slept as update_process_times does only a 1 tick | ||
593 | * accounting. Enforce that this is accounted to idle ! | ||
594 | */ | ||
595 | ticks = jiffies - ts->idle_jiffies; | ||
596 | /* | ||
597 | * We might be one off. Do not randomly account a huge number of ticks! | ||
598 | */ | ||
599 | if (ticks && ticks < LONG_MAX) | ||
600 | account_idle_ticks(ticks); | ||
601 | #endif | ||
602 | } | ||
603 | |||
549 | /** | 604 | /** |
550 | * tick_nohz_idle_exit - restart the idle tick from the idle task | 605 | * tick_nohz_idle_exit - restart the idle tick from the idle task |
551 | * | 606 | * |
@@ -557,9 +612,6 @@ void tick_nohz_idle_exit(void) | |||
557 | { | 612 | { |
558 | int cpu = smp_processor_id(); | 613 | int cpu = smp_processor_id(); |
559 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 614 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
560 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
561 | unsigned long ticks; | ||
562 | #endif | ||
563 | ktime_t now; | 615 | ktime_t now; |
564 | 616 | ||
565 | local_irq_disable(); | 617 | local_irq_disable(); |
@@ -574,40 +626,11 @@ void tick_nohz_idle_exit(void) | |||
574 | if (ts->idle_active) | 626 | if (ts->idle_active) |
575 | tick_nohz_stop_idle(cpu, now); | 627 | tick_nohz_stop_idle(cpu, now); |
576 | 628 | ||
577 | if (!ts->tick_stopped) { | 629 | if (ts->tick_stopped) { |
578 | local_irq_enable(); | 630 | tick_nohz_restart_sched_tick(ts, now); |
579 | return; | 631 | tick_nohz_account_idle_ticks(ts); |
580 | } | 632 | } |
581 | 633 | ||
582 | /* Update jiffies first */ | ||
583 | select_nohz_load_balancer(0); | ||
584 | tick_do_update_jiffies64(now); | ||
585 | update_cpu_load_nohz(); | ||
586 | |||
587 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
588 | /* | ||
589 | * We stopped the tick in idle. Update process times would miss the | ||
590 | * time we slept as update_process_times does only a 1 tick | ||
591 | * accounting. Enforce that this is accounted to idle ! | ||
592 | */ | ||
593 | ticks = jiffies - ts->idle_jiffies; | ||
594 | /* | ||
595 | * We might be one off. Do not randomly account a huge number of ticks! | ||
596 | */ | ||
597 | if (ticks && ticks < LONG_MAX) | ||
598 | account_idle_ticks(ticks); | ||
599 | #endif | ||
600 | |||
601 | calc_load_exit_idle(); | ||
602 | touch_softlockup_watchdog(); | ||
603 | /* | ||
604 | * Cancel the scheduled timer and restore the tick | ||
605 | */ | ||
606 | ts->tick_stopped = 0; | ||
607 | ts->idle_exittime = now; | ||
608 | |||
609 | tick_nohz_restart(ts, now); | ||
610 | |||
611 | local_irq_enable(); | 634 | local_irq_enable(); |
612 | } | 635 | } |
613 | 636 | ||
@@ -811,7 +834,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
811 | */ | 834 | */ |
812 | if (ts->tick_stopped) { | 835 | if (ts->tick_stopped) { |
813 | touch_softlockup_watchdog(); | 836 | touch_softlockup_watchdog(); |
814 | ts->idle_jiffies++; | 837 | if (idle_cpu(cpu)) |
838 | ts->idle_jiffies++; | ||
815 | } | 839 | } |
816 | update_process_times(user_mode(regs)); | 840 | update_process_times(user_mode(regs)); |
817 | profile_tick(CPU_PROFILING); | 841 | profile_tick(CPU_PROFILING); |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3447cfaf11e7..f045cc50832d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -24,32 +24,32 @@ | |||
24 | /* Structure holding internal timekeeping values. */ | 24 | /* Structure holding internal timekeeping values. */ |
25 | struct timekeeper { | 25 | struct timekeeper { |
26 | /* Current clocksource used for timekeeping. */ | 26 | /* Current clocksource used for timekeeping. */ |
27 | struct clocksource *clock; | 27 | struct clocksource *clock; |
28 | /* NTP adjusted clock multiplier */ | 28 | /* NTP adjusted clock multiplier */ |
29 | u32 mult; | 29 | u32 mult; |
30 | /* The shift value of the current clocksource. */ | 30 | /* The shift value of the current clocksource. */ |
31 | int shift; | 31 | u32 shift; |
32 | |||
33 | /* Number of clock cycles in one NTP interval. */ | 32 | /* Number of clock cycles in one NTP interval. */ |
34 | cycle_t cycle_interval; | 33 | cycle_t cycle_interval; |
35 | /* Number of clock shifted nano seconds in one NTP interval. */ | 34 | /* Number of clock shifted nano seconds in one NTP interval. */ |
36 | u64 xtime_interval; | 35 | u64 xtime_interval; |
37 | /* shifted nano seconds left over when rounding cycle_interval */ | 36 | /* shifted nano seconds left over when rounding cycle_interval */ |
38 | s64 xtime_remainder; | 37 | s64 xtime_remainder; |
39 | /* Raw nano seconds accumulated per NTP interval. */ | 38 | /* Raw nano seconds accumulated per NTP interval. */ |
40 | u32 raw_interval; | 39 | u32 raw_interval; |
40 | |||
41 | /* Current CLOCK_REALTIME time in seconds */ | ||
42 | u64 xtime_sec; | ||
43 | /* Clock shifted nano seconds */ | ||
44 | u64 xtime_nsec; | ||
41 | 45 | ||
42 | /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ | ||
43 | u64 xtime_nsec; | ||
44 | /* Difference between accumulated time and NTP time in ntp | 46 | /* Difference between accumulated time and NTP time in ntp |
45 | * shifted nano seconds. */ | 47 | * shifted nano seconds. */ |
46 | s64 ntp_error; | 48 | s64 ntp_error; |
47 | /* Shift conversion between clock shifted nano seconds and | 49 | /* Shift conversion between clock shifted nano seconds and |
48 | * ntp shifted nano seconds. */ | 50 | * ntp shifted nano seconds. */ |
49 | int ntp_error_shift; | 51 | u32 ntp_error_shift; |
50 | 52 | ||
51 | /* The current time */ | ||
52 | struct timespec xtime; | ||
53 | /* | 53 | /* |
54 | * wall_to_monotonic is what we need to add to xtime (or xtime corrected | 54 | * wall_to_monotonic is what we need to add to xtime (or xtime corrected |
55 | * for sub jiffie times) to get to monotonic time. Monotonic is pegged | 55 | * for sub jiffie times) to get to monotonic time. Monotonic is pegged |
@@ -64,20 +64,17 @@ struct timekeeper { | |||
64 | * - wall_to_monotonic is no longer the boot time, getboottime must be | 64 | * - wall_to_monotonic is no longer the boot time, getboottime must be |
65 | * used instead. | 65 | * used instead. |
66 | */ | 66 | */ |
67 | struct timespec wall_to_monotonic; | 67 | struct timespec wall_to_monotonic; |
68 | /* time spent in suspend */ | 68 | /* time spent in suspend */ |
69 | struct timespec total_sleep_time; | 69 | struct timespec total_sleep_time; |
70 | /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ | 70 | /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ |
71 | struct timespec raw_time; | 71 | struct timespec raw_time; |
72 | |||
73 | /* Offset clock monotonic -> clock realtime */ | 72 | /* Offset clock monotonic -> clock realtime */ |
74 | ktime_t offs_real; | 73 | ktime_t offs_real; |
75 | |||
76 | /* Offset clock monotonic -> clock boottime */ | 74 | /* Offset clock monotonic -> clock boottime */ |
77 | ktime_t offs_boot; | 75 | ktime_t offs_boot; |
78 | |||
79 | /* Seqlock for all timekeeper values */ | 76 | /* Seqlock for all timekeeper values */ |
80 | seqlock_t lock; | 77 | seqlock_t lock; |
81 | }; | 78 | }; |
82 | 79 | ||
83 | static struct timekeeper timekeeper; | 80 | static struct timekeeper timekeeper; |
@@ -88,11 +85,37 @@ static struct timekeeper timekeeper; | |||
88 | */ | 85 | */ |
89 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); | 86 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); |
90 | 87 | ||
91 | |||
92 | /* flag for if timekeeping is suspended */ | 88 | /* flag for if timekeeping is suspended */ |
93 | int __read_mostly timekeeping_suspended; | 89 | int __read_mostly timekeeping_suspended; |
94 | 90 | ||
91 | static inline void tk_normalize_xtime(struct timekeeper *tk) | ||
92 | { | ||
93 | while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { | ||
94 | tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift; | ||
95 | tk->xtime_sec++; | ||
96 | } | ||
97 | } | ||
95 | 98 | ||
99 | static struct timespec tk_xtime(struct timekeeper *tk) | ||
100 | { | ||
101 | struct timespec ts; | ||
102 | |||
103 | ts.tv_sec = tk->xtime_sec; | ||
104 | ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); | ||
105 | return ts; | ||
106 | } | ||
107 | |||
108 | static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) | ||
109 | { | ||
110 | tk->xtime_sec = ts->tv_sec; | ||
111 | tk->xtime_nsec = ts->tv_nsec << tk->shift; | ||
112 | } | ||
113 | |||
114 | static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) | ||
115 | { | ||
116 | tk->xtime_sec += ts->tv_sec; | ||
117 | tk->xtime_nsec += ts->tv_nsec << tk->shift; | ||
118 | } | ||
96 | 119 | ||
97 | /** | 120 | /** |
98 | * timekeeper_setup_internals - Set up internals to use clocksource clock. | 121 | * timekeeper_setup_internals - Set up internals to use clocksource clock. |
@@ -104,12 +127,14 @@ int __read_mostly timekeeping_suspended; | |||
104 | * | 127 | * |
105 | * Unless you're the timekeeping code, you should not be using this! | 128 | * Unless you're the timekeeping code, you should not be using this! |
106 | */ | 129 | */ |
107 | static void timekeeper_setup_internals(struct clocksource *clock) | 130 | static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) |
108 | { | 131 | { |
109 | cycle_t interval; | 132 | cycle_t interval; |
110 | u64 tmp, ntpinterval; | 133 | u64 tmp, ntpinterval; |
134 | struct clocksource *old_clock; | ||
111 | 135 | ||
112 | timekeeper.clock = clock; | 136 | old_clock = tk->clock; |
137 | tk->clock = clock; | ||
113 | clock->cycle_last = clock->read(clock); | 138 | clock->cycle_last = clock->read(clock); |
114 | 139 | ||
115 | /* Do the ns -> cycle conversion first, using original mult */ | 140 | /* Do the ns -> cycle conversion first, using original mult */ |
@@ -122,80 +147,96 @@ static void timekeeper_setup_internals(struct clocksource *clock) | |||
122 | tmp = 1; | 147 | tmp = 1; |
123 | 148 | ||
124 | interval = (cycle_t) tmp; | 149 | interval = (cycle_t) tmp; |
125 | timekeeper.cycle_interval = interval; | 150 | tk->cycle_interval = interval; |
126 | 151 | ||
127 | /* Go back from cycles -> shifted ns */ | 152 | /* Go back from cycles -> shifted ns */ |
128 | timekeeper.xtime_interval = (u64) interval * clock->mult; | 153 | tk->xtime_interval = (u64) interval * clock->mult; |
129 | timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; | 154 | tk->xtime_remainder = ntpinterval - tk->xtime_interval; |
130 | timekeeper.raw_interval = | 155 | tk->raw_interval = |
131 | ((u64) interval * clock->mult) >> clock->shift; | 156 | ((u64) interval * clock->mult) >> clock->shift; |
132 | 157 | ||
133 | timekeeper.xtime_nsec = 0; | 158 | /* if changing clocks, convert xtime_nsec shift units */ |
134 | timekeeper.shift = clock->shift; | 159 | if (old_clock) { |
160 | int shift_change = clock->shift - old_clock->shift; | ||
161 | if (shift_change < 0) | ||
162 | tk->xtime_nsec >>= -shift_change; | ||
163 | else | ||
164 | tk->xtime_nsec <<= shift_change; | ||
165 | } | ||
166 | tk->shift = clock->shift; | ||
135 | 167 | ||
136 | timekeeper.ntp_error = 0; | 168 | tk->ntp_error = 0; |
137 | timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; | 169 | tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; |
138 | 170 | ||
139 | /* | 171 | /* |
140 | * The timekeeper keeps its own mult values for the currently | 172 | * The timekeeper keeps its own mult values for the currently |
141 | * active clocksource. These value will be adjusted via NTP | 173 | * active clocksource. These value will be adjusted via NTP |
142 | * to counteract clock drifting. | 174 | * to counteract clock drifting. |
143 | */ | 175 | */ |
144 | timekeeper.mult = clock->mult; | 176 | tk->mult = clock->mult; |
145 | } | 177 | } |
146 | 178 | ||
147 | /* Timekeeper helper functions. */ | 179 | /* Timekeeper helper functions. */ |
148 | static inline s64 timekeeping_get_ns(void) | 180 | static inline s64 timekeeping_get_ns(struct timekeeper *tk) |
149 | { | 181 | { |
150 | cycle_t cycle_now, cycle_delta; | 182 | cycle_t cycle_now, cycle_delta; |
151 | struct clocksource *clock; | 183 | struct clocksource *clock; |
184 | s64 nsec; | ||
152 | 185 | ||
153 | /* read clocksource: */ | 186 | /* read clocksource: */ |
154 | clock = timekeeper.clock; | 187 | clock = tk->clock; |
155 | cycle_now = clock->read(clock); | 188 | cycle_now = clock->read(clock); |
156 | 189 | ||
157 | /* calculate the delta since the last update_wall_time: */ | 190 | /* calculate the delta since the last update_wall_time: */ |
158 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | 191 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; |
159 | 192 | ||
160 | /* return delta convert to nanoseconds using ntp adjusted mult. */ | 193 | nsec = cycle_delta * tk->mult + tk->xtime_nsec; |
161 | return clocksource_cyc2ns(cycle_delta, timekeeper.mult, | 194 | nsec >>= tk->shift; |
162 | timekeeper.shift); | 195 | |
196 | /* If arch requires, add in gettimeoffset() */ | ||
197 | return nsec + arch_gettimeoffset(); | ||
163 | } | 198 | } |
164 | 199 | ||
165 | static inline s64 timekeeping_get_ns_raw(void) | 200 | static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) |
166 | { | 201 | { |
167 | cycle_t cycle_now, cycle_delta; | 202 | cycle_t cycle_now, cycle_delta; |
168 | struct clocksource *clock; | 203 | struct clocksource *clock; |
204 | s64 nsec; | ||
169 | 205 | ||
170 | /* read clocksource: */ | 206 | /* read clocksource: */ |
171 | clock = timekeeper.clock; | 207 | clock = tk->clock; |
172 | cycle_now = clock->read(clock); | 208 | cycle_now = clock->read(clock); |
173 | 209 | ||
174 | /* calculate the delta since the last update_wall_time: */ | 210 | /* calculate the delta since the last update_wall_time: */ |
175 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | 211 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; |
176 | 212 | ||
177 | /* return delta convert to nanoseconds. */ | 213 | /* convert delta to nanoseconds. */ |
178 | return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); | 214 | nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); |
215 | |||
216 | /* If arch requires, add in gettimeoffset() */ | ||
217 | return nsec + arch_gettimeoffset(); | ||
179 | } | 218 | } |
180 | 219 | ||
181 | static void update_rt_offset(void) | 220 | static void update_rt_offset(struct timekeeper *tk) |
182 | { | 221 | { |
183 | struct timespec tmp, *wtm = &timekeeper.wall_to_monotonic; | 222 | struct timespec tmp, *wtm = &tk->wall_to_monotonic; |
184 | 223 | ||
185 | set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); | 224 | set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); |
186 | timekeeper.offs_real = timespec_to_ktime(tmp); | 225 | tk->offs_real = timespec_to_ktime(tmp); |
187 | } | 226 | } |
188 | 227 | ||
189 | /* must hold write on timekeeper.lock */ | 228 | /* must hold write on timekeeper.lock */ |
190 | static void timekeeping_update(bool clearntp) | 229 | static void timekeeping_update(struct timekeeper *tk, bool clearntp) |
191 | { | 230 | { |
231 | struct timespec xt; | ||
232 | |||
192 | if (clearntp) { | 233 | if (clearntp) { |
193 | timekeeper.ntp_error = 0; | 234 | tk->ntp_error = 0; |
194 | ntp_clear(); | 235 | ntp_clear(); |
195 | } | 236 | } |
196 | update_rt_offset(); | 237 | update_rt_offset(tk); |
197 | update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, | 238 | xt = tk_xtime(tk); |
198 | timekeeper.clock, timekeeper.mult); | 239 | update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); |
199 | } | 240 | } |
200 | 241 | ||
201 | 242 | ||
@@ -206,27 +247,26 @@ static void timekeeping_update(bool clearntp) | |||
206 | * update_wall_time(). This is useful before significant clock changes, | 247 | * update_wall_time(). This is useful before significant clock changes, |
207 | * as it avoids having to deal with this time offset explicitly. | 248 | * as it avoids having to deal with this time offset explicitly. |
208 | */ | 249 | */ |
209 | static void timekeeping_forward_now(void) | 250 | static void timekeeping_forward_now(struct timekeeper *tk) |
210 | { | 251 | { |
211 | cycle_t cycle_now, cycle_delta; | 252 | cycle_t cycle_now, cycle_delta; |
212 | struct clocksource *clock; | 253 | struct clocksource *clock; |
213 | s64 nsec; | 254 | s64 nsec; |
214 | 255 | ||
215 | clock = timekeeper.clock; | 256 | clock = tk->clock; |
216 | cycle_now = clock->read(clock); | 257 | cycle_now = clock->read(clock); |
217 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | 258 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; |
218 | clock->cycle_last = cycle_now; | 259 | clock->cycle_last = cycle_now; |
219 | 260 | ||
220 | nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, | 261 | tk->xtime_nsec += cycle_delta * tk->mult; |
221 | timekeeper.shift); | ||
222 | 262 | ||
223 | /* If arch requires, add in gettimeoffset() */ | 263 | /* If arch requires, add in gettimeoffset() */ |
224 | nsec += arch_gettimeoffset(); | 264 | tk->xtime_nsec += arch_gettimeoffset() << tk->shift; |
225 | 265 | ||
226 | timespec_add_ns(&timekeeper.xtime, nsec); | 266 | tk_normalize_xtime(tk); |
227 | 267 | ||
228 | nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); | 268 | nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); |
229 | timespec_add_ns(&timekeeper.raw_time, nsec); | 269 | timespec_add_ns(&tk->raw_time, nsec); |
230 | } | 270 | } |
231 | 271 | ||
232 | /** | 272 | /** |
@@ -238,18 +278,15 @@ static void timekeeping_forward_now(void) | |||
238 | void getnstimeofday(struct timespec *ts) | 278 | void getnstimeofday(struct timespec *ts) |
239 | { | 279 | { |
240 | unsigned long seq; | 280 | unsigned long seq; |
241 | s64 nsecs; | 281 | s64 nsecs = 0; |
242 | 282 | ||
243 | WARN_ON(timekeeping_suspended); | 283 | WARN_ON(timekeeping_suspended); |
244 | 284 | ||
245 | do { | 285 | do { |
246 | seq = read_seqbegin(&timekeeper.lock); | 286 | seq = read_seqbegin(&timekeeper.lock); |
247 | 287 | ||
248 | *ts = timekeeper.xtime; | 288 | ts->tv_sec = timekeeper.xtime_sec; |
249 | nsecs = timekeeping_get_ns(); | 289 | ts->tv_nsec = timekeeping_get_ns(&timekeeper); |
250 | |||
251 | /* If arch requires, add in gettimeoffset() */ | ||
252 | nsecs += arch_gettimeoffset(); | ||
253 | 290 | ||
254 | } while (read_seqretry(&timekeeper.lock, seq)); | 291 | } while (read_seqretry(&timekeeper.lock, seq)); |
255 | 292 | ||
@@ -266,13 +303,10 @@ ktime_t ktime_get(void) | |||
266 | 303 | ||
267 | do { | 304 | do { |
268 | seq = read_seqbegin(&timekeeper.lock); | 305 | seq = read_seqbegin(&timekeeper.lock); |
269 | secs = timekeeper.xtime.tv_sec + | 306 | secs = timekeeper.xtime_sec + |
270 | timekeeper.wall_to_monotonic.tv_sec; | 307 | timekeeper.wall_to_monotonic.tv_sec; |
271 | nsecs = timekeeper.xtime.tv_nsec + | 308 | nsecs = timekeeping_get_ns(&timekeeper) + |
272 | timekeeper.wall_to_monotonic.tv_nsec; | 309 | timekeeper.wall_to_monotonic.tv_nsec; |
273 | nsecs += timekeeping_get_ns(); | ||
274 | /* If arch requires, add in gettimeoffset() */ | ||
275 | nsecs += arch_gettimeoffset(); | ||
276 | 310 | ||
277 | } while (read_seqretry(&timekeeper.lock, seq)); | 311 | } while (read_seqretry(&timekeeper.lock, seq)); |
278 | /* | 312 | /* |
@@ -295,22 +329,19 @@ void ktime_get_ts(struct timespec *ts) | |||
295 | { | 329 | { |
296 | struct timespec tomono; | 330 | struct timespec tomono; |
297 | unsigned int seq; | 331 | unsigned int seq; |
298 | s64 nsecs; | ||
299 | 332 | ||
300 | WARN_ON(timekeeping_suspended); | 333 | WARN_ON(timekeeping_suspended); |
301 | 334 | ||
302 | do { | 335 | do { |
303 | seq = read_seqbegin(&timekeeper.lock); | 336 | seq = read_seqbegin(&timekeeper.lock); |
304 | *ts = timekeeper.xtime; | 337 | ts->tv_sec = timekeeper.xtime_sec; |
338 | ts->tv_nsec = timekeeping_get_ns(&timekeeper); | ||
305 | tomono = timekeeper.wall_to_monotonic; | 339 | tomono = timekeeper.wall_to_monotonic; |
306 | nsecs = timekeeping_get_ns(); | ||
307 | /* If arch requires, add in gettimeoffset() */ | ||
308 | nsecs += arch_gettimeoffset(); | ||
309 | 340 | ||
310 | } while (read_seqretry(&timekeeper.lock, seq)); | 341 | } while (read_seqretry(&timekeeper.lock, seq)); |
311 | 342 | ||
312 | set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, | 343 | set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, |
313 | ts->tv_nsec + tomono.tv_nsec + nsecs); | 344 | ts->tv_nsec + tomono.tv_nsec); |
314 | } | 345 | } |
315 | EXPORT_SYMBOL_GPL(ktime_get_ts); | 346 | EXPORT_SYMBOL_GPL(ktime_get_ts); |
316 | 347 | ||
@@ -333,20 +364,14 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) | |||
333 | WARN_ON_ONCE(timekeeping_suspended); | 364 | WARN_ON_ONCE(timekeeping_suspended); |
334 | 365 | ||
335 | do { | 366 | do { |
336 | u32 arch_offset; | ||
337 | |||
338 | seq = read_seqbegin(&timekeeper.lock); | 367 | seq = read_seqbegin(&timekeeper.lock); |
339 | 368 | ||
340 | *ts_raw = timekeeper.raw_time; | 369 | *ts_raw = timekeeper.raw_time; |
341 | *ts_real = timekeeper.xtime; | 370 | ts_real->tv_sec = timekeeper.xtime_sec; |
342 | 371 | ts_real->tv_nsec = 0; | |
343 | nsecs_raw = timekeeping_get_ns_raw(); | ||
344 | nsecs_real = timekeeping_get_ns(); | ||
345 | 372 | ||
346 | /* If arch requires, add in gettimeoffset() */ | 373 | nsecs_raw = timekeeping_get_ns_raw(&timekeeper); |
347 | arch_offset = arch_gettimeoffset(); | 374 | nsecs_real = timekeeping_get_ns(&timekeeper); |
348 | nsecs_raw += arch_offset; | ||
349 | nsecs_real += arch_offset; | ||
350 | 375 | ||
351 | } while (read_seqretry(&timekeeper.lock, seq)); | 376 | } while (read_seqretry(&timekeeper.lock, seq)); |
352 | 377 | ||
@@ -381,7 +406,7 @@ EXPORT_SYMBOL(do_gettimeofday); | |||
381 | */ | 406 | */ |
382 | int do_settimeofday(const struct timespec *tv) | 407 | int do_settimeofday(const struct timespec *tv) |
383 | { | 408 | { |
384 | struct timespec ts_delta; | 409 | struct timespec ts_delta, xt; |
385 | unsigned long flags; | 410 | unsigned long flags; |
386 | 411 | ||
387 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | 412 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) |
@@ -389,15 +414,18 @@ int do_settimeofday(const struct timespec *tv) | |||
389 | 414 | ||
390 | write_seqlock_irqsave(&timekeeper.lock, flags); | 415 | write_seqlock_irqsave(&timekeeper.lock, flags); |
391 | 416 | ||
392 | timekeeping_forward_now(); | 417 | timekeeping_forward_now(&timekeeper); |
418 | |||
419 | xt = tk_xtime(&timekeeper); | ||
420 | ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; | ||
421 | ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; | ||
393 | 422 | ||
394 | ts_delta.tv_sec = tv->tv_sec - timekeeper.xtime.tv_sec; | ||
395 | ts_delta.tv_nsec = tv->tv_nsec - timekeeper.xtime.tv_nsec; | ||
396 | timekeeper.wall_to_monotonic = | 423 | timekeeper.wall_to_monotonic = |
397 | timespec_sub(timekeeper.wall_to_monotonic, ts_delta); | 424 | timespec_sub(timekeeper.wall_to_monotonic, ts_delta); |
398 | 425 | ||
399 | timekeeper.xtime = *tv; | 426 | tk_set_xtime(&timekeeper, tv); |
400 | timekeeping_update(true); | 427 | |
428 | timekeeping_update(&timekeeper, true); | ||
401 | 429 | ||
402 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 430 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
403 | 431 | ||
@@ -424,13 +452,14 @@ int timekeeping_inject_offset(struct timespec *ts) | |||
424 | 452 | ||
425 | write_seqlock_irqsave(&timekeeper.lock, flags); | 453 | write_seqlock_irqsave(&timekeeper.lock, flags); |
426 | 454 | ||
427 | timekeeping_forward_now(); | 455 | timekeeping_forward_now(&timekeeper); |
456 | |||
428 | 457 | ||
429 | timekeeper.xtime = timespec_add(timekeeper.xtime, *ts); | 458 | tk_xtime_add(&timekeeper, ts); |
430 | timekeeper.wall_to_monotonic = | 459 | timekeeper.wall_to_monotonic = |
431 | timespec_sub(timekeeper.wall_to_monotonic, *ts); | 460 | timespec_sub(timekeeper.wall_to_monotonic, *ts); |
432 | 461 | ||
433 | timekeeping_update(true); | 462 | timekeeping_update(&timekeeper, true); |
434 | 463 | ||
435 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 464 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
436 | 465 | ||
@@ -455,14 +484,14 @@ static int change_clocksource(void *data) | |||
455 | 484 | ||
456 | write_seqlock_irqsave(&timekeeper.lock, flags); | 485 | write_seqlock_irqsave(&timekeeper.lock, flags); |
457 | 486 | ||
458 | timekeeping_forward_now(); | 487 | timekeeping_forward_now(&timekeeper); |
459 | if (!new->enable || new->enable(new) == 0) { | 488 | if (!new->enable || new->enable(new) == 0) { |
460 | old = timekeeper.clock; | 489 | old = timekeeper.clock; |
461 | timekeeper_setup_internals(new); | 490 | tk_setup_internals(&timekeeper, new); |
462 | if (old->disable) | 491 | if (old->disable) |
463 | old->disable(old); | 492 | old->disable(old); |
464 | } | 493 | } |
465 | timekeeping_update(true); | 494 | timekeeping_update(&timekeeper, true); |
466 | 495 | ||
467 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 496 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
468 | 497 | ||
@@ -512,7 +541,7 @@ void getrawmonotonic(struct timespec *ts) | |||
512 | 541 | ||
513 | do { | 542 | do { |
514 | seq = read_seqbegin(&timekeeper.lock); | 543 | seq = read_seqbegin(&timekeeper.lock); |
515 | nsecs = timekeeping_get_ns_raw(); | 544 | nsecs = timekeeping_get_ns_raw(&timekeeper); |
516 | *ts = timekeeper.raw_time; | 545 | *ts = timekeeper.raw_time; |
517 | 546 | ||
518 | } while (read_seqretry(&timekeeper.lock, seq)); | 547 | } while (read_seqretry(&timekeeper.lock, seq)); |
@@ -547,6 +576,7 @@ u64 timekeeping_max_deferment(void) | |||
547 | { | 576 | { |
548 | unsigned long seq; | 577 | unsigned long seq; |
549 | u64 ret; | 578 | u64 ret; |
579 | |||
550 | do { | 580 | do { |
551 | seq = read_seqbegin(&timekeeper.lock); | 581 | seq = read_seqbegin(&timekeeper.lock); |
552 | 582 | ||
@@ -607,19 +637,17 @@ void __init timekeeping_init(void) | |||
607 | clock = clocksource_default_clock(); | 637 | clock = clocksource_default_clock(); |
608 | if (clock->enable) | 638 | if (clock->enable) |
609 | clock->enable(clock); | 639 | clock->enable(clock); |
610 | timekeeper_setup_internals(clock); | 640 | tk_setup_internals(&timekeeper, clock); |
611 | 641 | ||
612 | timekeeper.xtime.tv_sec = now.tv_sec; | 642 | tk_set_xtime(&timekeeper, &now); |
613 | timekeeper.xtime.tv_nsec = now.tv_nsec; | ||
614 | timekeeper.raw_time.tv_sec = 0; | 643 | timekeeper.raw_time.tv_sec = 0; |
615 | timekeeper.raw_time.tv_nsec = 0; | 644 | timekeeper.raw_time.tv_nsec = 0; |
616 | if (boot.tv_sec == 0 && boot.tv_nsec == 0) { | 645 | if (boot.tv_sec == 0 && boot.tv_nsec == 0) |
617 | boot.tv_sec = timekeeper.xtime.tv_sec; | 646 | boot = tk_xtime(&timekeeper); |
618 | boot.tv_nsec = timekeeper.xtime.tv_nsec; | 647 | |
619 | } | ||
620 | set_normalized_timespec(&timekeeper.wall_to_monotonic, | 648 | set_normalized_timespec(&timekeeper.wall_to_monotonic, |
621 | -boot.tv_sec, -boot.tv_nsec); | 649 | -boot.tv_sec, -boot.tv_nsec); |
622 | update_rt_offset(); | 650 | update_rt_offset(&timekeeper); |
623 | timekeeper.total_sleep_time.tv_sec = 0; | 651 | timekeeper.total_sleep_time.tv_sec = 0; |
624 | timekeeper.total_sleep_time.tv_nsec = 0; | 652 | timekeeper.total_sleep_time.tv_nsec = 0; |
625 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 653 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
@@ -641,7 +669,8 @@ static void update_sleep_time(struct timespec t) | |||
641 | * Takes a timespec offset measuring a suspend interval and properly | 669 | * Takes a timespec offset measuring a suspend interval and properly |
642 | * adds the sleep offset to the timekeeping variables. | 670 | * adds the sleep offset to the timekeeping variables. |
643 | */ | 671 | */ |
644 | static void __timekeeping_inject_sleeptime(struct timespec *delta) | 672 | static void __timekeeping_inject_sleeptime(struct timekeeper *tk, |
673 | struct timespec *delta) | ||
645 | { | 674 | { |
646 | if (!timespec_valid(delta)) { | 675 | if (!timespec_valid(delta)) { |
647 | printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " | 676 | printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " |
@@ -649,10 +678,9 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) | |||
649 | return; | 678 | return; |
650 | } | 679 | } |
651 | 680 | ||
652 | timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); | 681 | tk_xtime_add(tk, delta); |
653 | timekeeper.wall_to_monotonic = | 682 | tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta); |
654 | timespec_sub(timekeeper.wall_to_monotonic, *delta); | 683 | update_sleep_time(timespec_add(tk->total_sleep_time, *delta)); |
655 | update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta)); | ||
656 | } | 684 | } |
657 | 685 | ||
658 | 686 | ||
@@ -678,11 +706,11 @@ void timekeeping_inject_sleeptime(struct timespec *delta) | |||
678 | 706 | ||
679 | write_seqlock_irqsave(&timekeeper.lock, flags); | 707 | write_seqlock_irqsave(&timekeeper.lock, flags); |
680 | 708 | ||
681 | timekeeping_forward_now(); | 709 | timekeeping_forward_now(&timekeeper); |
682 | 710 | ||
683 | __timekeeping_inject_sleeptime(delta); | 711 | __timekeeping_inject_sleeptime(&timekeeper, delta); |
684 | 712 | ||
685 | timekeeping_update(true); | 713 | timekeeping_update(&timekeeper, true); |
686 | 714 | ||
687 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 715 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
688 | 716 | ||
@@ -711,13 +739,13 @@ static void timekeeping_resume(void) | |||
711 | 739 | ||
712 | if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { | 740 | if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { |
713 | ts = timespec_sub(ts, timekeeping_suspend_time); | 741 | ts = timespec_sub(ts, timekeeping_suspend_time); |
714 | __timekeeping_inject_sleeptime(&ts); | 742 | __timekeeping_inject_sleeptime(&timekeeper, &ts); |
715 | } | 743 | } |
716 | /* re-base the last cycle value */ | 744 | /* re-base the last cycle value */ |
717 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); | 745 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
718 | timekeeper.ntp_error = 0; | 746 | timekeeper.ntp_error = 0; |
719 | timekeeping_suspended = 0; | 747 | timekeeping_suspended = 0; |
720 | timekeeping_update(false); | 748 | timekeeping_update(&timekeeper, false); |
721 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 749 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
722 | 750 | ||
723 | touch_softlockup_watchdog(); | 751 | touch_softlockup_watchdog(); |
@@ -737,7 +765,7 @@ static int timekeeping_suspend(void) | |||
737 | read_persistent_clock(&timekeeping_suspend_time); | 765 | read_persistent_clock(&timekeeping_suspend_time); |
738 | 766 | ||
739 | write_seqlock_irqsave(&timekeeper.lock, flags); | 767 | write_seqlock_irqsave(&timekeeper.lock, flags); |
740 | timekeeping_forward_now(); | 768 | timekeeping_forward_now(&timekeeper); |
741 | timekeeping_suspended = 1; | 769 | timekeeping_suspended = 1; |
742 | 770 | ||
743 | /* | 771 | /* |
@@ -746,7 +774,7 @@ static int timekeeping_suspend(void) | |||
746 | * try to compensate so the difference in system time | 774 | * try to compensate so the difference in system time |
747 | * and persistent_clock time stays close to constant. | 775 | * and persistent_clock time stays close to constant. |
748 | */ | 776 | */ |
749 | delta = timespec_sub(timekeeper.xtime, timekeeping_suspend_time); | 777 | delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time); |
750 | delta_delta = timespec_sub(delta, old_delta); | 778 | delta_delta = timespec_sub(delta, old_delta); |
751 | if (abs(delta_delta.tv_sec) >= 2) { | 779 | if (abs(delta_delta.tv_sec) >= 2) { |
752 | /* | 780 | /* |
@@ -785,7 +813,8 @@ device_initcall(timekeeping_init_ops); | |||
785 | * If the error is already larger, we look ahead even further | 813 | * If the error is already larger, we look ahead even further |
786 | * to compensate for late or lost adjustments. | 814 | * to compensate for late or lost adjustments. |
787 | */ | 815 | */ |
788 | static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, | 816 | static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, |
817 | s64 error, s64 *interval, | ||
789 | s64 *offset) | 818 | s64 *offset) |
790 | { | 819 | { |
791 | s64 tick_error, i; | 820 | s64 tick_error, i; |
@@ -801,7 +830,7 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, | |||
801 | * here. This is tuned so that an error of about 1 msec is adjusted | 830 | * here. This is tuned so that an error of about 1 msec is adjusted |
802 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). | 831 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). |
803 | */ | 832 | */ |
804 | error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); | 833 | error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); |
805 | error2 = abs(error2); | 834 | error2 = abs(error2); |
806 | for (look_ahead = 0; error2 > 0; look_ahead++) | 835 | for (look_ahead = 0; error2 > 0; look_ahead++) |
807 | error2 >>= 2; | 836 | error2 >>= 2; |
@@ -810,8 +839,8 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, | |||
810 | * Now calculate the error in (1 << look_ahead) ticks, but first | 839 | * Now calculate the error in (1 << look_ahead) ticks, but first |
811 | * remove the single look ahead already included in the error. | 840 | * remove the single look ahead already included in the error. |
812 | */ | 841 | */ |
813 | tick_error = ntp_tick_length() >> (timekeeper.ntp_error_shift + 1); | 842 | tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1); |
814 | tick_error -= timekeeper.xtime_interval >> 1; | 843 | tick_error -= tk->xtime_interval >> 1; |
815 | error = ((error - tick_error) >> look_ahead) + tick_error; | 844 | error = ((error - tick_error) >> look_ahead) + tick_error; |
816 | 845 | ||
817 | /* Finally calculate the adjustment shift value. */ | 846 | /* Finally calculate the adjustment shift value. */ |
@@ -836,9 +865,9 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, | |||
836 | * this is optimized for the most common adjustments of -1,0,1, | 865 | * this is optimized for the most common adjustments of -1,0,1, |
837 | * for other values we can do a bit more work. | 866 | * for other values we can do a bit more work. |
838 | */ | 867 | */ |
839 | static void timekeeping_adjust(s64 offset) | 868 | static void timekeeping_adjust(struct timekeeper *tk, s64 offset) |
840 | { | 869 | { |
841 | s64 error, interval = timekeeper.cycle_interval; | 870 | s64 error, interval = tk->cycle_interval; |
842 | int adj; | 871 | int adj; |
843 | 872 | ||
844 | /* | 873 | /* |
@@ -854,7 +883,7 @@ static void timekeeping_adjust(s64 offset) | |||
854 | * | 883 | * |
855 | * Note: It does not "save" on aggravation when reading the code. | 884 | * Note: It does not "save" on aggravation when reading the code. |
856 | */ | 885 | */ |
857 | error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1); | 886 | error = tk->ntp_error >> (tk->ntp_error_shift - 1); |
858 | if (error > interval) { | 887 | if (error > interval) { |
859 | /* | 888 | /* |
860 | * We now divide error by 4(via shift), which checks if | 889 | * We now divide error by 4(via shift), which checks if |
@@ -876,7 +905,8 @@ static void timekeeping_adjust(s64 offset) | |||
876 | if (likely(error <= interval)) | 905 | if (likely(error <= interval)) |
877 | adj = 1; | 906 | adj = 1; |
878 | else | 907 | else |
879 | adj = timekeeping_bigadjust(error, &interval, &offset); | 908 | adj = timekeeping_bigadjust(tk, error, &interval, |
909 | &offset); | ||
880 | } else if (error < -interval) { | 910 | } else if (error < -interval) { |
881 | /* See comment above, this is just switched for the negative */ | 911 | /* See comment above, this is just switched for the negative */ |
882 | error >>= 2; | 912 | error >>= 2; |
@@ -885,18 +915,17 @@ static void timekeeping_adjust(s64 offset) | |||
885 | interval = -interval; | 915 | interval = -interval; |
886 | offset = -offset; | 916 | offset = -offset; |
887 | } else | 917 | } else |
888 | adj = timekeeping_bigadjust(error, &interval, &offset); | 918 | adj = timekeeping_bigadjust(tk, error, &interval, |
889 | } else /* No adjustment needed */ | 919 | &offset); |
920 | } else | ||
890 | return; | 921 | return; |
891 | 922 | ||
892 | if (unlikely(timekeeper.clock->maxadj && | 923 | if (unlikely(tk->clock->maxadj && |
893 | (timekeeper.mult + adj > | 924 | (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { |
894 | timekeeper.clock->mult + timekeeper.clock->maxadj))) { | ||
895 | printk_once(KERN_WARNING | 925 | printk_once(KERN_WARNING |
896 | "Adjusting %s more than 11%% (%ld vs %ld)\n", | 926 | "Adjusting %s more than 11%% (%ld vs %ld)\n", |
897 | timekeeper.clock->name, (long)timekeeper.mult + adj, | 927 | tk->clock->name, (long)tk->mult + adj, |
898 | (long)timekeeper.clock->mult + | 928 | (long)tk->clock->mult + tk->clock->maxadj); |
899 | timekeeper.clock->maxadj); | ||
900 | } | 929 | } |
901 | /* | 930 | /* |
902 | * So the following can be confusing. | 931 | * So the following can be confusing. |
@@ -947,11 +976,60 @@ static void timekeeping_adjust(s64 offset) | |||
947 | * | 976 | * |
948 | * XXX - TODO: Doc ntp_error calculation. | 977 | * XXX - TODO: Doc ntp_error calculation. |
949 | */ | 978 | */ |
950 | timekeeper.mult += adj; | 979 | tk->mult += adj; |
951 | timekeeper.xtime_interval += interval; | 980 | tk->xtime_interval += interval; |
952 | timekeeper.xtime_nsec -= offset; | 981 | tk->xtime_nsec -= offset; |
953 | timekeeper.ntp_error -= (interval - offset) << | 982 | tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; |
954 | timekeeper.ntp_error_shift; | 983 | |
984 | /* | ||
985 | * It may be possible that when we entered this function, xtime_nsec | ||
986 | * was very small. Further, if we're slightly speeding the clocksource | ||
987 | * in the code above, its possible the required corrective factor to | ||
988 | * xtime_nsec could cause it to underflow. | ||
989 | * | ||
990 | * Now, since we already accumulated the second, cannot simply roll | ||
991 | * the accumulated second back, since the NTP subsystem has been | ||
992 | * notified via second_overflow. So instead we push xtime_nsec forward | ||
993 | * by the amount we underflowed, and add that amount into the error. | ||
994 | * | ||
995 | * We'll correct this error next time through this function, when | ||
996 | * xtime_nsec is not as small. | ||
997 | */ | ||
998 | if (unlikely((s64)tk->xtime_nsec < 0)) { | ||
999 | s64 neg = -(s64)tk->xtime_nsec; | ||
1000 | tk->xtime_nsec = 0; | ||
1001 | tk->ntp_error += neg << tk->ntp_error_shift; | ||
1002 | } | ||
1003 | |||
1004 | } | ||
1005 | |||
1006 | |||
1007 | /** | ||
1008 | * accumulate_nsecs_to_secs - Accumulates nsecs into secs | ||
1009 | * | ||
1010 | * Helper function that accumulates a the nsecs greater then a second | ||
1011 | * from the xtime_nsec field to the xtime_secs field. | ||
1012 | * It also calls into the NTP code to handle leapsecond processing. | ||
1013 | * | ||
1014 | */ | ||
1015 | static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) | ||
1016 | { | ||
1017 | u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; | ||
1018 | |||
1019 | while (tk->xtime_nsec >= nsecps) { | ||
1020 | int leap; | ||
1021 | |||
1022 | tk->xtime_nsec -= nsecps; | ||
1023 | tk->xtime_sec++; | ||
1024 | |||
1025 | /* Figure out if its a leap sec and apply if needed */ | ||
1026 | leap = second_overflow(tk->xtime_sec); | ||
1027 | tk->xtime_sec += leap; | ||
1028 | tk->wall_to_monotonic.tv_sec -= leap; | ||
1029 | if (leap) | ||
1030 | clock_was_set_delayed(); | ||
1031 | |||
1032 | } | ||
955 | } | 1033 | } |
956 | 1034 | ||
957 | 1035 | ||
@@ -964,46 +1042,36 @@ static void timekeeping_adjust(s64 offset) | |||
964 | * | 1042 | * |
965 | * Returns the unconsumed cycles. | 1043 | * Returns the unconsumed cycles. |
966 | */ | 1044 | */ |
967 | static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | 1045 | static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, |
1046 | u32 shift) | ||
968 | { | 1047 | { |
969 | u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; | ||
970 | u64 raw_nsecs; | 1048 | u64 raw_nsecs; |
971 | 1049 | ||
972 | /* If the offset is smaller than a shifted interval, do nothing */ | 1050 | /* If the offset is smaller then a shifted interval, do nothing */ |
973 | if (offset < timekeeper.cycle_interval<<shift) | 1051 | if (offset < tk->cycle_interval<<shift) |
974 | return offset; | 1052 | return offset; |
975 | 1053 | ||
976 | /* Accumulate one shifted interval */ | 1054 | /* Accumulate one shifted interval */ |
977 | offset -= timekeeper.cycle_interval << shift; | 1055 | offset -= tk->cycle_interval << shift; |
978 | timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; | 1056 | tk->clock->cycle_last += tk->cycle_interval << shift; |
979 | 1057 | ||
980 | timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; | 1058 | tk->xtime_nsec += tk->xtime_interval << shift; |
981 | while (timekeeper.xtime_nsec >= nsecps) { | 1059 | accumulate_nsecs_to_secs(tk); |
982 | int leap; | ||
983 | timekeeper.xtime_nsec -= nsecps; | ||
984 | timekeeper.xtime.tv_sec++; | ||
985 | leap = second_overflow(timekeeper.xtime.tv_sec); | ||
986 | timekeeper.xtime.tv_sec += leap; | ||
987 | timekeeper.wall_to_monotonic.tv_sec -= leap; | ||
988 | if (leap) | ||
989 | clock_was_set_delayed(); | ||
990 | } | ||
991 | 1060 | ||
992 | /* Accumulate raw time */ | 1061 | /* Accumulate raw time */ |
993 | raw_nsecs = timekeeper.raw_interval << shift; | 1062 | raw_nsecs = tk->raw_interval << shift; |
994 | raw_nsecs += timekeeper.raw_time.tv_nsec; | 1063 | raw_nsecs += tk->raw_time.tv_nsec; |
995 | if (raw_nsecs >= NSEC_PER_SEC) { | 1064 | if (raw_nsecs >= NSEC_PER_SEC) { |
996 | u64 raw_secs = raw_nsecs; | 1065 | u64 raw_secs = raw_nsecs; |
997 | raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); | 1066 | raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); |
998 | timekeeper.raw_time.tv_sec += raw_secs; | 1067 | tk->raw_time.tv_sec += raw_secs; |
999 | } | 1068 | } |
1000 | timekeeper.raw_time.tv_nsec = raw_nsecs; | 1069 | tk->raw_time.tv_nsec = raw_nsecs; |
1001 | 1070 | ||
1002 | /* Accumulate error between NTP and clock interval */ | 1071 | /* Accumulate error between NTP and clock interval */ |
1003 | timekeeper.ntp_error += ntp_tick_length() << shift; | 1072 | tk->ntp_error += ntp_tick_length() << shift; |
1004 | timekeeper.ntp_error -= | 1073 | tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << |
1005 | (timekeeper.xtime_interval + timekeeper.xtime_remainder) << | 1074 | (tk->ntp_error_shift + shift); |
1006 | (timekeeper.ntp_error_shift + shift); | ||
1007 | 1075 | ||
1008 | return offset; | 1076 | return offset; |
1009 | } | 1077 | } |
@@ -1019,6 +1087,7 @@ static void update_wall_time(void) | |||
1019 | cycle_t offset; | 1087 | cycle_t offset; |
1020 | int shift = 0, maxshift; | 1088 | int shift = 0, maxshift; |
1021 | unsigned long flags; | 1089 | unsigned long flags; |
1090 | s64 remainder; | ||
1022 | 1091 | ||
1023 | write_seqlock_irqsave(&timekeeper.lock, flags); | 1092 | write_seqlock_irqsave(&timekeeper.lock, flags); |
1024 | 1093 | ||
@@ -1033,8 +1102,6 @@ static void update_wall_time(void) | |||
1033 | #else | 1102 | #else |
1034 | offset = (clock->read(clock) - clock->cycle_last) & clock->mask; | 1103 | offset = (clock->read(clock) - clock->cycle_last) & clock->mask; |
1035 | #endif | 1104 | #endif |
1036 | timekeeper.xtime_nsec = (s64)timekeeper.xtime.tv_nsec << | ||
1037 | timekeeper.shift; | ||
1038 | 1105 | ||
1039 | /* | 1106 | /* |
1040 | * With NO_HZ we may have to accumulate many cycle_intervals | 1107 | * With NO_HZ we may have to accumulate many cycle_intervals |
@@ -1050,64 +1117,36 @@ static void update_wall_time(void) | |||
1050 | maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; | 1117 | maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; |
1051 | shift = min(shift, maxshift); | 1118 | shift = min(shift, maxshift); |
1052 | while (offset >= timekeeper.cycle_interval) { | 1119 | while (offset >= timekeeper.cycle_interval) { |
1053 | offset = logarithmic_accumulation(offset, shift); | 1120 | offset = logarithmic_accumulation(&timekeeper, offset, shift); |
1054 | if(offset < timekeeper.cycle_interval<<shift) | 1121 | if(offset < timekeeper.cycle_interval<<shift) |
1055 | shift--; | 1122 | shift--; |
1056 | } | 1123 | } |
1057 | 1124 | ||
1058 | /* correct the clock when NTP error is too big */ | 1125 | /* correct the clock when NTP error is too big */ |
1059 | timekeeping_adjust(offset); | 1126 | timekeeping_adjust(&timekeeper, offset); |
1060 | |||
1061 | /* | ||
1062 | * Since in the loop above, we accumulate any amount of time | ||
1063 | * in xtime_nsec over a second into xtime.tv_sec, its possible for | ||
1064 | * xtime_nsec to be fairly small after the loop. Further, if we're | ||
1065 | * slightly speeding the clocksource up in timekeeping_adjust(), | ||
1066 | * its possible the required corrective factor to xtime_nsec could | ||
1067 | * cause it to underflow. | ||
1068 | * | ||
1069 | * Now, we cannot simply roll the accumulated second back, since | ||
1070 | * the NTP subsystem has been notified via second_overflow. So | ||
1071 | * instead we push xtime_nsec forward by the amount we underflowed, | ||
1072 | * and add that amount into the error. | ||
1073 | * | ||
1074 | * We'll correct this error next time through this function, when | ||
1075 | * xtime_nsec is not as small. | ||
1076 | */ | ||
1077 | if (unlikely((s64)timekeeper.xtime_nsec < 0)) { | ||
1078 | s64 neg = -(s64)timekeeper.xtime_nsec; | ||
1079 | timekeeper.xtime_nsec = 0; | ||
1080 | timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; | ||
1081 | } | ||
1082 | 1127 | ||
1083 | 1128 | ||
1084 | /* | 1129 | /* |
1085 | * Store full nanoseconds into xtime after rounding it up and | 1130 | * Store only full nanoseconds into xtime_nsec after rounding |
1086 | * add the remainder to the error difference. | 1131 | * it up and add the remainder to the error difference. |
1087 | 	 */ | 1132 | 	 * XXX - This is necessary to avoid small 1ns inconsistencies caused |
1088 | timekeeper.xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> | 1133 | * by truncating the remainder in vsyscalls. However, it causes |
1089 | timekeeper.shift) + 1; | 1134 | * additional work to be done in timekeeping_adjust(). Once |
1090 | timekeeper.xtime_nsec -= (s64)timekeeper.xtime.tv_nsec << | 1135 | * the vsyscall implementations are converted to use xtime_nsec |
1091 | timekeeper.shift; | 1136 | * (shifted nanoseconds), this can be killed. |
1092 | timekeeper.ntp_error += timekeeper.xtime_nsec << | 1137 | */ |
1093 | timekeeper.ntp_error_shift; | 1138 | remainder = timekeeper.xtime_nsec & ((1 << timekeeper.shift) - 1); |
1139 | timekeeper.xtime_nsec -= remainder; | ||
1140 | timekeeper.xtime_nsec += 1 << timekeeper.shift; | ||
1141 | timekeeper.ntp_error += remainder << timekeeper.ntp_error_shift; | ||
1094 | 1142 | ||
1095 | /* | 1143 | /* |
1096 | * Finally, make sure that after the rounding | 1144 | * Finally, make sure that after the rounding |
1097 | * xtime.tv_nsec isn't larger than NSEC_PER_SEC | 1145 | * xtime_nsec isn't larger than NSEC_PER_SEC |
1098 | */ | 1146 | */ |
1099 | if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { | 1147 | accumulate_nsecs_to_secs(&timekeeper); |
1100 | int leap; | ||
1101 | timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; | ||
1102 | timekeeper.xtime.tv_sec++; | ||
1103 | leap = second_overflow(timekeeper.xtime.tv_sec); | ||
1104 | timekeeper.xtime.tv_sec += leap; | ||
1105 | timekeeper.wall_to_monotonic.tv_sec -= leap; | ||
1106 | if (leap) | ||
1107 | clock_was_set_delayed(); | ||
1108 | } | ||
1109 | 1148 | ||
1110 | timekeeping_update(false); | 1149 | timekeeping_update(&timekeeper, false); |
1111 | 1150 | ||
1112 | out: | 1151 | out: |
1113 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 1152 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
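The new remainder handling above replaces the old xtime.tv_nsec round-up: xtime_nsec now stays in shifted (sub-)nanosecond units, is rounded up to the next whole nanosecond, and the discarded fraction is fed back into ntp_error. A small self-contained sketch of that arithmetic (the shift values are assumptions for the demo, not the real clocksource parameters):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t shift = 10;           /* assumed clocksource shift        */
	const int ntp_error_shift = 2;       /* assumed, only for the demo       */
	uint64_t xtime_nsec = (123456789ULL << shift) + 345; /* shifted ns + frac */
	int64_t ntp_error = 0;

	/* keep only whole (shifted) nanoseconds, round up, push the remainder
	 * into the NTP error so it is corrected on a later pass */
	uint64_t remainder = xtime_nsec & ((1ULL << shift) - 1);
	xtime_nsec -= remainder;
	xtime_nsec += 1ULL << shift;
	ntp_error  += remainder << ntp_error_shift;

	printf("nsec = %llu, error = %lld\n",
	       (unsigned long long)(xtime_nsec >> shift), (long long)ntp_error);
	return 0;
}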
@@ -1152,21 +1191,20 @@ void get_monotonic_boottime(struct timespec *ts) | |||
1152 | { | 1191 | { |
1153 | struct timespec tomono, sleep; | 1192 | struct timespec tomono, sleep; |
1154 | unsigned int seq; | 1193 | unsigned int seq; |
1155 | s64 nsecs; | ||
1156 | 1194 | ||
1157 | WARN_ON(timekeeping_suspended); | 1195 | WARN_ON(timekeeping_suspended); |
1158 | 1196 | ||
1159 | do { | 1197 | do { |
1160 | seq = read_seqbegin(&timekeeper.lock); | 1198 | seq = read_seqbegin(&timekeeper.lock); |
1161 | *ts = timekeeper.xtime; | 1199 | ts->tv_sec = timekeeper.xtime_sec; |
1200 | ts->tv_nsec = timekeeping_get_ns(&timekeeper); | ||
1162 | tomono = timekeeper.wall_to_monotonic; | 1201 | tomono = timekeeper.wall_to_monotonic; |
1163 | sleep = timekeeper.total_sleep_time; | 1202 | sleep = timekeeper.total_sleep_time; |
1164 | nsecs = timekeeping_get_ns(); | ||
1165 | 1203 | ||
1166 | } while (read_seqretry(&timekeeper.lock, seq)); | 1204 | } while (read_seqretry(&timekeeper.lock, seq)); |
1167 | 1205 | ||
1168 | set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, | 1206 | set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, |
1169 | ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs); | 1207 | ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec); |
1170 | } | 1208 | } |
1171 | EXPORT_SYMBOL_GPL(get_monotonic_boottime); | 1209 | EXPORT_SYMBOL_GPL(get_monotonic_boottime); |
1172 | 1210 | ||
@@ -1199,13 +1237,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); | |||
1199 | 1237 | ||
1200 | unsigned long get_seconds(void) | 1238 | unsigned long get_seconds(void) |
1201 | { | 1239 | { |
1202 | return timekeeper.xtime.tv_sec; | 1240 | return timekeeper.xtime_sec; |
1203 | } | 1241 | } |
1204 | EXPORT_SYMBOL(get_seconds); | 1242 | EXPORT_SYMBOL(get_seconds); |
1205 | 1243 | ||
1206 | struct timespec __current_kernel_time(void) | 1244 | struct timespec __current_kernel_time(void) |
1207 | { | 1245 | { |
1208 | return timekeeper.xtime; | 1246 | return tk_xtime(&timekeeper); |
1209 | } | 1247 | } |
1210 | 1248 | ||
1211 | struct timespec current_kernel_time(void) | 1249 | struct timespec current_kernel_time(void) |
@@ -1216,7 +1254,7 @@ struct timespec current_kernel_time(void) | |||
1216 | do { | 1254 | do { |
1217 | seq = read_seqbegin(&timekeeper.lock); | 1255 | seq = read_seqbegin(&timekeeper.lock); |
1218 | 1256 | ||
1219 | now = timekeeper.xtime; | 1257 | now = tk_xtime(&timekeeper); |
1220 | } while (read_seqretry(&timekeeper.lock, seq)); | 1258 | } while (read_seqretry(&timekeeper.lock, seq)); |
1221 | 1259 | ||
1222 | return now; | 1260 | return now; |
@@ -1231,7 +1269,7 @@ struct timespec get_monotonic_coarse(void) | |||
1231 | do { | 1269 | do { |
1232 | seq = read_seqbegin(&timekeeper.lock); | 1270 | seq = read_seqbegin(&timekeeper.lock); |
1233 | 1271 | ||
1234 | now = timekeeper.xtime; | 1272 | now = tk_xtime(&timekeeper); |
1235 | mono = timekeeper.wall_to_monotonic; | 1273 | mono = timekeeper.wall_to_monotonic; |
1236 | } while (read_seqretry(&timekeeper.lock, seq)); | 1274 | } while (read_seqretry(&timekeeper.lock, seq)); |
1237 | 1275 | ||
@@ -1266,7 +1304,7 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, | |||
1266 | 1304 | ||
1267 | do { | 1305 | do { |
1268 | seq = read_seqbegin(&timekeeper.lock); | 1306 | seq = read_seqbegin(&timekeeper.lock); |
1269 | *xtim = timekeeper.xtime; | 1307 | *xtim = tk_xtime(&timekeeper); |
1270 | *wtom = timekeeper.wall_to_monotonic; | 1308 | *wtom = timekeeper.wall_to_monotonic; |
1271 | *sleep = timekeeper.total_sleep_time; | 1309 | *sleep = timekeeper.total_sleep_time; |
1272 | } while (read_seqretry(&timekeeper.lock, seq)); | 1310 | } while (read_seqretry(&timekeeper.lock, seq)); |
@@ -1290,11 +1328,8 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) | |||
1290 | do { | 1328 | do { |
1291 | seq = read_seqbegin(&timekeeper.lock); | 1329 | seq = read_seqbegin(&timekeeper.lock); |
1292 | 1330 | ||
1293 | secs = timekeeper.xtime.tv_sec; | 1331 | secs = timekeeper.xtime_sec; |
1294 | nsecs = timekeeper.xtime.tv_nsec; | 1332 | nsecs = timekeeping_get_ns(&timekeeper); |
1295 | nsecs += timekeeping_get_ns(); | ||
1296 | /* If arch requires, add in gettimeoffset() */ | ||
1297 | nsecs += arch_gettimeoffset(); | ||
1298 | 1333 | ||
1299 | *offs_real = timekeeper.offs_real; | 1334 | *offs_real = timekeeper.offs_real; |
1300 | *offs_boot = timekeeper.offs_boot; | 1335 | *offs_boot = timekeeper.offs_boot; |
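Throughout the readers above, timekeeper.xtime is gone; callers either read xtime_sec directly or rebuild a timespec via tk_xtime(). The helper itself is introduced earlier in the patch (outside this excerpt); the following is a plausible userspace rendering of what it does with the shifted xtime_nsec field, with simplified stand-in types:

#include <stdio.h>
#include <stdint.h>
#include <time.h>

/* simplified stand-in for the struct timekeeper fields used by the readers */
struct tk_demo {
	uint64_t xtime_sec;   /* seconds part of wall time                 */
	uint64_t xtime_nsec;  /* nanoseconds part, left-shifted by 'shift' */
	uint32_t shift;       /* clocksource shift                         */
};

/* tk_xtime()-style helper: fold the shifted nsec field back into a timespec */
static struct timespec tk_xtime_demo(const struct tk_demo *tk)
{
	struct timespec ts;

	ts.tv_sec  = tk->xtime_sec;
	ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
	return ts;
}

int main(void)
{
	struct tk_demo tk = { .xtime_sec = 1342972546,
			      .xtime_nsec = 123456789ULL << 10,
			      .shift = 10 };
	struct timespec now = tk_xtime_demo(&tk);

	printf("%ld.%09ld\n", (long)now.tv_sec, (long)now.tv_nsec);
	return 0;
}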
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 3258455549f4..af5a7e9f164b 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -167,7 +167,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) | |||
167 | { | 167 | { |
168 | struct tick_sched *ts = tick_get_tick_sched(cpu); | 168 | struct tick_sched *ts = tick_get_tick_sched(cpu); |
169 | P(nohz_mode); | 169 | P(nohz_mode); |
170 | P_ns(idle_tick); | 170 | P_ns(last_tick); |
171 | P(tick_stopped); | 171 | P(tick_stopped); |
172 | P(idle_jiffies); | 172 | P(idle_jiffies); |
173 | P(idle_calls); | 173 | P(idle_calls); |
@@ -259,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v) | |||
259 | u64 now = ktime_to_ns(ktime_get()); | 259 | u64 now = ktime_to_ns(ktime_get()); |
260 | int cpu; | 260 | int cpu; |
261 | 261 | ||
262 | SEQ_printf(m, "Timer List Version: v0.6\n"); | 262 | SEQ_printf(m, "Timer List Version: v0.7\n"); |
263 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); | 263 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); |
264 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); | 264 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); |
265 | 265 | ||
diff --git a/kernel/timer.c b/kernel/timer.c index 6ec7e7e0db43..a61c09374eba 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -77,6 +77,7 @@ struct tvec_base { | |||
77 | struct timer_list *running_timer; | 77 | struct timer_list *running_timer; |
78 | unsigned long timer_jiffies; | 78 | unsigned long timer_jiffies; |
79 | unsigned long next_timer; | 79 | unsigned long next_timer; |
80 | unsigned long active_timers; | ||
80 | struct tvec_root tv1; | 81 | struct tvec_root tv1; |
81 | struct tvec tv2; | 82 | struct tvec tv2; |
82 | struct tvec tv3; | 83 | struct tvec tv3; |
@@ -330,7 +331,8 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) | |||
330 | } | 331 | } |
331 | EXPORT_SYMBOL_GPL(set_timer_slack); | 332 | EXPORT_SYMBOL_GPL(set_timer_slack); |
332 | 333 | ||
333 | static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) | 334 | static void |
335 | __internal_add_timer(struct tvec_base *base, struct timer_list *timer) | ||
334 | { | 336 | { |
335 | unsigned long expires = timer->expires; | 337 | unsigned long expires = timer->expires; |
336 | unsigned long idx = expires - base->timer_jiffies; | 338 | unsigned long idx = expires - base->timer_jiffies; |
@@ -372,6 +374,19 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) | |||
372 | list_add_tail(&timer->entry, vec); | 374 | list_add_tail(&timer->entry, vec); |
373 | } | 375 | } |
374 | 376 | ||
377 | static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) | ||
378 | { | ||
379 | __internal_add_timer(base, timer); | ||
380 | /* | ||
381 | * Update base->active_timers and base->next_timer | ||
382 | */ | ||
383 | if (!tbase_get_deferrable(timer->base)) { | ||
384 | if (time_before(timer->expires, base->next_timer)) | ||
385 | base->next_timer = timer->expires; | ||
386 | base->active_timers++; | ||
387 | } | ||
388 | } | ||
389 | |||
375 | #ifdef CONFIG_TIMER_STATS | 390 | #ifdef CONFIG_TIMER_STATS |
376 | void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) | 391 | void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) |
377 | { | 392 | { |
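internal_add_timer() is now a thin accounting wrapper around __internal_add_timer(): only the raw wheel insertion is shared with cascade(), while the wrapper keeps next_timer and the new active_timers count in sync for non-deferrable timers. A compact userspace model of that split (struct and field names are simplified stand-ins, not the kernel structures):

#include <stdbool.h>
#include <stdio.h>

struct demo_base {
	unsigned long timer_jiffies;
	unsigned long next_timer;
	unsigned long active_timers;
};

struct demo_timer {
	unsigned long expires;
	bool deferrable;
};

/* raw insertion into the wheel: no next_timer/active_timers bookkeeping */
static void __demo_add_timer(struct demo_base *base, struct demo_timer *t)
{
	/* hashing into tv1..tv5 elided; only the bookkeeping matters here */
	(void)base;
	(void)t;
}

static void demo_add_timer(struct demo_base *base, struct demo_timer *t)
{
	__demo_add_timer(base, t);
	if (!t->deferrable) {
		if (t->expires < base->next_timer)   /* time_before() simplified */
			base->next_timer = t->expires;
		base->active_timers++;
	}
}

int main(void)
{
	struct demo_base base = { .timer_jiffies = 1000,
				  .next_timer = 1000 + 3600,
				  .active_timers = 0 };
	struct demo_timer a = { .expires = 1200, .deferrable = false };
	struct demo_timer b = { .expires = 1100, .deferrable = true };

	demo_add_timer(&base, &a);
	demo_add_timer(&base, &b);   /* deferrable: not counted, no next_timer update */
	printf("next_timer=%lu active=%lu\n", base.next_timer, base.active_timers);
	return 0;
}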
@@ -654,8 +669,7 @@ void init_timer_deferrable_key(struct timer_list *timer, | |||
654 | } | 669 | } |
655 | EXPORT_SYMBOL(init_timer_deferrable_key); | 670 | EXPORT_SYMBOL(init_timer_deferrable_key); |
656 | 671 | ||
657 | static inline void detach_timer(struct timer_list *timer, | 672 | static inline void detach_timer(struct timer_list *timer, bool clear_pending) |
658 | int clear_pending) | ||
659 | { | 673 | { |
660 | struct list_head *entry = &timer->entry; | 674 | struct list_head *entry = &timer->entry; |
661 | 675 | ||
@@ -667,6 +681,29 @@ static inline void detach_timer(struct timer_list *timer, | |||
667 | entry->prev = LIST_POISON2; | 681 | entry->prev = LIST_POISON2; |
668 | } | 682 | } |
669 | 683 | ||
684 | static inline void | ||
685 | detach_expired_timer(struct timer_list *timer, struct tvec_base *base) | ||
686 | { | ||
687 | detach_timer(timer, true); | ||
688 | if (!tbase_get_deferrable(timer->base)) | ||
689 | timer->base->active_timers--; | ||
690 | } | ||
691 | |||
692 | static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, | ||
693 | bool clear_pending) | ||
694 | { | ||
695 | if (!timer_pending(timer)) | ||
696 | return 0; | ||
697 | |||
698 | detach_timer(timer, clear_pending); | ||
699 | if (!tbase_get_deferrable(timer->base)) { | ||
700 | timer->base->active_timers--; | ||
701 | if (timer->expires == base->next_timer) | ||
702 | base->next_timer = base->timer_jiffies; | ||
703 | } | ||
704 | return 1; | ||
705 | } | ||
706 | |||
670 | /* | 707 | /* |
671 | * We are using hashed locking: holding per_cpu(tvec_bases).lock | 708 | * We are using hashed locking: holding per_cpu(tvec_bases).lock |
672 | * means that all timers which are tied to this base via timer->base are | 709 | * means that all timers which are tied to this base via timer->base are |
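detach_expired_timer() and detach_if_pending() consolidate the detach-plus-accounting pattern that __mod_timer(), del_timer() and try_to_del_timer_sync() used to open-code. A small userspace model of the helper and a del_timer()-style caller (simplified types, no locking; the clear_pending distinction between NULL and list poison is elided):

#include <stdbool.h>
#include <stdio.h>

struct demo_base {
	unsigned long timer_jiffies;
	unsigned long next_timer;
	unsigned long active_timers;
};

struct demo_timer {
	unsigned long expires;
	bool deferrable;
	bool pending;
};

/* detach_if_pending()-style helper: remove the timer if it is still queued,
 * fix up next_timer/active_timers, and report whether anything was detached */
static int demo_detach_if_pending(struct demo_timer *t, struct demo_base *base,
				  bool clear_pending)
{
	(void)clear_pending;

	if (!t->pending)
		return 0;

	t->pending = false;
	if (!t->deferrable) {
		base->active_timers--;
		if (t->expires == base->next_timer)
			base->next_timer = base->timer_jiffies;
	}
	return 1;
}

/* del_timer()-style caller: one call replaces the old open-coded block */
static int demo_del_timer(struct demo_timer *t, struct demo_base *base)
{
	return demo_detach_if_pending(t, base, true);
}

int main(void)
{
	struct demo_base base = { .timer_jiffies = 1000, .next_timer = 1200,
				  .active_timers = 1 };
	struct demo_timer t = { .expires = 1200, .deferrable = false,
				.pending = true };

	printf("deleted=%d next_timer=%lu active=%lu\n",
	       demo_del_timer(&t, &base), base.next_timer, base.active_timers);
	return 0;
}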
@@ -712,16 +749,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, | |||
712 | 749 | ||
713 | base = lock_timer_base(timer, &flags); | 750 | base = lock_timer_base(timer, &flags); |
714 | 751 | ||
715 | if (timer_pending(timer)) { | 752 | ret = detach_if_pending(timer, base, false); |
716 | detach_timer(timer, 0); | 753 | if (!ret && pending_only) |
717 | if (timer->expires == base->next_timer && | 754 | goto out_unlock; |
718 | !tbase_get_deferrable(timer->base)) | ||
719 | base->next_timer = base->timer_jiffies; | ||
720 | ret = 1; | ||
721 | } else { | ||
722 | if (pending_only) | ||
723 | goto out_unlock; | ||
724 | } | ||
725 | 755 | ||
726 | debug_activate(timer, expires); | 756 | debug_activate(timer, expires); |
727 | 757 | ||
@@ -752,9 +782,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, | |||
752 | } | 782 | } |
753 | 783 | ||
754 | timer->expires = expires; | 784 | timer->expires = expires; |
755 | if (time_before(timer->expires, base->next_timer) && | ||
756 | !tbase_get_deferrable(timer->base)) | ||
757 | base->next_timer = timer->expires; | ||
758 | internal_add_timer(base, timer); | 785 | internal_add_timer(base, timer); |
759 | 786 | ||
760 | out_unlock: | 787 | out_unlock: |
@@ -920,9 +947,6 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
920 | spin_lock_irqsave(&base->lock, flags); | 947 | spin_lock_irqsave(&base->lock, flags); |
921 | timer_set_base(timer, base); | 948 | timer_set_base(timer, base); |
922 | debug_activate(timer, timer->expires); | 949 | debug_activate(timer, timer->expires); |
923 | if (time_before(timer->expires, base->next_timer) && | ||
924 | !tbase_get_deferrable(timer->base)) | ||
925 | base->next_timer = timer->expires; | ||
926 | internal_add_timer(base, timer); | 950 | internal_add_timer(base, timer); |
927 | /* | 951 | /* |
928 | * Check whether the other CPU is idle and needs to be | 952 | * Check whether the other CPU is idle and needs to be |
@@ -959,13 +983,7 @@ int del_timer(struct timer_list *timer) | |||
959 | timer_stats_timer_clear_start_info(timer); | 983 | timer_stats_timer_clear_start_info(timer); |
960 | if (timer_pending(timer)) { | 984 | if (timer_pending(timer)) { |
961 | base = lock_timer_base(timer, &flags); | 985 | base = lock_timer_base(timer, &flags); |
962 | if (timer_pending(timer)) { | 986 | ret = detach_if_pending(timer, base, true); |
963 | detach_timer(timer, 1); | ||
964 | if (timer->expires == base->next_timer && | ||
965 | !tbase_get_deferrable(timer->base)) | ||
966 | base->next_timer = base->timer_jiffies; | ||
967 | ret = 1; | ||
968 | } | ||
969 | spin_unlock_irqrestore(&base->lock, flags); | 987 | spin_unlock_irqrestore(&base->lock, flags); |
970 | } | 988 | } |
971 | 989 | ||
@@ -990,19 +1008,10 @@ int try_to_del_timer_sync(struct timer_list *timer) | |||
990 | 1008 | ||
991 | base = lock_timer_base(timer, &flags); | 1009 | base = lock_timer_base(timer, &flags); |
992 | 1010 | ||
993 | if (base->running_timer == timer) | 1011 | if (base->running_timer != timer) { |
994 | goto out; | 1012 | timer_stats_timer_clear_start_info(timer); |
995 | 1013 | ret = detach_if_pending(timer, base, true); | |
996 | timer_stats_timer_clear_start_info(timer); | ||
997 | ret = 0; | ||
998 | if (timer_pending(timer)) { | ||
999 | detach_timer(timer, 1); | ||
1000 | if (timer->expires == base->next_timer && | ||
1001 | !tbase_get_deferrable(timer->base)) | ||
1002 | base->next_timer = base->timer_jiffies; | ||
1003 | ret = 1; | ||
1004 | } | 1014 | } |
1005 | out: | ||
1006 | spin_unlock_irqrestore(&base->lock, flags); | 1015 | spin_unlock_irqrestore(&base->lock, flags); |
1007 | 1016 | ||
1008 | return ret; | 1017 | return ret; |
@@ -1089,7 +1098,8 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) | |||
1089 | */ | 1098 | */ |
1090 | list_for_each_entry_safe(timer, tmp, &tv_list, entry) { | 1099 | list_for_each_entry_safe(timer, tmp, &tv_list, entry) { |
1091 | BUG_ON(tbase_get_base(timer->base) != base); | 1100 | BUG_ON(tbase_get_base(timer->base) != base); |
1092 | internal_add_timer(base, timer); | 1101 | /* No accounting, while moving them */ |
1102 | __internal_add_timer(base, timer); | ||
1093 | } | 1103 | } |
1094 | 1104 | ||
1095 | return index; | 1105 | return index; |
@@ -1178,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base) | |||
1178 | timer_stats_account_timer(timer); | 1188 | timer_stats_account_timer(timer); |
1179 | 1189 | ||
1180 | base->running_timer = timer; | 1190 | base->running_timer = timer; |
1181 | detach_timer(timer, 1); | 1191 | detach_expired_timer(timer, base); |
1182 | 1192 | ||
1183 | spin_unlock_irq(&base->lock); | 1193 | spin_unlock_irq(&base->lock); |
1184 | call_timer_fn(timer, fn, data); | 1194 | call_timer_fn(timer, fn, data); |
@@ -1316,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, | |||
1316 | unsigned long get_next_timer_interrupt(unsigned long now) | 1326 | unsigned long get_next_timer_interrupt(unsigned long now) |
1317 | { | 1327 | { |
1318 | struct tvec_base *base = __this_cpu_read(tvec_bases); | 1328 | struct tvec_base *base = __this_cpu_read(tvec_bases); |
1319 | unsigned long expires; | 1329 | unsigned long expires = now + NEXT_TIMER_MAX_DELTA; |
1320 | 1330 | ||
1321 | /* | 1331 | /* |
1322 | * Pretend that there is no timer pending if the cpu is offline. | 1332 | * Pretend that there is no timer pending if the cpu is offline. |
1323 | * Possible pending timers will be migrated later to an active cpu. | 1333 | * Possible pending timers will be migrated later to an active cpu. |
1324 | */ | 1334 | */ |
1325 | if (cpu_is_offline(smp_processor_id())) | 1335 | if (cpu_is_offline(smp_processor_id())) |
1326 | return now + NEXT_TIMER_MAX_DELTA; | 1336 | return expires; |
1337 | |||
1327 | spin_lock(&base->lock); | 1338 | spin_lock(&base->lock); |
1328 | if (time_before_eq(base->next_timer, base->timer_jiffies)) | 1339 | if (base->active_timers) { |
1329 | base->next_timer = __next_timer_interrupt(base); | 1340 | if (time_before_eq(base->next_timer, base->timer_jiffies)) |
1330 | expires = base->next_timer; | 1341 | base->next_timer = __next_timer_interrupt(base); |
1342 | expires = base->next_timer; | ||
1343 | } | ||
1331 | spin_unlock(&base->lock); | 1344 | spin_unlock(&base->lock); |
1332 | 1345 | ||
1333 | if (time_before_eq(expires, now)) | 1346 | if (time_before_eq(expires, now)) |
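The get_next_timer_interrupt() change above only scans the wheel when active_timers is non-zero; a base holding nothing but deferrable timers now reports a far-future expiry, so an idle NOHZ CPU can sleep longer. A short userspace sketch of that control flow (constants and helpers are stand-ins; the trailing hrtimer comparison of the real function is elided):

#include <stdio.h>

#define NEXT_TIMER_MAX_DELTA_DEMO ((1UL << 30) - 1)   /* stand-in value */

struct demo_base {
	unsigned long timer_jiffies;
	unsigned long next_timer;
	unsigned long active_timers;
};

/* stand-in for __next_timer_interrupt(): the full wheel scan is elided */
static unsigned long demo_scan_wheel(struct demo_base *base)
{
	return base->next_timer;
}

static unsigned long demo_get_next_timer_interrupt(struct demo_base *base,
						   unsigned long now)
{
	unsigned long expires = now + NEXT_TIMER_MAX_DELTA_DEMO;

	if (base->active_timers) {
		if (base->next_timer <= base->timer_jiffies) /* time_before_eq() */
			base->next_timer = demo_scan_wheel(base);
		expires = base->next_timer;
	}
	return expires;
}

int main(void)
{
	struct demo_base idle = { .timer_jiffies = 1000, .next_timer = 1000,
				  .active_timers = 0 };
	struct demo_base busy = { .timer_jiffies = 1000, .next_timer = 1250,
				  .active_timers = 3 };

	printf("idle: %lu\n", demo_get_next_timer_interrupt(&idle, 1000));
	printf("busy: %lu\n", demo_get_next_timer_interrupt(&busy, 1000));
	return 0;
}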
@@ -1704,6 +1717,7 @@ static int __cpuinit init_timers_cpu(int cpu) | |||
1704 | 1717 | ||
1705 | base->timer_jiffies = jiffies; | 1718 | base->timer_jiffies = jiffies; |
1706 | base->next_timer = base->timer_jiffies; | 1719 | base->next_timer = base->timer_jiffies; |
1720 | base->active_timers = 0; | ||
1707 | return 0; | 1721 | return 0; |
1708 | } | 1722 | } |
1709 | 1723 | ||
@@ -1714,11 +1728,9 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea | |||
1714 | 1728 | ||
1715 | while (!list_empty(head)) { | 1729 | while (!list_empty(head)) { |
1716 | timer = list_first_entry(head, struct timer_list, entry); | 1730 | timer = list_first_entry(head, struct timer_list, entry); |
1717 | detach_timer(timer, 0); | 1731 | /* We ignore the accounting on the dying cpu */ |
1732 | detach_timer(timer, false); | ||
1718 | timer_set_base(timer, new_base); | 1733 | timer_set_base(timer, new_base); |
1719 | if (time_before(timer->expires, new_base->next_timer) && | ||
1720 | !tbase_get_deferrable(timer->base)) | ||
1721 | new_base->next_timer = timer->expires; | ||
1722 | internal_add_timer(new_base, timer); | 1734 | internal_add_timer(new_base, timer); |
1723 | } | 1735 | } |
1724 | } | 1736 | } |