diff options
Diffstat (limited to 'kernel/timer.c')
-rw-r--r-- | kernel/timer.c | 374 |
1 files changed, 232 insertions, 142 deletions
diff --git a/kernel/timer.c b/kernel/timer.c index c2a8ccfc2882..cb1b86a9c52f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
35 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
36 | #include <linux/delay.h> | 36 | #include <linux/delay.h> |
37 | #include <linux/tick.h> | ||
38 | #include <linux/kallsyms.h> | ||
37 | 39 | ||
38 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
39 | #include <asm/unistd.h> | 41 | #include <asm/unistd.h> |
@@ -85,7 +87,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; | |||
85 | * @j: the time in (absolute) jiffies that should be rounded | 87 | * @j: the time in (absolute) jiffies that should be rounded |
86 | * @cpu: the processor number on which the timeout will happen | 88 | * @cpu: the processor number on which the timeout will happen |
87 | * | 89 | * |
88 | * __round_jiffies rounds an absolute time in the future (in jiffies) | 90 | * __round_jiffies() rounds an absolute time in the future (in jiffies) |
89 | * up or down to (approximately) full seconds. This is useful for timers | 91 | * up or down to (approximately) full seconds. This is useful for timers |
90 | * for which the exact time they fire does not matter too much, as long as | 92 | * for which the exact time they fire does not matter too much, as long as |
91 | * they fire approximately every X seconds. | 93 | * they fire approximately every X seconds. |
@@ -98,7 +100,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; | |||
98 | * processors firing at the exact same time, which could lead | 100 | * processors firing at the exact same time, which could lead |
99 | * to lock contention or spurious cache line bouncing. | 101 | * to lock contention or spurious cache line bouncing. |
100 | * | 102 | * |
101 | * The return value is the rounded version of the "j" parameter. | 103 | * The return value is the rounded version of the @j parameter. |
102 | */ | 104 | */ |
103 | unsigned long __round_jiffies(unsigned long j, int cpu) | 105 | unsigned long __round_jiffies(unsigned long j, int cpu) |
104 | { | 106 | { |
@@ -142,7 +144,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies); | |||
142 | * @j: the time in (relative) jiffies that should be rounded | 144 | * @j: the time in (relative) jiffies that should be rounded |
143 | * @cpu: the processor number on which the timeout will happen | 145 | * @cpu: the processor number on which the timeout will happen |
144 | * | 146 | * |
145 | * __round_jiffies_relative rounds a time delta in the future (in jiffies) | 147 | * __round_jiffies_relative() rounds a time delta in the future (in jiffies) |
146 | * up or down to (approximately) full seconds. This is useful for timers | 148 | * up or down to (approximately) full seconds. This is useful for timers |
147 | * for which the exact time they fire does not matter too much, as long as | 149 | * for which the exact time they fire does not matter too much, as long as |
148 | * they fire approximately every X seconds. | 150 | * they fire approximately every X seconds. |
@@ -155,7 +157,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies); | |||
155 | * processors firing at the exact same time, which could lead | 157 | * processors firing at the exact same time, which could lead |
156 | * to lock contention or spurious cache line bouncing. | 158 | * to lock contention or spurious cache line bouncing. |
157 | * | 159 | * |
158 | * The return value is the rounded version of the "j" parameter. | 160 | * The return value is the rounded version of the @j parameter. |
159 | */ | 161 | */ |
160 | unsigned long __round_jiffies_relative(unsigned long j, int cpu) | 162 | unsigned long __round_jiffies_relative(unsigned long j, int cpu) |
161 | { | 163 | { |
@@ -173,7 +175,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative); | |||
173 | * round_jiffies - function to round jiffies to a full second | 175 | * round_jiffies - function to round jiffies to a full second |
174 | * @j: the time in (absolute) jiffies that should be rounded | 176 | * @j: the time in (absolute) jiffies that should be rounded |
175 | * | 177 | * |
176 | * round_jiffies rounds an absolute time in the future (in jiffies) | 178 | * round_jiffies() rounds an absolute time in the future (in jiffies) |
177 | * up or down to (approximately) full seconds. This is useful for timers | 179 | * up or down to (approximately) full seconds. This is useful for timers |
178 | * for which the exact time they fire does not matter too much, as long as | 180 | * for which the exact time they fire does not matter too much, as long as |
179 | * they fire approximately every X seconds. | 181 | * they fire approximately every X seconds. |
@@ -182,7 +184,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative); | |||
182 | * at the same time, rather than at various times spread out. The goal | 184 | * at the same time, rather than at various times spread out. The goal |
183 | * of this is to have the CPU wake up less, which saves power. | 185 | * of this is to have the CPU wake up less, which saves power. |
184 | * | 186 | * |
185 | * The return value is the rounded version of the "j" parameter. | 187 | * The return value is the rounded version of the @j parameter. |
186 | */ | 188 | */ |
187 | unsigned long round_jiffies(unsigned long j) | 189 | unsigned long round_jiffies(unsigned long j) |
188 | { | 190 | { |
@@ -194,7 +196,7 @@ EXPORT_SYMBOL_GPL(round_jiffies); | |||
194 | * round_jiffies_relative - function to round jiffies to a full second | 196 | * round_jiffies_relative - function to round jiffies to a full second |
195 | * @j: the time in (relative) jiffies that should be rounded | 197 | * @j: the time in (relative) jiffies that should be rounded |
196 | * | 198 | * |
197 | * round_jiffies_relative rounds a time delta in the future (in jiffies) | 199 | * round_jiffies_relative() rounds a time delta in the future (in jiffies) |
198 | * up or down to (approximately) full seconds. This is useful for timers | 200 | * up or down to (approximately) full seconds. This is useful for timers |
199 | * for which the exact time they fire does not matter too much, as long as | 201 | * for which the exact time they fire does not matter too much, as long as |
200 | * they fire approximately every X seconds. | 202 | * they fire approximately every X seconds. |
@@ -203,7 +205,7 @@ EXPORT_SYMBOL_GPL(round_jiffies); | |||
203 | * at the same time, rather than at various times spread out. The goal | 205 | * at the same time, rather than at various times spread out. The goal |
204 | * of this is to have the CPU wake up less, which saves power. | 206 | * of this is to have the CPU wake up less, which saves power. |
205 | * | 207 | * |
206 | * The return value is the rounded version of the "j" parameter. | 208 | * The return value is the rounded version of the @j parameter. |
207 | */ | 209 | */ |
208 | unsigned long round_jiffies_relative(unsigned long j) | 210 | unsigned long round_jiffies_relative(unsigned long j) |
209 | { | 211 | { |
@@ -262,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) | |||
262 | list_add_tail(&timer->entry, vec); | 264 | list_add_tail(&timer->entry, vec); |
263 | } | 265 | } |
264 | 266 | ||
267 | #ifdef CONFIG_TIMER_STATS | ||
268 | void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) | ||
269 | { | ||
270 | if (timer->start_site) | ||
271 | return; | ||
272 | |||
273 | timer->start_site = addr; | ||
274 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | ||
275 | timer->start_pid = current->pid; | ||
276 | } | ||
277 | #endif | ||
278 | |||
265 | /** | 279 | /** |
266 | * init_timer - initialize a timer. | 280 | * init_timer - initialize a timer. |
267 | * @timer: the timer to be initialized | 281 | * @timer: the timer to be initialized |
@@ -273,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer) | |||
273 | { | 287 | { |
274 | timer->entry.next = NULL; | 288 | timer->entry.next = NULL; |
275 | timer->base = __raw_get_cpu_var(tvec_bases); | 289 | timer->base = __raw_get_cpu_var(tvec_bases); |
290 | #ifdef CONFIG_TIMER_STATS | ||
291 | timer->start_site = NULL; | ||
292 | timer->start_pid = -1; | ||
293 | memset(timer->start_comm, 0, TASK_COMM_LEN); | ||
294 | #endif | ||
276 | } | 295 | } |
277 | EXPORT_SYMBOL(init_timer); | 296 | EXPORT_SYMBOL(init_timer); |
278 | 297 | ||
279 | static inline void detach_timer(struct timer_list *timer, | 298 | static inline void detach_timer(struct timer_list *timer, |
280 | int clear_pending) | 299 | int clear_pending) |
281 | { | 300 | { |
282 | struct list_head *entry = &timer->entry; | 301 | struct list_head *entry = &timer->entry; |
283 | 302 | ||
@@ -324,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) | |||
324 | unsigned long flags; | 343 | unsigned long flags; |
325 | int ret = 0; | 344 | int ret = 0; |
326 | 345 | ||
346 | timer_stats_timer_set_start_info(timer); | ||
327 | BUG_ON(!timer->function); | 347 | BUG_ON(!timer->function); |
328 | 348 | ||
329 | base = lock_timer_base(timer, &flags); | 349 | base = lock_timer_base(timer, &flags); |
@@ -374,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
374 | tvec_base_t *base = per_cpu(tvec_bases, cpu); | 394 | tvec_base_t *base = per_cpu(tvec_bases, cpu); |
375 | unsigned long flags; | 395 | unsigned long flags; |
376 | 396 | ||
397 | timer_stats_timer_set_start_info(timer); | ||
377 | BUG_ON(timer_pending(timer) || !timer->function); | 398 | BUG_ON(timer_pending(timer) || !timer->function); |
378 | spin_lock_irqsave(&base->lock, flags); | 399 | spin_lock_irqsave(&base->lock, flags); |
379 | timer->base = base; | 400 | timer->base = base; |
@@ -387,7 +408,7 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
387 | * @timer: the timer to be modified | 408 | * @timer: the timer to be modified |
388 | * @expires: new timeout in jiffies | 409 | * @expires: new timeout in jiffies |
389 | * | 410 | * |
390 | * mod_timer is a more efficient way to update the expire field of an | 411 | * mod_timer() is a more efficient way to update the expire field of an |
391 | * active timer (if the timer is inactive it will be activated) | 412 | * active timer (if the timer is inactive it will be activated) |
392 | * | 413 | * |
393 | * mod_timer(timer, expires) is equivalent to: | 414 | * mod_timer(timer, expires) is equivalent to: |
@@ -406,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires) | |||
406 | { | 427 | { |
407 | BUG_ON(!timer->function); | 428 | BUG_ON(!timer->function); |
408 | 429 | ||
430 | timer_stats_timer_set_start_info(timer); | ||
409 | /* | 431 | /* |
410 | * This is a common optimization triggered by the | 432 | * This is a common optimization triggered by the |
411 | * networking code - if the timer is re-modified | 433 | * networking code - if the timer is re-modified |
@@ -436,6 +458,7 @@ int del_timer(struct timer_list *timer) | |||
436 | unsigned long flags; | 458 | unsigned long flags; |
437 | int ret = 0; | 459 | int ret = 0; |
438 | 460 | ||
461 | timer_stats_timer_clear_start_info(timer); | ||
439 | if (timer_pending(timer)) { | 462 | if (timer_pending(timer)) { |
440 | base = lock_timer_base(timer, &flags); | 463 | base = lock_timer_base(timer, &flags); |
441 | if (timer_pending(timer)) { | 464 | if (timer_pending(timer)) { |
@@ -490,7 +513,7 @@ out: | |||
490 | * the timer it also makes sure the handler has finished executing on other | 513 | * the timer it also makes sure the handler has finished executing on other |
491 | * CPUs. | 514 | * CPUs. |
492 | * | 515 | * |
493 | * Synchronization rules: callers must prevent restarting of the timer, | 516 | * Synchronization rules: Callers must prevent restarting of the timer, |
494 | * otherwise this function is meaningless. It must not be called from | 517 | * otherwise this function is meaningless. It must not be called from |
495 | * interrupt contexts. The caller must not hold locks which would prevent | 518 | * interrupt contexts. The caller must not hold locks which would prevent |
496 | * completion of the timer's handler. The timer's handler must not call | 519 | * completion of the timer's handler. The timer's handler must not call |
@@ -569,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base) | |||
569 | fn = timer->function; | 592 | fn = timer->function; |
570 | data = timer->data; | 593 | data = timer->data; |
571 | 594 | ||
595 | timer_stats_account_timer(timer); | ||
596 | |||
572 | set_running_timer(base, timer); | 597 | set_running_timer(base, timer); |
573 | detach_timer(timer, 1); | 598 | detach_timer(timer, 1); |
574 | spin_unlock_irq(&base->lock); | 599 | spin_unlock_irq(&base->lock); |
@@ -591,105 +616,124 @@ static inline void __run_timers(tvec_base_t *base) | |||
591 | spin_unlock_irq(&base->lock); | 616 | spin_unlock_irq(&base->lock); |
592 | } | 617 | } |
593 | 618 | ||
594 | #ifdef CONFIG_NO_IDLE_HZ | 619 | #if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) |
595 | /* | 620 | /* |
596 | * Find out when the next timer event is due to happen. This | 621 | * Find out when the next timer event is due to happen. This |
597 | * is used on S/390 to stop all activity when a CPU is idle. | 622 | * is used on S/390 to stop all activity when a CPU is idle. |
598 | * This function needs to be called with interrupts disabled. | 623 | * This function needs to be called with interrupts disabled. |
599 | */ | 624 | */ |
600 | unsigned long next_timer_interrupt(void) | 625 | static unsigned long __next_timer_interrupt(tvec_base_t *base) |
601 | { | 626 | { |
602 | tvec_base_t *base; | 627 | unsigned long timer_jiffies = base->timer_jiffies; |
603 | struct list_head *list; | 628 | unsigned long expires = timer_jiffies + (LONG_MAX >> 1); |
629 | int index, slot, array, found = 0; | ||
604 | struct timer_list *nte; | 630 | struct timer_list *nte; |
605 | unsigned long expires; | ||
606 | unsigned long hr_expires = MAX_JIFFY_OFFSET; | ||
607 | ktime_t hr_delta; | ||
608 | tvec_t *varray[4]; | 631 | tvec_t *varray[4]; |
609 | int i, j; | ||
610 | |||
611 | hr_delta = hrtimer_get_next_event(); | ||
612 | if (hr_delta.tv64 != KTIME_MAX) { | ||
613 | struct timespec tsdelta; | ||
614 | tsdelta = ktime_to_timespec(hr_delta); | ||
615 | hr_expires = timespec_to_jiffies(&tsdelta); | ||
616 | if (hr_expires < 3) | ||
617 | return hr_expires + jiffies; | ||
618 | } | ||
619 | hr_expires += jiffies; | ||
620 | |||
621 | base = __get_cpu_var(tvec_bases); | ||
622 | spin_lock(&base->lock); | ||
623 | expires = base->timer_jiffies + (LONG_MAX >> 1); | ||
624 | list = NULL; | ||
625 | 632 | ||
626 | /* Look for timer events in tv1. */ | 633 | /* Look for timer events in tv1. */ |
627 | j = base->timer_jiffies & TVR_MASK; | 634 | index = slot = timer_jiffies & TVR_MASK; |
628 | do { | 635 | do { |
629 | list_for_each_entry(nte, base->tv1.vec + j, entry) { | 636 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { |
637 | found = 1; | ||
630 | expires = nte->expires; | 638 | expires = nte->expires; |
631 | if (j < (base->timer_jiffies & TVR_MASK)) | 639 | /* Look at the cascade bucket(s)? */ |
632 | list = base->tv2.vec + (INDEX(0)); | 640 | if (!index || slot < index) |
633 | goto found; | 641 | goto cascade; |
642 | return expires; | ||
634 | } | 643 | } |
635 | j = (j + 1) & TVR_MASK; | 644 | slot = (slot + 1) & TVR_MASK; |
636 | } while (j != (base->timer_jiffies & TVR_MASK)); | 645 | } while (slot != index); |
646 | |||
647 | cascade: | ||
648 | /* Calculate the next cascade event */ | ||
649 | if (index) | ||
650 | timer_jiffies += TVR_SIZE - index; | ||
651 | timer_jiffies >>= TVR_BITS; | ||
637 | 652 | ||
638 | /* Check tv2-tv5. */ | 653 | /* Check tv2-tv5. */ |
639 | varray[0] = &base->tv2; | 654 | varray[0] = &base->tv2; |
640 | varray[1] = &base->tv3; | 655 | varray[1] = &base->tv3; |
641 | varray[2] = &base->tv4; | 656 | varray[2] = &base->tv4; |
642 | varray[3] = &base->tv5; | 657 | varray[3] = &base->tv5; |
643 | for (i = 0; i < 4; i++) { | 658 | |
644 | j = INDEX(i); | 659 | for (array = 0; array < 4; array++) { |
660 | tvec_t *varp = varray[array]; | ||
661 | |||
662 | index = slot = timer_jiffies & TVN_MASK; | ||
645 | do { | 663 | do { |
646 | if (list_empty(varray[i]->vec + j)) { | 664 | list_for_each_entry(nte, varp->vec + slot, entry) { |
647 | j = (j + 1) & TVN_MASK; | 665 | found = 1; |
648 | continue; | ||
649 | } | ||
650 | list_for_each_entry(nte, varray[i]->vec + j, entry) | ||
651 | if (time_before(nte->expires, expires)) | 666 | if (time_before(nte->expires, expires)) |
652 | expires = nte->expires; | 667 | expires = nte->expires; |
653 | if (j < (INDEX(i)) && i < 3) | 668 | } |
654 | list = varray[i + 1]->vec + (INDEX(i + 1)); | 669 | /* |
655 | goto found; | 670 | * Do we still search for the first timer or are |
656 | } while (j != (INDEX(i))); | 671 | * we looking up the cascade buckets ? |
657 | } | 672 | */ |
658 | found: | 673 | if (found) { |
659 | if (list) { | 674 | /* Look at the cascade bucket(s)? */ |
660 | /* | 675 | if (!index || slot < index) |
661 | * The search wrapped. We need to look at the next list | 676 | break; |
662 | * from next tv element that would cascade into tv element | 677 | return expires; |
663 | * where we found the timer element. | 678 | } |
664 | */ | 679 | slot = (slot + 1) & TVN_MASK; |
665 | list_for_each_entry(nte, list, entry) { | 680 | } while (slot != index); |
666 | if (time_before(nte->expires, expires)) | 681 | |
667 | expires = nte->expires; | 682 | if (index) |
668 | } | 683 | timer_jiffies += TVN_SIZE - index; |
684 | timer_jiffies >>= TVN_BITS; | ||
669 | } | 685 | } |
670 | spin_unlock(&base->lock); | 686 | return expires; |
687 | } | ||
671 | 688 | ||
672 | /* | 689 | /* |
673 | * It can happen that other CPUs service timer IRQs and increment | 690 | * Check, if the next hrtimer event is before the next timer wheel |
674 | * jiffies, but we have not yet got a local timer tick to process | 691 | * event: |
675 | * the timer wheels. In that case, the expiry time can be before | 692 | */ |
676 | * jiffies, but since the high-resolution timer here is relative to | 693 | static unsigned long cmp_next_hrtimer_event(unsigned long now, |
677 | * jiffies, the default expression when high-resolution timers are | 694 | unsigned long expires) |
678 | * not active, | 695 | { |
679 | * | 696 | ktime_t hr_delta = hrtimer_get_next_event(); |
680 | * time_before(MAX_JIFFY_OFFSET + jiffies, expires) | 697 | struct timespec tsdelta; |
681 | * | ||
682 | * would falsely evaluate to true. If that is the case, just | ||
683 | * return jiffies so that we can immediately fire the local timer | ||
684 | */ | ||
685 | if (time_before(expires, jiffies)) | ||
686 | return jiffies; | ||
687 | 698 | ||
688 | if (time_before(hr_expires, expires)) | 699 | if (hr_delta.tv64 == KTIME_MAX) |
689 | return hr_expires; | 700 | return expires; |
690 | 701 | ||
702 | if (hr_delta.tv64 <= TICK_NSEC) | ||
703 | return now; | ||
704 | |||
705 | tsdelta = ktime_to_timespec(hr_delta); | ||
706 | now += timespec_to_jiffies(&tsdelta); | ||
707 | if (time_before(now, expires)) | ||
708 | return now; | ||
691 | return expires; | 709 | return expires; |
692 | } | 710 | } |
711 | |||
712 | /** | ||
713 | * get_next_timer_interrupt - return the jiffy of the next pending timer | ||
714 | */ | ||
715 | unsigned long get_next_timer_interrupt(unsigned long now) | ||
716 | { | ||
717 | tvec_base_t *base = __get_cpu_var(tvec_bases); | ||
718 | unsigned long expires; | ||
719 | |||
720 | spin_lock(&base->lock); | ||
721 | expires = __next_timer_interrupt(base); | ||
722 | spin_unlock(&base->lock); | ||
723 | |||
724 | if (time_before_eq(expires, now)) | ||
725 | return now; | ||
726 | |||
727 | return cmp_next_hrtimer_event(now, expires); | ||
728 | } | ||
729 | |||
730 | #ifdef CONFIG_NO_IDLE_HZ | ||
731 | unsigned long next_timer_interrupt(void) | ||
732 | { | ||
733 | return get_next_timer_interrupt(jiffies); | ||
734 | } | ||
735 | #endif | ||
736 | |||
693 | #endif | 737 | #endif |
694 | 738 | ||
695 | /******************************************************************/ | 739 | /******************************************************************/ |
@@ -832,32 +876,35 @@ EXPORT_SYMBOL(do_settimeofday); | |||
832 | * | 876 | * |
833 | * Accumulates current time interval and initializes new clocksource | 877 | * Accumulates current time interval and initializes new clocksource |
834 | */ | 878 | */ |
835 | static int change_clocksource(void) | 879 | static void change_clocksource(void) |
836 | { | 880 | { |
837 | struct clocksource *new; | 881 | struct clocksource *new; |
838 | cycle_t now; | 882 | cycle_t now; |
839 | u64 nsec; | 883 | u64 nsec; |
884 | |||
840 | new = clocksource_get_next(); | 885 | new = clocksource_get_next(); |
841 | if (clock != new) { | 886 | |
842 | now = clocksource_read(new); | 887 | if (clock == new) |
843 | nsec = __get_nsec_offset(); | 888 | return; |
844 | timespec_add_ns(&xtime, nsec); | 889 | |
845 | 890 | now = clocksource_read(new); | |
846 | clock = new; | 891 | nsec = __get_nsec_offset(); |
847 | clock->cycle_last = now; | 892 | timespec_add_ns(&xtime, nsec); |
848 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | 893 | |
849 | clock->name); | 894 | clock = new; |
850 | return 1; | 895 | clock->cycle_last = now; |
851 | } else if (clock->update_callback) { | 896 | |
852 | return clock->update_callback(); | 897 | clock->error = 0; |
853 | } | 898 | clock->xtime_nsec = 0; |
854 | return 0; | 899 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
900 | |||
901 | tick_clock_notify(); | ||
902 | |||
903 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | ||
904 | clock->name); | ||
855 | } | 905 | } |
856 | #else | 906 | #else |
857 | static inline int change_clocksource(void) | 907 | static inline void change_clocksource(void) { } |
858 | { | ||
859 | return 0; | ||
860 | } | ||
861 | #endif | 908 | #endif |
862 | 909 | ||
863 | /** | 910 | /** |
@@ -871,33 +918,56 @@ int timekeeping_is_continuous(void) | |||
871 | do { | 918 | do { |
872 | seq = read_seqbegin(&xtime_lock); | 919 | seq = read_seqbegin(&xtime_lock); |
873 | 920 | ||
874 | ret = clock->is_continuous; | 921 | ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; |
875 | 922 | ||
876 | } while (read_seqretry(&xtime_lock, seq)); | 923 | } while (read_seqretry(&xtime_lock, seq)); |
877 | 924 | ||
878 | return ret; | 925 | return ret; |
879 | } | 926 | } |
880 | 927 | ||
928 | /** | ||
929 | * read_persistent_clock - Return time in seconds from the persistent clock. | ||
930 | * | ||
931 | * Weak dummy function for arches that do not yet support it. | ||
932 | * Returns seconds from epoch using the battery backed persistent clock. | ||
933 | * Returns zero if unsupported. | ||
934 | * | ||
935 | * XXX - Do be sure to remove it once all arches implement it. | ||
936 | */ | ||
937 | unsigned long __attribute__((weak)) read_persistent_clock(void) | ||
938 | { | ||
939 | return 0; | ||
940 | } | ||
941 | |||
881 | /* | 942 | /* |
882 | * timekeeping_init - Initializes the clocksource and common timekeeping values | 943 | * timekeeping_init - Initializes the clocksource and common timekeeping values |
883 | */ | 944 | */ |
884 | void __init timekeeping_init(void) | 945 | void __init timekeeping_init(void) |
885 | { | 946 | { |
886 | unsigned long flags; | 947 | unsigned long flags; |
948 | unsigned long sec = read_persistent_clock(); | ||
887 | 949 | ||
888 | write_seqlock_irqsave(&xtime_lock, flags); | 950 | write_seqlock_irqsave(&xtime_lock, flags); |
889 | 951 | ||
890 | ntp_clear(); | 952 | ntp_clear(); |
891 | 953 | ||
892 | clock = clocksource_get_next(); | 954 | clock = clocksource_get_next(); |
893 | clocksource_calculate_interval(clock, tick_nsec); | 955 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
894 | clock->cycle_last = clocksource_read(clock); | 956 | clock->cycle_last = clocksource_read(clock); |
895 | 957 | ||
958 | xtime.tv_sec = sec; | ||
959 | xtime.tv_nsec = 0; | ||
960 | set_normalized_timespec(&wall_to_monotonic, | ||
961 | -xtime.tv_sec, -xtime.tv_nsec); | ||
962 | |||
896 | write_sequnlock_irqrestore(&xtime_lock, flags); | 963 | write_sequnlock_irqrestore(&xtime_lock, flags); |
897 | } | 964 | } |
898 | 965 | ||
899 | 966 | /* flag for if timekeeping is suspended */ | |
900 | static int timekeeping_suspended; | 967 | static int timekeeping_suspended; |
968 | /* time in seconds when suspend began */ | ||
969 | static unsigned long timekeeping_suspend_time; | ||
970 | |||
901 | /** | 971 | /** |
902 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 972 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
903 | * @dev: unused | 973 | * @dev: unused |
@@ -909,13 +979,26 @@ static int timekeeping_suspended; | |||
909 | static int timekeeping_resume(struct sys_device *dev) | 979 | static int timekeeping_resume(struct sys_device *dev) |
910 | { | 980 | { |
911 | unsigned long flags; | 981 | unsigned long flags; |
982 | unsigned long now = read_persistent_clock(); | ||
912 | 983 | ||
913 | write_seqlock_irqsave(&xtime_lock, flags); | 984 | write_seqlock_irqsave(&xtime_lock, flags); |
914 | /* restart the last cycle value */ | 985 | |
986 | if (now && (now > timekeeping_suspend_time)) { | ||
987 | unsigned long sleep_length = now - timekeeping_suspend_time; | ||
988 | |||
989 | xtime.tv_sec += sleep_length; | ||
990 | wall_to_monotonic.tv_sec -= sleep_length; | ||
991 | } | ||
992 | /* re-base the last cycle value */ | ||
915 | clock->cycle_last = clocksource_read(clock); | 993 | clock->cycle_last = clocksource_read(clock); |
916 | clock->error = 0; | 994 | clock->error = 0; |
917 | timekeeping_suspended = 0; | 995 | timekeeping_suspended = 0; |
918 | write_sequnlock_irqrestore(&xtime_lock, flags); | 996 | write_sequnlock_irqrestore(&xtime_lock, flags); |
997 | |||
998 | touch_softlockup_watchdog(); | ||
999 | /* Resume hrtimers */ | ||
1000 | clock_was_set(); | ||
1001 | |||
919 | return 0; | 1002 | return 0; |
920 | } | 1003 | } |
921 | 1004 | ||
@@ -925,6 +1008,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | |||
925 | 1008 | ||
926 | write_seqlock_irqsave(&xtime_lock, flags); | 1009 | write_seqlock_irqsave(&xtime_lock, flags); |
927 | timekeeping_suspended = 1; | 1010 | timekeeping_suspended = 1; |
1011 | timekeeping_suspend_time = read_persistent_clock(); | ||
928 | write_sequnlock_irqrestore(&xtime_lock, flags); | 1012 | write_sequnlock_irqrestore(&xtime_lock, flags); |
929 | return 0; | 1013 | return 0; |
930 | } | 1014 | } |
@@ -1089,11 +1173,8 @@ static void update_wall_time(void) | |||
1089 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | 1173 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; |
1090 | 1174 | ||
1091 | /* check to see if there is a new clocksource to use */ | 1175 | /* check to see if there is a new clocksource to use */ |
1092 | if (change_clocksource()) { | 1176 | change_clocksource(); |
1093 | clock->error = 0; | 1177 | update_vsyscall(&xtime, clock); |
1094 | clock->xtime_nsec = 0; | ||
1095 | clocksource_calculate_interval(clock, tick_nsec); | ||
1096 | } | ||
1097 | } | 1178 | } |
1098 | 1179 | ||
1099 | /* | 1180 | /* |
@@ -1162,11 +1243,9 @@ static inline void calc_load(unsigned long ticks) | |||
1162 | * This seqlock protects us from races in SMP while | 1243 | * This seqlock protects us from races in SMP while |
1163 | * playing with xtime and avenrun. | 1244 | * playing with xtime and avenrun. |
1164 | */ | 1245 | */ |
1165 | #ifndef ARCH_HAVE_XTIME_LOCK | 1246 | __attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); |
1166 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); | ||
1167 | 1247 | ||
1168 | EXPORT_SYMBOL(xtime_lock); | 1248 | EXPORT_SYMBOL(xtime_lock); |
1169 | #endif | ||
1170 | 1249 | ||
1171 | /* | 1250 | /* |
1172 | * This function runs timers and the timer-tq in bottom half context. | 1251 | * This function runs timers and the timer-tq in bottom half context. |
@@ -1175,7 +1254,8 @@ static void run_timer_softirq(struct softirq_action *h) | |||
1175 | { | 1254 | { |
1176 | tvec_base_t *base = __get_cpu_var(tvec_bases); | 1255 | tvec_base_t *base = __get_cpu_var(tvec_bases); |
1177 | 1256 | ||
1178 | hrtimer_run_queues(); | 1257 | hrtimer_run_queues(); |
1258 | |||
1179 | if (time_after_eq(jiffies, base->timer_jiffies)) | 1259 | if (time_after_eq(jiffies, base->timer_jiffies)) |
1180 | __run_timers(base); | 1260 | __run_timers(base); |
1181 | } | 1261 | } |
@@ -1392,17 +1472,16 @@ asmlinkage long sys_gettid(void) | |||
1392 | } | 1472 | } |
1393 | 1473 | ||
1394 | /** | 1474 | /** |
1395 | * sys_sysinfo - fill in sysinfo struct | 1475 | * do_sysinfo - fill in sysinfo struct |
1396 | * @info: pointer to buffer to fill | 1476 | * @info: pointer to buffer to fill |
1397 | */ | 1477 | */ |
1398 | asmlinkage long sys_sysinfo(struct sysinfo __user *info) | 1478 | int do_sysinfo(struct sysinfo *info) |
1399 | { | 1479 | { |
1400 | struct sysinfo val; | ||
1401 | unsigned long mem_total, sav_total; | 1480 | unsigned long mem_total, sav_total; |
1402 | unsigned int mem_unit, bitcount; | 1481 | unsigned int mem_unit, bitcount; |
1403 | unsigned long seq; | 1482 | unsigned long seq; |
1404 | 1483 | ||
1405 | memset((char *)&val, 0, sizeof(struct sysinfo)); | 1484 | memset(info, 0, sizeof(struct sysinfo)); |
1406 | 1485 | ||
1407 | do { | 1486 | do { |
1408 | struct timespec tp; | 1487 | struct timespec tp; |
@@ -1422,17 +1501,17 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) | |||
1422 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; | 1501 | tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; |
1423 | tp.tv_sec++; | 1502 | tp.tv_sec++; |
1424 | } | 1503 | } |
1425 | val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); | 1504 | info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); |
1426 | 1505 | ||
1427 | val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); | 1506 | info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); |
1428 | val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); | 1507 | info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); |
1429 | val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); | 1508 | info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); |
1430 | 1509 | ||
1431 | val.procs = nr_threads; | 1510 | info->procs = nr_threads; |
1432 | } while (read_seqretry(&xtime_lock, seq)); | 1511 | } while (read_seqretry(&xtime_lock, seq)); |
1433 | 1512 | ||
1434 | si_meminfo(&val); | 1513 | si_meminfo(info); |
1435 | si_swapinfo(&val); | 1514 | si_swapinfo(info); |
1436 | 1515 | ||
1437 | /* | 1516 | /* |
1438 | * If the sum of all the available memory (i.e. ram + swap) | 1517 | * If the sum of all the available memory (i.e. ram + swap) |
@@ -1443,11 +1522,11 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) | |||
1443 | * -Erik Andersen <andersee@debian.org> | 1522 | * -Erik Andersen <andersee@debian.org> |
1444 | */ | 1523 | */ |
1445 | 1524 | ||
1446 | mem_total = val.totalram + val.totalswap; | 1525 | mem_total = info->totalram + info->totalswap; |
1447 | if (mem_total < val.totalram || mem_total < val.totalswap) | 1526 | if (mem_total < info->totalram || mem_total < info->totalswap) |
1448 | goto out; | 1527 | goto out; |
1449 | bitcount = 0; | 1528 | bitcount = 0; |
1450 | mem_unit = val.mem_unit; | 1529 | mem_unit = info->mem_unit; |
1451 | while (mem_unit > 1) { | 1530 | while (mem_unit > 1) { |
1452 | bitcount++; | 1531 | bitcount++; |
1453 | mem_unit >>= 1; | 1532 | mem_unit >>= 1; |
@@ -1459,22 +1538,31 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) | |||
1459 | 1538 | ||
1460 | /* | 1539 | /* |
1461 | * If mem_total did not overflow, multiply all memory values by | 1540 | * If mem_total did not overflow, multiply all memory values by |
1462 | * val.mem_unit and set it to 1. This leaves things compatible | 1541 | * info->mem_unit and set it to 1. This leaves things compatible |
1463 | * with 2.2.x, and also retains compatibility with earlier 2.4.x | 1542 | * with 2.2.x, and also retains compatibility with earlier 2.4.x |
1464 | * kernels... | 1543 | * kernels... |
1465 | */ | 1544 | */ |
1466 | 1545 | ||
1467 | val.mem_unit = 1; | 1546 | info->mem_unit = 1; |
1468 | val.totalram <<= bitcount; | 1547 | info->totalram <<= bitcount; |
1469 | val.freeram <<= bitcount; | 1548 | info->freeram <<= bitcount; |
1470 | val.sharedram <<= bitcount; | 1549 | info->sharedram <<= bitcount; |
1471 | val.bufferram <<= bitcount; | 1550 | info->bufferram <<= bitcount; |
1472 | val.totalswap <<= bitcount; | 1551 | info->totalswap <<= bitcount; |
1473 | val.freeswap <<= bitcount; | 1552 | info->freeswap <<= bitcount; |
1474 | val.totalhigh <<= bitcount; | 1553 | info->totalhigh <<= bitcount; |
1475 | val.freehigh <<= bitcount; | 1554 | info->freehigh <<= bitcount; |
1555 | |||
1556 | out: | ||
1557 | return 0; | ||
1558 | } | ||
1559 | |||
1560 | asmlinkage long sys_sysinfo(struct sysinfo __user *info) | ||
1561 | { | ||
1562 | struct sysinfo val; | ||
1563 | |||
1564 | do_sysinfo(&val); | ||
1476 | 1565 | ||
1477 | out: | ||
1478 | if (copy_to_user(info, &val, sizeof(struct sysinfo))) | 1566 | if (copy_to_user(info, &val, sizeof(struct sysinfo))) |
1479 | return -EFAULT; | 1567 | return -EFAULT; |
1480 | 1568 | ||
@@ -1613,6 +1701,8 @@ void __init init_timers(void) | |||
1613 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, | 1701 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, |
1614 | (void *)(long)smp_processor_id()); | 1702 | (void *)(long)smp_processor_id()); |
1615 | 1703 | ||
1704 | init_timer_stats(); | ||
1705 | |||
1616 | BUG_ON(err == NOTIFY_BAD); | 1706 | BUG_ON(err == NOTIFY_BAD); |
1617 | register_cpu_notifier(&timers_nb); | 1707 | register_cpu_notifier(&timers_nb); |
1618 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); | 1708 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); |
@@ -1624,7 +1714,7 @@ struct time_interpolator *time_interpolator __read_mostly; | |||
1624 | static struct time_interpolator *time_interpolator_list __read_mostly; | 1714 | static struct time_interpolator *time_interpolator_list __read_mostly; |
1625 | static DEFINE_SPINLOCK(time_interpolator_lock); | 1715 | static DEFINE_SPINLOCK(time_interpolator_lock); |
1626 | 1716 | ||
1627 | static inline u64 time_interpolator_get_cycles(unsigned int src) | 1717 | static inline cycles_t time_interpolator_get_cycles(unsigned int src) |
1628 | { | 1718 | { |
1629 | unsigned long (*x)(void); | 1719 | unsigned long (*x)(void); |
1630 | 1720 | ||
@@ -1650,8 +1740,8 @@ static inline u64 time_interpolator_get_counter(int writelock) | |||
1650 | 1740 | ||
1651 | if (time_interpolator->jitter) | 1741 | if (time_interpolator->jitter) |
1652 | { | 1742 | { |
1653 | u64 lcycle; | 1743 | cycles_t lcycle; |
1654 | u64 now; | 1744 | cycles_t now; |
1655 | 1745 | ||
1656 | do { | 1746 | do { |
1657 | lcycle = time_interpolator->last_cycle; | 1747 | lcycle = time_interpolator->last_cycle; |