aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/timer.c')
-rw-r--r--kernel/timer.c374
1 files changed, 232 insertions, 142 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index c2a8ccfc2882..cb1b86a9c52f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -34,6 +34,8 @@
34#include <linux/cpu.h> 34#include <linux/cpu.h>
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
36#include <linux/delay.h> 36#include <linux/delay.h>
37#include <linux/tick.h>
38#include <linux/kallsyms.h>
37 39
38#include <asm/uaccess.h> 40#include <asm/uaccess.h>
39#include <asm/unistd.h> 41#include <asm/unistd.h>
@@ -85,7 +87,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
85 * @j: the time in (absolute) jiffies that should be rounded 87 * @j: the time in (absolute) jiffies that should be rounded
86 * @cpu: the processor number on which the timeout will happen 88 * @cpu: the processor number on which the timeout will happen
87 * 89 *
88 * __round_jiffies rounds an absolute time in the future (in jiffies) 90 * __round_jiffies() rounds an absolute time in the future (in jiffies)
89 * up or down to (approximately) full seconds. This is useful for timers 91 * up or down to (approximately) full seconds. This is useful for timers
90 * for which the exact time they fire does not matter too much, as long as 92 * for which the exact time they fire does not matter too much, as long as
91 * they fire approximately every X seconds. 93 * they fire approximately every X seconds.
@@ -98,7 +100,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
98 * processors firing at the exact same time, which could lead 100 * processors firing at the exact same time, which could lead
99 * to lock contention or spurious cache line bouncing. 101 * to lock contention or spurious cache line bouncing.
100 * 102 *
101 * The return value is the rounded version of the "j" parameter. 103 * The return value is the rounded version of the @j parameter.
102 */ 104 */
103unsigned long __round_jiffies(unsigned long j, int cpu) 105unsigned long __round_jiffies(unsigned long j, int cpu)
104{ 106{
@@ -142,7 +144,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
142 * @j: the time in (relative) jiffies that should be rounded 144 * @j: the time in (relative) jiffies that should be rounded
143 * @cpu: the processor number on which the timeout will happen 145 * @cpu: the processor number on which the timeout will happen
144 * 146 *
145 * __round_jiffies_relative rounds a time delta in the future (in jiffies) 147 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
146 * up or down to (approximately) full seconds. This is useful for timers 148 * up or down to (approximately) full seconds. This is useful for timers
147 * for which the exact time they fire does not matter too much, as long as 149 * for which the exact time they fire does not matter too much, as long as
148 * they fire approximately every X seconds. 150 * they fire approximately every X seconds.
@@ -155,7 +157,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
155 * processors firing at the exact same time, which could lead 157 * processors firing at the exact same time, which could lead
156 * to lock contention or spurious cache line bouncing. 158 * to lock contention or spurious cache line bouncing.
157 * 159 *
158 * The return value is the rounded version of the "j" parameter. 160 * The return value is the rounded version of the @j parameter.
159 */ 161 */
160unsigned long __round_jiffies_relative(unsigned long j, int cpu) 162unsigned long __round_jiffies_relative(unsigned long j, int cpu)
161{ 163{
@@ -173,7 +175,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
173 * round_jiffies - function to round jiffies to a full second 175 * round_jiffies - function to round jiffies to a full second
174 * @j: the time in (absolute) jiffies that should be rounded 176 * @j: the time in (absolute) jiffies that should be rounded
175 * 177 *
176 * round_jiffies rounds an absolute time in the future (in jiffies) 178 * round_jiffies() rounds an absolute time in the future (in jiffies)
177 * up or down to (approximately) full seconds. This is useful for timers 179 * up or down to (approximately) full seconds. This is useful for timers
178 * for which the exact time they fire does not matter too much, as long as 180 * for which the exact time they fire does not matter too much, as long as
179 * they fire approximately every X seconds. 181 * they fire approximately every X seconds.
@@ -182,7 +184,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
182 * at the same time, rather than at various times spread out. The goal 184 * at the same time, rather than at various times spread out. The goal
183 * of this is to have the CPU wake up less, which saves power. 185 * of this is to have the CPU wake up less, which saves power.
184 * 186 *
185 * The return value is the rounded version of the "j" parameter. 187 * The return value is the rounded version of the @j parameter.
186 */ 188 */
187unsigned long round_jiffies(unsigned long j) 189unsigned long round_jiffies(unsigned long j)
188{ 190{
@@ -194,7 +196,7 @@ EXPORT_SYMBOL_GPL(round_jiffies);
194 * round_jiffies_relative - function to round jiffies to a full second 196 * round_jiffies_relative - function to round jiffies to a full second
195 * @j: the time in (relative) jiffies that should be rounded 197 * @j: the time in (relative) jiffies that should be rounded
196 * 198 *
197 * round_jiffies_relative rounds a time delta in the future (in jiffies) 199 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
198 * up or down to (approximately) full seconds. This is useful for timers 200 * up or down to (approximately) full seconds. This is useful for timers
199 * for which the exact time they fire does not matter too much, as long as 201 * for which the exact time they fire does not matter too much, as long as
200 * they fire approximately every X seconds. 202 * they fire approximately every X seconds.
@@ -203,7 +205,7 @@ EXPORT_SYMBOL_GPL(round_jiffies);
203 * at the same time, rather than at various times spread out. The goal 205 * at the same time, rather than at various times spread out. The goal
204 * of this is to have the CPU wake up less, which saves power. 206 * of this is to have the CPU wake up less, which saves power.
205 * 207 *
206 * The return value is the rounded version of the "j" parameter. 208 * The return value is the rounded version of the @j parameter.
207 */ 209 */
208unsigned long round_jiffies_relative(unsigned long j) 210unsigned long round_jiffies_relative(unsigned long j)
209{ 211{
@@ -262,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
262 list_add_tail(&timer->entry, vec); 264 list_add_tail(&timer->entry, vec);
263} 265}
264 266
267#ifdef CONFIG_TIMER_STATS
268void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
269{
270 if (timer->start_site)
271 return;
272
273 timer->start_site = addr;
274 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
275 timer->start_pid = current->pid;
276}
277#endif
278
265/** 279/**
266 * init_timer - initialize a timer. 280 * init_timer - initialize a timer.
267 * @timer: the timer to be initialized 281 * @timer: the timer to be initialized
@@ -273,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer)
273{ 287{
274 timer->entry.next = NULL; 288 timer->entry.next = NULL;
275 timer->base = __raw_get_cpu_var(tvec_bases); 289 timer->base = __raw_get_cpu_var(tvec_bases);
290#ifdef CONFIG_TIMER_STATS
291 timer->start_site = NULL;
292 timer->start_pid = -1;
293 memset(timer->start_comm, 0, TASK_COMM_LEN);
294#endif
276} 295}
277EXPORT_SYMBOL(init_timer); 296EXPORT_SYMBOL(init_timer);
278 297
279static inline void detach_timer(struct timer_list *timer, 298static inline void detach_timer(struct timer_list *timer,
280 int clear_pending) 299 int clear_pending)
281{ 300{
282 struct list_head *entry = &timer->entry; 301 struct list_head *entry = &timer->entry;
283 302
@@ -324,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
324 unsigned long flags; 343 unsigned long flags;
325 int ret = 0; 344 int ret = 0;
326 345
346 timer_stats_timer_set_start_info(timer);
327 BUG_ON(!timer->function); 347 BUG_ON(!timer->function);
328 348
329 base = lock_timer_base(timer, &flags); 349 base = lock_timer_base(timer, &flags);
@@ -374,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
374 tvec_base_t *base = per_cpu(tvec_bases, cpu); 394 tvec_base_t *base = per_cpu(tvec_bases, cpu);
375 unsigned long flags; 395 unsigned long flags;
376 396
397 timer_stats_timer_set_start_info(timer);
377 BUG_ON(timer_pending(timer) || !timer->function); 398 BUG_ON(timer_pending(timer) || !timer->function);
378 spin_lock_irqsave(&base->lock, flags); 399 spin_lock_irqsave(&base->lock, flags);
379 timer->base = base; 400 timer->base = base;
@@ -387,7 +408,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
387 * @timer: the timer to be modified 408 * @timer: the timer to be modified
388 * @expires: new timeout in jiffies 409 * @expires: new timeout in jiffies
389 * 410 *
390 * mod_timer is a more efficient way to update the expire field of an 411 * mod_timer() is a more efficient way to update the expire field of an
391 * active timer (if the timer is inactive it will be activated) 412 * active timer (if the timer is inactive it will be activated)
392 * 413 *
393 * mod_timer(timer, expires) is equivalent to: 414 * mod_timer(timer, expires) is equivalent to:
@@ -406,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
406{ 427{
407 BUG_ON(!timer->function); 428 BUG_ON(!timer->function);
408 429
430 timer_stats_timer_set_start_info(timer);
409 /* 431 /*
410 * This is a common optimization triggered by the 432 * This is a common optimization triggered by the
411 * networking code - if the timer is re-modified 433 * networking code - if the timer is re-modified
@@ -436,6 +458,7 @@ int del_timer(struct timer_list *timer)
436 unsigned long flags; 458 unsigned long flags;
437 int ret = 0; 459 int ret = 0;
438 460
461 timer_stats_timer_clear_start_info(timer);
439 if (timer_pending(timer)) { 462 if (timer_pending(timer)) {
440 base = lock_timer_base(timer, &flags); 463 base = lock_timer_base(timer, &flags);
441 if (timer_pending(timer)) { 464 if (timer_pending(timer)) {
@@ -490,7 +513,7 @@ out:
490 * the timer it also makes sure the handler has finished executing on other 513 * the timer it also makes sure the handler has finished executing on other
491 * CPUs. 514 * CPUs.
492 * 515 *
493 * Synchronization rules: callers must prevent restarting of the timer, 516 * Synchronization rules: Callers must prevent restarting of the timer,
494 * otherwise this function is meaningless. It must not be called from 517 * otherwise this function is meaningless. It must not be called from
495 * interrupt contexts. The caller must not hold locks which would prevent 518 * interrupt contexts. The caller must not hold locks which would prevent
496 * completion of the timer's handler. The timer's handler must not call 519 * completion of the timer's handler. The timer's handler must not call
@@ -569,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base)
569 fn = timer->function; 592 fn = timer->function;
570 data = timer->data; 593 data = timer->data;
571 594
595 timer_stats_account_timer(timer);
596
572 set_running_timer(base, timer); 597 set_running_timer(base, timer);
573 detach_timer(timer, 1); 598 detach_timer(timer, 1);
574 spin_unlock_irq(&base->lock); 599 spin_unlock_irq(&base->lock);
@@ -591,105 +616,124 @@ static inline void __run_timers(tvec_base_t *base)
591 spin_unlock_irq(&base->lock); 616 spin_unlock_irq(&base->lock);
592} 617}
593 618
594#ifdef CONFIG_NO_IDLE_HZ 619#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
595/* 620/*
596 * Find out when the next timer event is due to happen. This 621 * Find out when the next timer event is due to happen. This
597 * is used on S/390 to stop all activity when a cpus is idle. 622 * is used on S/390 to stop all activity when a cpus is idle.
598 * This functions needs to be called disabled. 623 * This functions needs to be called disabled.
599 */ 624 */
600unsigned long next_timer_interrupt(void) 625static unsigned long __next_timer_interrupt(tvec_base_t *base)
601{ 626{
602 tvec_base_t *base; 627 unsigned long timer_jiffies = base->timer_jiffies;
603 struct list_head *list; 628 unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
629 int index, slot, array, found = 0;
604 struct timer_list *nte; 630 struct timer_list *nte;
605 unsigned long expires;
606 unsigned long hr_expires = MAX_JIFFY_OFFSET;
607 ktime_t hr_delta;
608 tvec_t *varray[4]; 631 tvec_t *varray[4];
609 int i, j;
610
611 hr_delta = hrtimer_get_next_event();
612 if (hr_delta.tv64 != KTIME_MAX) {
613 struct timespec tsdelta;
614 tsdelta = ktime_to_timespec(hr_delta);
615 hr_expires = timespec_to_jiffies(&tsdelta);
616 if (hr_expires < 3)
617 return hr_expires + jiffies;
618 }
619 hr_expires += jiffies;
620
621 base = __get_cpu_var(tvec_bases);
622 spin_lock(&base->lock);
623 expires = base->timer_jiffies + (LONG_MAX >> 1);
624 list = NULL;
625 632
626 /* Look for timer events in tv1. */ 633 /* Look for timer events in tv1. */
627 j = base->timer_jiffies & TVR_MASK; 634 index = slot = timer_jiffies & TVR_MASK;
628 do { 635 do {
629 list_for_each_entry(nte, base->tv1.vec + j, entry) { 636 list_for_each_entry(nte, base->tv1.vec + slot, entry) {
637 found = 1;
630 expires = nte->expires; 638 expires = nte->expires;
631 if (j < (base->timer_jiffies & TVR_MASK)) 639 /* Look at the cascade bucket(s)? */
632 list = base->tv2.vec + (INDEX(0)); 640 if (!index || slot < index)
633 goto found; 641 goto cascade;
642 return expires;
634 } 643 }
635 j = (j + 1) & TVR_MASK; 644 slot = (slot + 1) & TVR_MASK;
636 } while (j != (base->timer_jiffies & TVR_MASK)); 645 } while (slot != index);
646
647cascade:
648 /* Calculate the next cascade event */
649 if (index)
650 timer_jiffies += TVR_SIZE - index;
651 timer_jiffies >>= TVR_BITS;
637 652
638 /* Check tv2-tv5. */ 653 /* Check tv2-tv5. */
639 varray[0] = &base->tv2; 654 varray[0] = &base->tv2;
640 varray[1] = &base->tv3; 655 varray[1] = &base->tv3;
641 varray[2] = &base->tv4; 656 varray[2] = &base->tv4;
642 varray[3] = &base->tv5; 657 varray[3] = &base->tv5;
643 for (i = 0; i < 4; i++) { 658
644 j = INDEX(i); 659 for (array = 0; array < 4; array++) {
660 tvec_t *varp = varray[array];
661
662 index = slot = timer_jiffies & TVN_MASK;
645 do { 663 do {
646 if (list_empty(varray[i]->vec + j)) { 664 list_for_each_entry(nte, varp->vec + slot, entry) {
647 j = (j + 1) & TVN_MASK; 665 found = 1;
648 continue;
649 }
650 list_for_each_entry(nte, varray[i]->vec + j, entry)
651 if (time_before(nte->expires, expires)) 666 if (time_before(nte->expires, expires))
652 expires = nte->expires; 667 expires = nte->expires;
653 if (j < (INDEX(i)) && i < 3) 668 }
654 list = varray[i + 1]->vec + (INDEX(i + 1)); 669 /*
655 goto found; 670 * Do we still search for the first timer or are
656 } while (j != (INDEX(i))); 671 * we looking up the cascade buckets ?
657 } 672 */
658found: 673 if (found) {
659 if (list) { 674 /* Look at the cascade bucket(s)? */
660 /* 675 if (!index || slot < index)
661 * The search wrapped. We need to look at the next list 676 break;
662 * from next tv element that would cascade into tv element 677 return expires;
663 * where we found the timer element. 678 }
664 */ 679 slot = (slot + 1) & TVN_MASK;
665 list_for_each_entry(nte, list, entry) { 680 } while (slot != index);
666 if (time_before(nte->expires, expires)) 681
667 expires = nte->expires; 682 if (index)
668 } 683 timer_jiffies += TVN_SIZE - index;
684 timer_jiffies >>= TVN_BITS;
669 } 685 }
670 spin_unlock(&base->lock); 686 return expires;
687}
671 688
672 /* 689/*
673 * It can happen that other CPUs service timer IRQs and increment 690 * Check, if the next hrtimer event is before the next timer wheel
674 * jiffies, but we have not yet got a local timer tick to process 691 * event:
675 * the timer wheels. In that case, the expiry time can be before 692 */
676 * jiffies, but since the high-resolution timer here is relative to 693static unsigned long cmp_next_hrtimer_event(unsigned long now,
677 * jiffies, the default expression when high-resolution timers are 694 unsigned long expires)
678 * not active, 695{
679 * 696 ktime_t hr_delta = hrtimer_get_next_event();
680 * time_before(MAX_JIFFY_OFFSET + jiffies, expires) 697 struct timespec tsdelta;
681 *
682 * would falsely evaluate to true. If that is the case, just
683 * return jiffies so that we can immediately fire the local timer
684 */
685 if (time_before(expires, jiffies))
686 return jiffies;
687 698
688 if (time_before(hr_expires, expires)) 699 if (hr_delta.tv64 == KTIME_MAX)
689 return hr_expires; 700 return expires;
690 701
702 if (hr_delta.tv64 <= TICK_NSEC)
703 return now;
704
705 tsdelta = ktime_to_timespec(hr_delta);
706 now += timespec_to_jiffies(&tsdelta);
707 if (time_before(now, expires))
708 return now;
691 return expires; 709 return expires;
692} 710}
711
712/**
713 * next_timer_interrupt - return the jiffy of the next pending timer
714 */
715unsigned long get_next_timer_interrupt(unsigned long now)
716{
717 tvec_base_t *base = __get_cpu_var(tvec_bases);
718 unsigned long expires;
719
720 spin_lock(&base->lock);
721 expires = __next_timer_interrupt(base);
722 spin_unlock(&base->lock);
723
724 if (time_before_eq(expires, now))
725 return now;
726
727 return cmp_next_hrtimer_event(now, expires);
728}
729
730#ifdef CONFIG_NO_IDLE_HZ
731unsigned long next_timer_interrupt(void)
732{
733 return get_next_timer_interrupt(jiffies);
734}
735#endif
736
693#endif 737#endif
694 738
695/******************************************************************/ 739/******************************************************************/
@@ -832,32 +876,35 @@ EXPORT_SYMBOL(do_settimeofday);
832 * 876 *
833 * Accumulates current time interval and initializes new clocksource 877 * Accumulates current time interval and initializes new clocksource
834 */ 878 */
835static int change_clocksource(void) 879static void change_clocksource(void)
836{ 880{
837 struct clocksource *new; 881 struct clocksource *new;
838 cycle_t now; 882 cycle_t now;
839 u64 nsec; 883 u64 nsec;
884
840 new = clocksource_get_next(); 885 new = clocksource_get_next();
841 if (clock != new) { 886
842 now = clocksource_read(new); 887 if (clock == new)
843 nsec = __get_nsec_offset(); 888 return;
844 timespec_add_ns(&xtime, nsec); 889
845 890 now = clocksource_read(new);
846 clock = new; 891 nsec = __get_nsec_offset();
847 clock->cycle_last = now; 892 timespec_add_ns(&xtime, nsec);
848 printk(KERN_INFO "Time: %s clocksource has been installed.\n", 893
849 clock->name); 894 clock = new;
850 return 1; 895 clock->cycle_last = now;
851 } else if (clock->update_callback) { 896
852 return clock->update_callback(); 897 clock->error = 0;
853 } 898 clock->xtime_nsec = 0;
854 return 0; 899 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
900
901 tick_clock_notify();
902
903 printk(KERN_INFO "Time: %s clocksource has been installed.\n",
904 clock->name);
855} 905}
856#else 906#else
857static inline int change_clocksource(void) 907static inline void change_clocksource(void) { }
858{
859 return 0;
860}
861#endif 908#endif
862 909
863/** 910/**
@@ -871,33 +918,56 @@ int timekeeping_is_continuous(void)
871 do { 918 do {
872 seq = read_seqbegin(&xtime_lock); 919 seq = read_seqbegin(&xtime_lock);
873 920
874 ret = clock->is_continuous; 921 ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
875 922
876 } while (read_seqretry(&xtime_lock, seq)); 923 } while (read_seqretry(&xtime_lock, seq));
877 924
878 return ret; 925 return ret;
879} 926}
880 927
928/**
929 * read_persistent_clock - Return time in seconds from the persistent clock.
930 *
931 * Weak dummy function for arches that do not yet support it.
932 * Returns seconds from epoch using the battery backed persistent clock.
933 * Returns zero if unsupported.
934 *
935 * XXX - Do be sure to remove it once all arches implement it.
936 */
937unsigned long __attribute__((weak)) read_persistent_clock(void)
938{
939 return 0;
940}
941
881/* 942/*
882 * timekeeping_init - Initializes the clocksource and common timekeeping values 943 * timekeeping_init - Initializes the clocksource and common timekeeping values
883 */ 944 */
884void __init timekeeping_init(void) 945void __init timekeeping_init(void)
885{ 946{
886 unsigned long flags; 947 unsigned long flags;
948 unsigned long sec = read_persistent_clock();
887 949
888 write_seqlock_irqsave(&xtime_lock, flags); 950 write_seqlock_irqsave(&xtime_lock, flags);
889 951
890 ntp_clear(); 952 ntp_clear();
891 953
892 clock = clocksource_get_next(); 954 clock = clocksource_get_next();
893 clocksource_calculate_interval(clock, tick_nsec); 955 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
894 clock->cycle_last = clocksource_read(clock); 956 clock->cycle_last = clocksource_read(clock);
895 957
958 xtime.tv_sec = sec;
959 xtime.tv_nsec = 0;
960 set_normalized_timespec(&wall_to_monotonic,
961 -xtime.tv_sec, -xtime.tv_nsec);
962
896 write_sequnlock_irqrestore(&xtime_lock, flags); 963 write_sequnlock_irqrestore(&xtime_lock, flags);
897} 964}
898 965
899 966/* flag for if timekeeping is suspended */
900static int timekeeping_suspended; 967static int timekeeping_suspended;
968/* time in seconds when suspend began */
969static unsigned long timekeeping_suspend_time;
970
901/** 971/**
902 * timekeeping_resume - Resumes the generic timekeeping subsystem. 972 * timekeeping_resume - Resumes the generic timekeeping subsystem.
903 * @dev: unused 973 * @dev: unused
@@ -909,13 +979,26 @@ static int timekeeping_suspended;
909static int timekeeping_resume(struct sys_device *dev) 979static int timekeeping_resume(struct sys_device *dev)
910{ 980{
911 unsigned long flags; 981 unsigned long flags;
982 unsigned long now = read_persistent_clock();
912 983
913 write_seqlock_irqsave(&xtime_lock, flags); 984 write_seqlock_irqsave(&xtime_lock, flags);
914 /* restart the last cycle value */ 985
986 if (now && (now > timekeeping_suspend_time)) {
987 unsigned long sleep_length = now - timekeeping_suspend_time;
988
989 xtime.tv_sec += sleep_length;
990 wall_to_monotonic.tv_sec -= sleep_length;
991 }
992 /* re-base the last cycle value */
915 clock->cycle_last = clocksource_read(clock); 993 clock->cycle_last = clocksource_read(clock);
916 clock->error = 0; 994 clock->error = 0;
917 timekeeping_suspended = 0; 995 timekeeping_suspended = 0;
918 write_sequnlock_irqrestore(&xtime_lock, flags); 996 write_sequnlock_irqrestore(&xtime_lock, flags);
997
998 touch_softlockup_watchdog();
999 /* Resume hrtimers */
1000 clock_was_set();
1001
919 return 0; 1002 return 0;
920} 1003}
921 1004
@@ -925,6 +1008,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
925 1008
926 write_seqlock_irqsave(&xtime_lock, flags); 1009 write_seqlock_irqsave(&xtime_lock, flags);
927 timekeeping_suspended = 1; 1010 timekeeping_suspended = 1;
1011 timekeeping_suspend_time = read_persistent_clock();
928 write_sequnlock_irqrestore(&xtime_lock, flags); 1012 write_sequnlock_irqrestore(&xtime_lock, flags);
929 return 0; 1013 return 0;
930} 1014}
@@ -1089,11 +1173,8 @@ static void update_wall_time(void)
1089 clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; 1173 clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
1090 1174
1091 /* check to see if there is a new clocksource to use */ 1175 /* check to see if there is a new clocksource to use */
1092 if (change_clocksource()) { 1176 change_clocksource();
1093 clock->error = 0; 1177 update_vsyscall(&xtime, clock);
1094 clock->xtime_nsec = 0;
1095 clocksource_calculate_interval(clock, tick_nsec);
1096 }
1097} 1178}
1098 1179
1099/* 1180/*
@@ -1162,11 +1243,9 @@ static inline void calc_load(unsigned long ticks)
1162 * This read-write spinlock protects us from races in SMP while 1243 * This read-write spinlock protects us from races in SMP while
1163 * playing with xtime and avenrun. 1244 * playing with xtime and avenrun.
1164 */ 1245 */
1165#ifndef ARCH_HAVE_XTIME_LOCK 1246__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
1166__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
1167 1247
1168EXPORT_SYMBOL(xtime_lock); 1248EXPORT_SYMBOL(xtime_lock);
1169#endif
1170 1249
1171/* 1250/*
1172 * This function runs timers and the timer-tq in bottom half context. 1251 * This function runs timers and the timer-tq in bottom half context.
@@ -1175,7 +1254,8 @@ static void run_timer_softirq(struct softirq_action *h)
1175{ 1254{
1176 tvec_base_t *base = __get_cpu_var(tvec_bases); 1255 tvec_base_t *base = __get_cpu_var(tvec_bases);
1177 1256
1178 hrtimer_run_queues(); 1257 hrtimer_run_queues();
1258
1179 if (time_after_eq(jiffies, base->timer_jiffies)) 1259 if (time_after_eq(jiffies, base->timer_jiffies))
1180 __run_timers(base); 1260 __run_timers(base);
1181} 1261}
@@ -1392,17 +1472,16 @@ asmlinkage long sys_gettid(void)
1392} 1472}
1393 1473
1394/** 1474/**
1395 * sys_sysinfo - fill in sysinfo struct 1475 * do_sysinfo - fill in sysinfo struct
1396 * @info: pointer to buffer to fill 1476 * @info: pointer to buffer to fill
1397 */ 1477 */
1398asmlinkage long sys_sysinfo(struct sysinfo __user *info) 1478int do_sysinfo(struct sysinfo *info)
1399{ 1479{
1400 struct sysinfo val;
1401 unsigned long mem_total, sav_total; 1480 unsigned long mem_total, sav_total;
1402 unsigned int mem_unit, bitcount; 1481 unsigned int mem_unit, bitcount;
1403 unsigned long seq; 1482 unsigned long seq;
1404 1483
1405 memset((char *)&val, 0, sizeof(struct sysinfo)); 1484 memset(info, 0, sizeof(struct sysinfo));
1406 1485
1407 do { 1486 do {
1408 struct timespec tp; 1487 struct timespec tp;
@@ -1422,17 +1501,17 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1422 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; 1501 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
1423 tp.tv_sec++; 1502 tp.tv_sec++;
1424 } 1503 }
1425 val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); 1504 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1426 1505
1427 val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); 1506 info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
1428 val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); 1507 info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
1429 val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); 1508 info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
1430 1509
1431 val.procs = nr_threads; 1510 info->procs = nr_threads;
1432 } while (read_seqretry(&xtime_lock, seq)); 1511 } while (read_seqretry(&xtime_lock, seq));
1433 1512
1434 si_meminfo(&val); 1513 si_meminfo(info);
1435 si_swapinfo(&val); 1514 si_swapinfo(info);
1436 1515
1437 /* 1516 /*
1438 * If the sum of all the available memory (i.e. ram + swap) 1517 * If the sum of all the available memory (i.e. ram + swap)
@@ -1443,11 +1522,11 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1443 * -Erik Andersen <andersee@debian.org> 1522 * -Erik Andersen <andersee@debian.org>
1444 */ 1523 */
1445 1524
1446 mem_total = val.totalram + val.totalswap; 1525 mem_total = info->totalram + info->totalswap;
1447 if (mem_total < val.totalram || mem_total < val.totalswap) 1526 if (mem_total < info->totalram || mem_total < info->totalswap)
1448 goto out; 1527 goto out;
1449 bitcount = 0; 1528 bitcount = 0;
1450 mem_unit = val.mem_unit; 1529 mem_unit = info->mem_unit;
1451 while (mem_unit > 1) { 1530 while (mem_unit > 1) {
1452 bitcount++; 1531 bitcount++;
1453 mem_unit >>= 1; 1532 mem_unit >>= 1;
@@ -1459,22 +1538,31 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1459 1538
1460 /* 1539 /*
1461 * If mem_total did not overflow, multiply all memory values by 1540 * If mem_total did not overflow, multiply all memory values by
1462 * val.mem_unit and set it to 1. This leaves things compatible 1541 * info->mem_unit and set it to 1. This leaves things compatible
1463 * with 2.2.x, and also retains compatibility with earlier 2.4.x 1542 * with 2.2.x, and also retains compatibility with earlier 2.4.x
1464 * kernels... 1543 * kernels...
1465 */ 1544 */
1466 1545
1467 val.mem_unit = 1; 1546 info->mem_unit = 1;
1468 val.totalram <<= bitcount; 1547 info->totalram <<= bitcount;
1469 val.freeram <<= bitcount; 1548 info->freeram <<= bitcount;
1470 val.sharedram <<= bitcount; 1549 info->sharedram <<= bitcount;
1471 val.bufferram <<= bitcount; 1550 info->bufferram <<= bitcount;
1472 val.totalswap <<= bitcount; 1551 info->totalswap <<= bitcount;
1473 val.freeswap <<= bitcount; 1552 info->freeswap <<= bitcount;
1474 val.totalhigh <<= bitcount; 1553 info->totalhigh <<= bitcount;
1475 val.freehigh <<= bitcount; 1554 info->freehigh <<= bitcount;
1555
1556out:
1557 return 0;
1558}
1559
1560asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1561{
1562 struct sysinfo val;
1563
1564 do_sysinfo(&val);
1476 1565
1477 out:
1478 if (copy_to_user(info, &val, sizeof(struct sysinfo))) 1566 if (copy_to_user(info, &val, sizeof(struct sysinfo)))
1479 return -EFAULT; 1567 return -EFAULT;
1480 1568
@@ -1613,6 +1701,8 @@ void __init init_timers(void)
1613 int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, 1701 int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
1614 (void *)(long)smp_processor_id()); 1702 (void *)(long)smp_processor_id());
1615 1703
1704 init_timer_stats();
1705
1616 BUG_ON(err == NOTIFY_BAD); 1706 BUG_ON(err == NOTIFY_BAD);
1617 register_cpu_notifier(&timers_nb); 1707 register_cpu_notifier(&timers_nb);
1618 open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); 1708 open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
@@ -1624,7 +1714,7 @@ struct time_interpolator *time_interpolator __read_mostly;
1624static struct time_interpolator *time_interpolator_list __read_mostly; 1714static struct time_interpolator *time_interpolator_list __read_mostly;
1625static DEFINE_SPINLOCK(time_interpolator_lock); 1715static DEFINE_SPINLOCK(time_interpolator_lock);
1626 1716
1627static inline u64 time_interpolator_get_cycles(unsigned int src) 1717static inline cycles_t time_interpolator_get_cycles(unsigned int src)
1628{ 1718{
1629 unsigned long (*x)(void); 1719 unsigned long (*x)(void);
1630 1720
@@ -1650,8 +1740,8 @@ static inline u64 time_interpolator_get_counter(int writelock)
1650 1740
1651 if (time_interpolator->jitter) 1741 if (time_interpolator->jitter)
1652 { 1742 {
1653 u64 lcycle; 1743 cycles_t lcycle;
1654 u64 now; 1744 cycles_t now;
1655 1745
1656 do { 1746 do {
1657 lcycle = time_interpolator->last_cycle; 1747 lcycle = time_interpolator->last_cycle;