Diffstat (limited to 'kernel/timer.c')
| -rw-r--r-- | kernel/timer.c | 534 |
1 file changed, 435 insertions, 99 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index f35b3939e937..4f55622b0d38 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
| @@ -84,7 +84,7 @@ typedef struct tvec_t_base_s tvec_base_t; | |||
| 84 | 84 | ||
| 85 | tvec_base_t boot_tvec_bases; | 85 | tvec_base_t boot_tvec_bases; |
| 86 | EXPORT_SYMBOL(boot_tvec_bases); | 86 | EXPORT_SYMBOL(boot_tvec_bases); |
| 87 | static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = { &boot_tvec_bases }; | 87 | static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; |
| 88 | 88 | ||
| 89 | static inline void set_running_timer(tvec_base_t *base, | 89 | static inline void set_running_timer(tvec_base_t *base, |
| 90 | struct timer_list *timer) | 90 | struct timer_list *timer) |
| @@ -136,7 +136,7 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) | |||
| 136 | list_add_tail(&timer->entry, vec); | 136 | list_add_tail(&timer->entry, vec); |
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | /*** | 139 | /** |
| 140 | * init_timer - initialize a timer. | 140 | * init_timer - initialize a timer. |
| 141 | * @timer: the timer to be initialized | 141 | * @timer: the timer to be initialized |
| 142 | * | 142 | * |
| @@ -146,7 +146,7 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) | |||
| 146 | void fastcall init_timer(struct timer_list *timer) | 146 | void fastcall init_timer(struct timer_list *timer) |
| 147 | { | 147 | { |
| 148 | timer->entry.next = NULL; | 148 | timer->entry.next = NULL; |
| 149 | timer->base = per_cpu(tvec_bases, raw_smp_processor_id()); | 149 | timer->base = __raw_get_cpu_var(tvec_bases); |
| 150 | } | 150 | } |
| 151 | EXPORT_SYMBOL(init_timer); | 151 | EXPORT_SYMBOL(init_timer); |
| 152 | 152 | ||
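The two hunks above switch the per-CPU timer base from brace-initialization to a plain pointer initializer, and init_timer() from per_cpu() + raw_smp_processor_id() to __raw_get_cpu_var(). A minimal sketch of that access pattern, with illustrative names (foo_base is not from the patch):

struct foo_base { int id; };

static struct foo_base boot_foo_base;
static DEFINE_PER_CPU(struct foo_base *, foo_bases) = &boot_foo_base;

static struct foo_base *get_my_base(void)
{
	/* reads this CPU's slot without a preemption check; equivalent to
	 * per_cpu(foo_bases, raw_smp_processor_id()) */
	return __raw_get_cpu_var(foo_bases);
}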
| @@ -175,6 +175,7 @@ static inline void detach_timer(struct timer_list *timer, | |||
| 175 | */ | 175 | */ |
| 176 | static tvec_base_t *lock_timer_base(struct timer_list *timer, | 176 | static tvec_base_t *lock_timer_base(struct timer_list *timer, |
| 177 | unsigned long *flags) | 177 | unsigned long *flags) |
| 178 | __acquires(timer->base->lock) | ||
| 178 | { | 179 | { |
| 179 | tvec_base_t *base; | 180 | tvec_base_t *base; |
| 180 | 181 | ||
| @@ -235,7 +236,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) | |||
| 235 | 236 | ||
| 236 | EXPORT_SYMBOL(__mod_timer); | 237 | EXPORT_SYMBOL(__mod_timer); |
| 237 | 238 | ||
| 238 | /*** | 239 | /** |
| 239 | * add_timer_on - start a timer on a particular CPU | 240 | * add_timer_on - start a timer on a particular CPU |
| 240 | * @timer: the timer to be added | 241 | * @timer: the timer to be added |
| 241 | * @cpu: the CPU to start it on | 242 | * @cpu: the CPU to start it on |
| @@ -255,9 +256,10 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
| 255 | } | 256 | } |
| 256 | 257 | ||
| 257 | 258 | ||
| 258 | /*** | 259 | /** |
| 259 | * mod_timer - modify a timer's timeout | 260 | * mod_timer - modify a timer's timeout |
| 260 | * @timer: the timer to be modified | 261 | * @timer: the timer to be modified |
| 262 | * @expires: new timeout in jiffies | ||
| 261 | * | 263 | * |
| 262 | * mod_timer is a more efficient way to update the expire field of an | 264 | * mod_timer is a more efficient way to update the expire field of an |
| 263 | * active timer (if the timer is inactive it will be activated) | 265 | * active timer (if the timer is inactive it will be activated) |
| @@ -291,7 +293,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires) | |||
| 291 | 293 | ||
| 292 | EXPORT_SYMBOL(mod_timer); | 294 | EXPORT_SYMBOL(mod_timer); |
| 293 | 295 | ||
| 294 | /*** | 296 | /** |
| 295 | * del_timer - deactive a timer. | 297 | * del_timer - deactive a timer. |
| 296 | * @timer: the timer to be deactivated | 298 | * @timer: the timer to be deactivated |
| 297 | * | 299 | * |
| @@ -323,7 +325,10 @@ int del_timer(struct timer_list *timer) | |||
| 323 | EXPORT_SYMBOL(del_timer); | 325 | EXPORT_SYMBOL(del_timer); |
| 324 | 326 | ||
| 325 | #ifdef CONFIG_SMP | 327 | #ifdef CONFIG_SMP |
| 326 | /* | 328 | /** |
| 329 | * try_to_del_timer_sync - Try to deactivate a timer | ||
| 330 | * @timer: timer to deactivate | ||
| 331 | * | ||
| 327 | * This function tries to deactivate a timer. Upon successful (ret >= 0) | 332 | * This function tries to deactivate a timer. Upon successful (ret >= 0) |
| 328 | * exit the timer is not queued and the handler is not running on any CPU. | 333 | * exit the timer is not queued and the handler is not running on any CPU. |
| 329 | * | 334 | * |
| @@ -351,7 +356,7 @@ out: | |||
| 351 | return ret; | 356 | return ret; |
| 352 | } | 357 | } |
| 353 | 358 | ||
| 354 | /*** | 359 | /** |
| 355 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | 360 | * del_timer_sync - deactivate a timer and wait for the handler to finish. |
| 356 | * @timer: the timer to be deactivated | 361 | * @timer: the timer to be deactivated |
| 357 | * | 362 | * |
| @@ -374,6 +379,7 @@ int del_timer_sync(struct timer_list *timer) | |||
| 374 | int ret = try_to_del_timer_sync(timer); | 379 | int ret = try_to_del_timer_sync(timer); |
| 375 | if (ret >= 0) | 380 | if (ret >= 0) |
| 376 | return ret; | 381 | return ret; |
| 382 | cpu_relax(); | ||
| 377 | } | 383 | } |
| 378 | } | 384 | } |
| 379 | 385 | ||
| @@ -400,15 +406,15 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index) | |||
| 400 | return index; | 406 | return index; |
| 401 | } | 407 | } |
| 402 | 408 | ||
| 403 | /*** | 409 | #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) |
| 410 | |||
| 411 | /** | ||
| 404 | * __run_timers - run all expired timers (if any) on this CPU. | 412 | * __run_timers - run all expired timers (if any) on this CPU. |
| 405 | * @base: the timer vector to be processed. | 413 | * @base: the timer vector to be processed. |
| 406 | * | 414 | * |
| 407 | * This function cascades all vectors and executes all expired timer | 415 | * This function cascades all vectors and executes all expired timer |
| 408 | * vectors. | 416 | * vectors. |
| 409 | */ | 417 | */ |
| 410 | #define INDEX(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK | ||
| 411 | |||
| 412 | static inline void __run_timers(tvec_base_t *base) | 418 | static inline void __run_timers(tvec_base_t *base) |
| 413 | { | 419 | { |
| 414 | struct timer_list *timer; | 420 | struct timer_list *timer; |
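The relocated INDEX(N) macro picks the slot in cascade level N from bits of timer_jiffies. A standalone userspace sketch of that arithmetic, assuming the usual TVR_BITS = 8 / TVN_BITS = 6 configuration (CONFIG_BASE_SMALL=0):

#include <stdio.h>

#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_MASK ((1 << TVN_BITS) - 1)

int main(void)
{
	unsigned long timer_jiffies = 0x12345678UL;	/* example value */
	int n;

	for (n = 0; n < 4; n++) {
		unsigned long idx =
			(timer_jiffies >> (TVR_BITS + n * TVN_BITS)) & TVN_MASK;
		printf("tv%d slot: %lu\n", n + 2, idx);	/* INDEX(0..3) feed tv2..tv5 */
	}
	return 0;
}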
| @@ -597,7 +603,6 @@ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ | |||
| 597 | long time_precision = 1; /* clock precision (us) */ | 603 | long time_precision = 1; /* clock precision (us) */ |
| 598 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ | 604 | long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ |
| 599 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ | 605 | long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ |
| 600 | static long time_phase; /* phase offset (scaled us) */ | ||
| 601 | long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; | 606 | long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; |
| 602 | /* frequency offset (scaled ppm)*/ | 607 | /* frequency offset (scaled ppm)*/ |
| 603 | static long time_adj; /* tick adjust (scaled 1 / HZ) */ | 608 | static long time_adj; /* tick adjust (scaled 1 / HZ) */ |
| @@ -747,27 +752,14 @@ static long adjtime_adjustment(void) | |||
| 747 | } | 752 | } |
| 748 | 753 | ||
| 749 | /* in the NTP reference this is called "hardclock()" */ | 754 | /* in the NTP reference this is called "hardclock()" */ |
| 750 | static void update_wall_time_one_tick(void) | 755 | static void update_ntp_one_tick(void) |
| 751 | { | 756 | { |
| 752 | long time_adjust_step, delta_nsec; | 757 | long time_adjust_step; |
| 753 | 758 | ||
| 754 | time_adjust_step = adjtime_adjustment(); | 759 | time_adjust_step = adjtime_adjustment(); |
| 755 | if (time_adjust_step) | 760 | if (time_adjust_step) |
| 756 | /* Reduce by this step the amount of time left */ | 761 | /* Reduce by this step the amount of time left */ |
| 757 | time_adjust -= time_adjust_step; | 762 | time_adjust -= time_adjust_step; |
| 758 | delta_nsec = tick_nsec + time_adjust_step * 1000; | ||
| 759 | /* | ||
| 760 | * Advance the phase, once it gets to one microsecond, then | ||
| 761 | * advance the tick more. | ||
| 762 | */ | ||
| 763 | time_phase += time_adj; | ||
| 764 | if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) { | ||
| 765 | long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10)); | ||
| 766 | time_phase -= ltemp << (SHIFT_SCALE - 10); | ||
| 767 | delta_nsec += ltemp; | ||
| 768 | } | ||
| 769 | xtime.tv_nsec += delta_nsec; | ||
| 770 | time_interpolator_update(delta_nsec); | ||
| 771 | 763 | ||
| 772 | /* Changes by adjtime() do not take effect till next tick. */ | 764 | /* Changes by adjtime() do not take effect till next tick. */ |
| 773 | if (time_next_adjust != 0) { | 765 | if (time_next_adjust != 0) { |
| @@ -780,36 +772,404 @@ static void update_wall_time_one_tick(void) | |||
| 780 | * Return how long ticks are at the moment, that is, how much time | 772 | * Return how long ticks are at the moment, that is, how much time |
| 781 | * update_wall_time_one_tick will add to xtime next time we call it | 773 | * update_wall_time_one_tick will add to xtime next time we call it |
| 782 | * (assuming no calls to do_adjtimex in the meantime). | 774 | * (assuming no calls to do_adjtimex in the meantime). |
| 783 | * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 | 775 | * The return value is in fixed-point nanoseconds shifted by the |
| 784 | * bits to the right of the binary point. | 776 | * specified number of bits to the right of the binary point. |
| 785 | * This function has no side-effects. | 777 | * This function has no side-effects. |
| 786 | */ | 778 | */ |
| 787 | u64 current_tick_length(void) | 779 | u64 current_tick_length(void) |
| 788 | { | 780 | { |
| 789 | long delta_nsec; | 781 | long delta_nsec; |
| 782 | u64 ret; | ||
| 790 | 783 | ||
| 784 | /* calculate the finest interval NTP will allow. | ||
| 785 | * ie: nanosecond value shifted by (SHIFT_SCALE - 10) | ||
| 786 | */ | ||
| 791 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; | 787 | delta_nsec = tick_nsec + adjtime_adjustment() * 1000; |
| 792 | return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; | 788 | ret = (u64)delta_nsec << TICK_LENGTH_SHIFT; |
| 789 | ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10)); | ||
| 790 | |||
| 791 | return ret; | ||
| 793 | } | 792 | } |
| 794 | 793 | ||
| 795 | /* | 794 | /* XXX - all of this timekeeping code should be later moved to time.c */ |
| 796 | * Using a loop looks inefficient, but "ticks" is | 795 | #include <linux/clocksource.h> |
| 797 | * usually just one (we shouldn't be losing ticks, | 796 | static struct clocksource *clock; /* pointer to current clocksource */ |
| 798 | * we're doing this this way mainly for interrupt | 797 | |
| 799 | * latency reasons, not because we think we'll | 798 | #ifdef CONFIG_GENERIC_TIME |
| 800 | * have lots of lost timer ticks | 799 | /** |
| 800 | * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook | ||
| 801 | * | ||
| 802 | * private function, must hold xtime_lock lock when being | ||
| 803 | * called. Returns the number of nanoseconds since the | ||
| 804 | * last call to update_wall_time() (adjusted by NTP scaling) | ||
| 805 | */ | ||
| 806 | static inline s64 __get_nsec_offset(void) | ||
| 807 | { | ||
| 808 | cycle_t cycle_now, cycle_delta; | ||
| 809 | s64 ns_offset; | ||
| 810 | |||
| 811 | /* read clocksource: */ | ||
| 812 | cycle_now = clocksource_read(clock); | ||
| 813 | |||
| 814 | /* calculate the delta since the last update_wall_time: */ | ||
| 815 | cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; | ||
| 816 | |||
| 817 | /* convert to nanoseconds: */ | ||
| 818 | ns_offset = cyc2ns(clock, cycle_delta); | ||
| 819 | |||
| 820 | return ns_offset; | ||
| 821 | } | ||
| 822 | |||
| 823 | /** | ||
| 824 | * __get_realtime_clock_ts - Returns the time of day in a timespec | ||
| 825 | * @ts: pointer to the timespec to be set | ||
| 826 | * | ||
| 827 | * Returns the time of day in a timespec. Used by | ||
| 828 | * do_gettimeofday() and get_realtime_clock_ts(). | ||
| 829 | */ | ||
| 830 | static inline void __get_realtime_clock_ts(struct timespec *ts) | ||
| 831 | { | ||
| 832 | unsigned long seq; | ||
| 833 | s64 nsecs; | ||
| 834 | |||
| 835 | do { | ||
| 836 | seq = read_seqbegin(&xtime_lock); | ||
| 837 | |||
| 838 | *ts = xtime; | ||
| 839 | nsecs = __get_nsec_offset(); | ||
| 840 | |||
| 841 | } while (read_seqretry(&xtime_lock, seq)); | ||
| 842 | |||
| 843 | timespec_add_ns(ts, nsecs); | ||
| 844 | } | ||
| 845 | |||
| 846 | /** | ||
| 847 | * getnstimeofday - Returns the time of day in a timespec | ||
| 848 | * @ts: pointer to the timespec to be set | ||
| 849 | * | ||
| 850 | * Returns the time of day in a timespec. | ||
| 851 | */ | ||
| 852 | void getnstimeofday(struct timespec *ts) | ||
| 853 | { | ||
| 854 | __get_realtime_clock_ts(ts); | ||
| 855 | } | ||
| 856 | |||
| 857 | EXPORT_SYMBOL(getnstimeofday); | ||
| 858 | |||
| 859 | /** | ||
| 860 | * do_gettimeofday - Returns the time of day in a timeval | ||
| 861 | * @tv: pointer to the timeval to be set | ||
| 862 | * | ||
| 863 | * NOTE: Users should be converted to using get_realtime_clock_ts() | ||
| 801 | */ | 864 | */ |
| 802 | static void update_wall_time(unsigned long ticks) | 865 | void do_gettimeofday(struct timeval *tv) |
| 803 | { | 866 | { |
| 867 | struct timespec now; | ||
| 868 | |||
| 869 | __get_realtime_clock_ts(&now); | ||
| 870 | tv->tv_sec = now.tv_sec; | ||
| 871 | tv->tv_usec = now.tv_nsec/1000; | ||
| 872 | } | ||
| 873 | |||
| 874 | EXPORT_SYMBOL(do_gettimeofday); | ||
| 875 | /** | ||
| 876 | * do_settimeofday - Sets the time of day | ||
| 877 | * @tv: pointer to the timespec variable containing the new time | ||
| 878 | * | ||
| 879 | * Sets the time of day to the new time and update NTP and notify hrtimers | ||
| 880 | */ | ||
| 881 | int do_settimeofday(struct timespec *tv) | ||
| 882 | { | ||
| 883 | unsigned long flags; | ||
| 884 | time_t wtm_sec, sec = tv->tv_sec; | ||
| 885 | long wtm_nsec, nsec = tv->tv_nsec; | ||
| 886 | |||
| 887 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | ||
| 888 | return -EINVAL; | ||
| 889 | |||
| 890 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 891 | |||
| 892 | nsec -= __get_nsec_offset(); | ||
| 893 | |||
| 894 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | ||
| 895 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
| 896 | |||
| 897 | set_normalized_timespec(&xtime, sec, nsec); | ||
| 898 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
| 899 | |||
| 900 | clock->error = 0; | ||
| 901 | ntp_clear(); | ||
| 902 | |||
| 903 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 904 | |||
| 905 | /* signal hrtimers about time change */ | ||
| 906 | clock_was_set(); | ||
| 907 | |||
| 908 | return 0; | ||
| 909 | } | ||
| 910 | |||
| 911 | EXPORT_SYMBOL(do_settimeofday); | ||
| 912 | |||
| 913 | /** | ||
| 914 | * change_clocksource - Swaps clocksources if a new one is available | ||
| 915 | * | ||
| 916 | * Accumulates current time interval and initializes new clocksource | ||
| 917 | */ | ||
| 918 | static int change_clocksource(void) | ||
| 919 | { | ||
| 920 | struct clocksource *new; | ||
| 921 | cycle_t now; | ||
| 922 | u64 nsec; | ||
| 923 | new = clocksource_get_next(); | ||
| 924 | if (clock != new) { | ||
| 925 | now = clocksource_read(new); | ||
| 926 | nsec = __get_nsec_offset(); | ||
| 927 | timespec_add_ns(&xtime, nsec); | ||
| 928 | |||
| 929 | clock = new; | ||
| 930 | clock->cycle_last = now; | ||
| 931 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | ||
| 932 | clock->name); | ||
| 933 | return 1; | ||
| 934 | } else if (clock->update_callback) { | ||
| 935 | return clock->update_callback(); | ||
| 936 | } | ||
| 937 | return 0; | ||
| 938 | } | ||
| 939 | #else | ||
| 940 | #define change_clocksource() (0) | ||
| 941 | #endif | ||
| 942 | |||
| 943 | /** | ||
| 944 | * timekeeping_is_continuous - check to see if timekeeping is free running | ||
| 945 | */ | ||
| 946 | int timekeeping_is_continuous(void) | ||
| 947 | { | ||
| 948 | unsigned long seq; | ||
| 949 | int ret; | ||
| 950 | |||
| 804 | do { | 951 | do { |
| 805 | ticks--; | 952 | seq = read_seqbegin(&xtime_lock); |
| 806 | update_wall_time_one_tick(); | 953 | |
| 807 | if (xtime.tv_nsec >= 1000000000) { | 954 | ret = clock->is_continuous; |
| 808 | xtime.tv_nsec -= 1000000000; | 955 | |
| 956 | } while (read_seqretry(&xtime_lock, seq)); | ||
| 957 | |||
| 958 | return ret; | ||
| 959 | } | ||
| 960 | |||
| 961 | /* | ||
| 962 | * timekeeping_init - Initializes the clocksource and common timekeeping values | ||
| 963 | */ | ||
| 964 | void __init timekeeping_init(void) | ||
| 965 | { | ||
| 966 | unsigned long flags; | ||
| 967 | |||
| 968 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 969 | clock = clocksource_get_next(); | ||
| 970 | clocksource_calculate_interval(clock, tick_nsec); | ||
| 971 | clock->cycle_last = clocksource_read(clock); | ||
| 972 | ntp_clear(); | ||
| 973 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 974 | } | ||
| 975 | |||
| 976 | |||
| 977 | static int timekeeping_suspended; | ||
| 978 | /** | ||
| 979 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | ||
| 980 | * @dev: unused | ||
| 981 | * | ||
| 982 | * This is for the generic clocksource timekeeping. | ||
| 983 | * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are | ||
| 984 | * still managed by arch specific suspend/resume code. | ||
| 985 | */ | ||
| 986 | static int timekeeping_resume(struct sys_device *dev) | ||
| 987 | { | ||
| 988 | unsigned long flags; | ||
| 989 | |||
| 990 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 991 | /* restart the last cycle value */ | ||
| 992 | clock->cycle_last = clocksource_read(clock); | ||
| 993 | clock->error = 0; | ||
| 994 | timekeeping_suspended = 0; | ||
| 995 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 996 | return 0; | ||
| 997 | } | ||
| 998 | |||
| 999 | static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | ||
| 1000 | { | ||
| 1001 | unsigned long flags; | ||
| 1002 | |||
| 1003 | write_seqlock_irqsave(&xtime_lock, flags); | ||
| 1004 | timekeeping_suspended = 1; | ||
| 1005 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
| 1006 | return 0; | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | /* sysfs resume/suspend bits for timekeeping */ | ||
| 1010 | static struct sysdev_class timekeeping_sysclass = { | ||
| 1011 | .resume = timekeeping_resume, | ||
| 1012 | .suspend = timekeeping_suspend, | ||
| 1013 | set_kset_name("timekeeping"), | ||
| 1014 | }; | ||
| 1015 | |||
| 1016 | static struct sys_device device_timer = { | ||
| 1017 | .id = 0, | ||
| 1018 | .cls = &timekeeping_sysclass, | ||
| 1019 | }; | ||
| 1020 | |||
| 1021 | static int __init timekeeping_init_device(void) | ||
| 1022 | { | ||
| 1023 | int error = sysdev_class_register(&timekeeping_sysclass); | ||
| 1024 | if (!error) | ||
| 1025 | error = sysdev_register(&device_timer); | ||
| 1026 | return error; | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | device_initcall(timekeeping_init_device); | ||
| 1030 | |||
| 1031 | /* | ||
| 1032 | * If the error is already larger, we look ahead even further | ||
| 1033 | * to compensate for late or lost adjustments. | ||
| 1034 | */ | ||
| 1035 | static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 *offset) | ||
| 1036 | { | ||
| 1037 | s64 tick_error, i; | ||
| 1038 | u32 look_ahead, adj; | ||
| 1039 | s32 error2, mult; | ||
| 1040 | |||
| 1041 | /* | ||
| 1042 | * Use the current error value to determine how much to look ahead. | ||
| 1043 | * The larger the error the slower we adjust for it to avoid problems | ||
| 1044 | * with losing too many ticks, otherwise we would overadjust and | ||
| 1045 | * produce an even larger error. The smaller the adjustment the | ||
| 1046 | * faster we try to adjust for it, as lost ticks can do less harm | ||
| 1047 | * here. This is tuned so that an error of about 1 msec is adjusted | ||
| 1048 | * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). | ||
| 1049 | */ | ||
| 1050 | error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ); | ||
| 1051 | error2 = abs(error2); | ||
| 1052 | for (look_ahead = 0; error2 > 0; look_ahead++) | ||
| 1053 | error2 >>= 2; | ||
| 1054 | |||
| 1055 | /* | ||
| 1056 | * Now calculate the error in (1 << look_ahead) ticks, but first | ||
| 1057 | * remove the single look ahead already included in the error. | ||
| 1058 | */ | ||
| 1059 | tick_error = current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1); | ||
| 1060 | tick_error -= clock->xtime_interval >> 1; | ||
| 1061 | error = ((error - tick_error) >> look_ahead) + tick_error; | ||
| 1062 | |||
| 1063 | /* Finally calculate the adjustment shift value. */ | ||
| 1064 | i = *interval; | ||
| 1065 | mult = 1; | ||
| 1066 | if (error < 0) { | ||
| 1067 | error = -error; | ||
| 1068 | *interval = -*interval; | ||
| 1069 | *offset = -*offset; | ||
| 1070 | mult = -1; | ||
| 1071 | } | ||
| 1072 | for (adj = 0; error > i; adj++) | ||
| 1073 | error >>= 1; | ||
| 1074 | |||
| 1075 | *interval <<= adj; | ||
| 1076 | *offset <<= adj; | ||
| 1077 | return mult << adj; | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | /* | ||
| 1081 | * Adjust the multiplier to reduce the error value, | ||
| 1082 | * this is optimized for the most common adjustments of -1,0,1, | ||
| 1083 | * for other values we can do a bit more work. | ||
| 1084 | */ | ||
| 1085 | static void clocksource_adjust(struct clocksource *clock, s64 offset) | ||
| 1086 | { | ||
| 1087 | s64 error, interval = clock->cycle_interval; | ||
| 1088 | int adj; | ||
| 1089 | |||
| 1090 | error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1); | ||
| 1091 | if (error > interval) { | ||
| 1092 | error >>= 2; | ||
| 1093 | if (likely(error <= interval)) | ||
| 1094 | adj = 1; | ||
| 1095 | else | ||
| 1096 | adj = clocksource_bigadjust(error, &interval, &offset); | ||
| 1097 | } else if (error < -interval) { | ||
| 1098 | error >>= 2; | ||
| 1099 | if (likely(error >= -interval)) { | ||
| 1100 | adj = -1; | ||
| 1101 | interval = -interval; | ||
| 1102 | offset = -offset; | ||
| 1103 | } else | ||
| 1104 | adj = clocksource_bigadjust(error, &interval, &offset); | ||
| 1105 | } else | ||
| 1106 | return; | ||
| 1107 | |||
| 1108 | clock->mult += adj; | ||
| 1109 | clock->xtime_interval += interval; | ||
| 1110 | clock->xtime_nsec -= offset; | ||
| 1111 | clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift); | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | /** | ||
| 1115 | * update_wall_time - Uses the current clocksource to increment the wall time | ||
| 1116 | * | ||
| 1117 | * Called from the timer interrupt, must hold a write on xtime_lock. | ||
| 1118 | */ | ||
| 1119 | static void update_wall_time(void) | ||
| 1120 | { | ||
| 1121 | cycle_t offset; | ||
| 1122 | |||
| 1123 | /* Make sure we're fully resumed: */ | ||
| 1124 | if (unlikely(timekeeping_suspended)) | ||
| 1125 | return; | ||
| 1126 | |||
| 1127 | #ifdef CONFIG_GENERIC_TIME | ||
| 1128 | offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; | ||
| 1129 | #else | ||
| 1130 | offset = clock->cycle_interval; | ||
| 1131 | #endif | ||
| 1132 | clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; | ||
| 1133 | |||
| 1134 | /* normally this loop will run just once, however in the | ||
| 1135 | * case of lost or late ticks, it will accumulate correctly. | ||
| 1136 | */ | ||
| 1137 | while (offset >= clock->cycle_interval) { | ||
| 1138 | /* accumulate one interval */ | ||
| 1139 | clock->xtime_nsec += clock->xtime_interval; | ||
| 1140 | clock->cycle_last += clock->cycle_interval; | ||
| 1141 | offset -= clock->cycle_interval; | ||
| 1142 | |||
| 1143 | if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { | ||
| 1144 | clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; | ||
| 809 | xtime.tv_sec++; | 1145 | xtime.tv_sec++; |
| 810 | second_overflow(); | 1146 | second_overflow(); |
| 811 | } | 1147 | } |
| 812 | } while (ticks); | 1148 | |
| 1149 | /* interpolator bits */ | ||
| 1150 | time_interpolator_update(clock->xtime_interval | ||
| 1151 | >> clock->shift); | ||
| 1152 | /* increment the NTP state machine */ | ||
| 1153 | update_ntp_one_tick(); | ||
| 1154 | |||
| 1155 | /* accumulate error between NTP and clock interval */ | ||
| 1156 | clock->error += current_tick_length(); | ||
| 1157 | clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | /* correct the clock when NTP error is too big */ | ||
| 1161 | clocksource_adjust(clock, offset); | ||
| 1162 | |||
| 1163 | /* store full nanoseconds into xtime */ | ||
| 1164 | xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; | ||
| 1165 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | ||
| 1166 | |||
| 1167 | /* check to see if there is a new clocksource to use */ | ||
| 1168 | if (change_clocksource()) { | ||
| 1169 | clock->error = 0; | ||
| 1170 | clock->xtime_nsec = 0; | ||
| 1171 | clocksource_calculate_interval(clock, tick_nsec); | ||
| 1172 | } | ||
| 813 | } | 1173 | } |
| 814 | 1174 | ||
| 815 | /* | 1175 | /* |
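The new update_wall_time() above accumulates whole clocksource intervals and keeps xtime_nsec left-shifted by clock->shift so sub-nanosecond remainders are not lost between ticks. A standalone sketch of the underlying cyc2ns()-style conversion it relies on, ns = (cycles * mult) >> shift, with made-up mult/shift values (not taken from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mult = 4194304;	/* illustrative: ~1 ns per cycle at shift 22 */
	uint32_t shift = 22;
	uint64_t cycle_delta = 1000000;	/* cycles elapsed since cycle_last */

	uint64_t ns = ((uint64_t)cycle_delta * mult) >> shift;
	printf("%llu cycles -> %llu ns\n",
	       (unsigned long long)cycle_delta, (unsigned long long)ns);
	return 0;
}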
| @@ -862,10 +1222,8 @@ static inline void calc_load(unsigned long ticks) | |||
| 862 | unsigned long active_tasks; /* fixed-point */ | 1222 | unsigned long active_tasks; /* fixed-point */ |
| 863 | static int count = LOAD_FREQ; | 1223 | static int count = LOAD_FREQ; |
| 864 | 1224 | ||
| 865 | count -= ticks; | 1225 | active_tasks = count_active_tasks(); |
| 866 | if (count < 0) { | 1226 | for (count -= ticks; count < 0; count += LOAD_FREQ) { |
| 867 | count += LOAD_FREQ; | ||
| 868 | active_tasks = count_active_tasks(); | ||
| 869 | CALC_LOAD(avenrun[0], EXP_1, active_tasks); | 1227 | CALC_LOAD(avenrun[0], EXP_1, active_tasks); |
| 870 | CALC_LOAD(avenrun[1], EXP_5, active_tasks); | 1228 | CALC_LOAD(avenrun[1], EXP_5, active_tasks); |
| 871 | CALC_LOAD(avenrun[2], EXP_15, active_tasks); | 1229 | CALC_LOAD(avenrun[2], EXP_15, active_tasks); |
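The calc_load() rewrite above runs CALC_LOAD once per missed LOAD_FREQ period instead of at most once. A standalone sketch of that fixed-point exponential average, using what are believed to be the usual kernel constants (FSHIFT = 11, EXP_1 = 1884 for the 1-minute average):

#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1 << FSHIFT)
#define EXP_1	1884	/* 1/exp(5s/1min) in fixed point */

int main(void)
{
	unsigned long avenrun = 0;
	unsigned long active_tasks = 3 * FIXED_1;	/* 3 runnable tasks */
	int i;

	for (i = 0; i < 12; i++)			/* one minute of 5 s samples */
		avenrun = (avenrun * EXP_1 +
			   active_tasks * (FIXED_1 - EXP_1)) >> FSHIFT;

	printf("loadavg ~ %lu.%02lu\n", avenrun >> FSHIFT,
	       ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}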
| @@ -880,7 +1238,7 @@ unsigned long wall_jiffies = INITIAL_JIFFIES; | |||
| 880 | * playing with xtime and avenrun. | 1238 | * playing with xtime and avenrun. |
| 881 | */ | 1239 | */ |
| 882 | #ifndef ARCH_HAVE_XTIME_LOCK | 1240 | #ifndef ARCH_HAVE_XTIME_LOCK |
| 883 | seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; | 1241 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); |
| 884 | 1242 | ||
| 885 | EXPORT_SYMBOL(xtime_lock); | 1243 | EXPORT_SYMBOL(xtime_lock); |
| 886 | #endif | 1244 | #endif |
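xtime_lock is now declared with DEFINE_SEQLOCK(); the reader/writer discipline it implies, and which __get_realtime_clock_ts() above follows, looks roughly like this kernel-style sketch with illustrative names:

static DEFINE_SEQLOCK(my_lock);
static u64 my_ns;

static void writer_tick(void)		/* e.g. from the timer interrupt */
{
	unsigned long flags;

	write_seqlock_irqsave(&my_lock, flags);
	my_ns += 1000000;		/* advance by 1 ms */
	write_sequnlock_irqrestore(&my_lock, flags);
}

static u64 reader_get(void)
{
	unsigned long seq;
	u64 val;

	do {
		seq = read_seqbegin(&my_lock);	/* snapshot writer sequence */
		val = my_ns;
	} while (read_seqretry(&my_lock, seq));	/* retry if a writer raced us */

	return val;
}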
| @@ -910,15 +1268,10 @@ void run_local_timers(void) | |||
| 910 | * Called by the timer interrupt. xtime_lock must already be taken | 1268 | * Called by the timer interrupt. xtime_lock must already be taken |
| 911 | * by the timer IRQ! | 1269 | * by the timer IRQ! |
| 912 | */ | 1270 | */ |
| 913 | static inline void update_times(void) | 1271 | static inline void update_times(unsigned long ticks) |
| 914 | { | 1272 | { |
| 915 | unsigned long ticks; | 1273 | wall_jiffies += ticks; |
| 916 | 1274 | update_wall_time(); | |
| 917 | ticks = jiffies - wall_jiffies; | ||
| 918 | if (ticks) { | ||
| 919 | wall_jiffies += ticks; | ||
| 920 | update_wall_time(ticks); | ||
| 921 | } | ||
| 922 | calc_load(ticks); | 1275 | calc_load(ticks); |
| 923 | } | 1276 | } |
| 924 | 1277 | ||
| @@ -928,12 +1281,10 @@ static inline void update_times(void) | |||
| 928 | * jiffies is defined in the linker script... | 1281 | * jiffies is defined in the linker script... |
| 929 | */ | 1282 | */ |
| 930 | 1283 | ||
| 931 | void do_timer(struct pt_regs *regs) | 1284 | void do_timer(unsigned long ticks) |
| 932 | { | 1285 | { |
| 933 | jiffies_64++; | 1286 | jiffies_64 += ticks; |
| 934 | /* prevent loading jiffies before storing new jiffies_64 value. */ | 1287 | update_times(ticks); |
| 935 | barrier(); | ||
| 936 | update_times(); | ||
| 937 | } | 1288 | } |
| 938 | 1289 | ||
| 939 | #ifdef __ARCH_WANT_SYS_ALARM | 1290 | #ifdef __ARCH_WANT_SYS_ALARM |
| @@ -971,46 +1322,19 @@ asmlinkage long sys_getpid(void) | |||
| 971 | } | 1322 | } |
| 972 | 1323 | ||
| 973 | /* | 1324 | /* |
| 974 | * Accessing ->group_leader->real_parent is not SMP-safe, it could | 1325 | * Accessing ->real_parent is not SMP-safe, it could |
| 975 | * change from under us. However, rather than getting any lock | 1326 | * change from under us. However, we can use a stale |
| 976 | * we can use an optimistic algorithm: get the parent | 1327 | * value of ->real_parent under rcu_read_lock(), see |
| 977 | * pid, and go back and check that the parent is still | 1328 | * release_task()->call_rcu(delayed_put_task_struct). |
| 978 | * the same. If it has changed (which is extremely unlikely | ||
| 979 | * indeed), we just try again.. | ||
| 980 | * | ||
| 981 | * NOTE! This depends on the fact that even if we _do_ | ||
| 982 | * get an old value of "parent", we can happily dereference | ||
| 983 | * the pointer (it was and remains a dereferencable kernel pointer | ||
| 984 | * no matter what): we just can't necessarily trust the result | ||
| 985 | * until we know that the parent pointer is valid. | ||
| 986 | * | ||
| 987 | * NOTE2: ->group_leader never changes from under us. | ||
| 988 | */ | 1329 | */ |
| 989 | asmlinkage long sys_getppid(void) | 1330 | asmlinkage long sys_getppid(void) |
| 990 | { | 1331 | { |
| 991 | int pid; | 1332 | int pid; |
| 992 | struct task_struct *me = current; | ||
| 993 | struct task_struct *parent; | ||
| 994 | 1333 | ||
| 995 | parent = me->group_leader->real_parent; | 1334 | rcu_read_lock(); |
| 996 | for (;;) { | 1335 | pid = rcu_dereference(current->real_parent)->tgid; |
| 997 | pid = parent->tgid; | 1336 | rcu_read_unlock(); |
| 998 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) | ||
| 999 | { | ||
| 1000 | struct task_struct *old = parent; | ||
| 1001 | 1337 | ||
| 1002 | /* | ||
| 1003 | * Make sure we read the pid before re-reading the | ||
| 1004 | * parent pointer: | ||
| 1005 | */ | ||
| 1006 | smp_rmb(); | ||
| 1007 | parent = me->group_leader->real_parent; | ||
| 1008 | if (old != parent) | ||
| 1009 | continue; | ||
| 1010 | } | ||
| 1011 | #endif | ||
| 1012 | break; | ||
| 1013 | } | ||
| 1014 | return pid; | 1338 | return pid; |
| 1015 | } | 1339 | } |
| 1016 | 1340 | ||
| @@ -1042,7 +1366,7 @@ asmlinkage long sys_getegid(void) | |||
| 1042 | 1366 | ||
| 1043 | static void process_timeout(unsigned long __data) | 1367 | static void process_timeout(unsigned long __data) |
| 1044 | { | 1368 | { |
| 1045 | wake_up_process((task_t *)__data); | 1369 | wake_up_process((struct task_struct *)__data); |
| 1046 | } | 1370 | } |
| 1047 | 1371 | ||
| 1048 | /** | 1372 | /** |
| @@ -1144,8 +1468,9 @@ asmlinkage long sys_gettid(void) | |||
| 1144 | return current->pid; | 1468 | return current->pid; |
| 1145 | } | 1469 | } |
| 1146 | 1470 | ||
| 1147 | /* | 1471 | /** |
| 1148 | * sys_sysinfo - fill in sysinfo struct | 1472 | * sys_sysinfo - fill in sysinfo struct |
| 1473 | * @info: pointer to buffer to fill | ||
| 1149 | */ | 1474 | */ |
| 1150 | asmlinkage long sys_sysinfo(struct sysinfo __user *info) | 1475 | asmlinkage long sys_sysinfo(struct sysinfo __user *info) |
| 1151 | { | 1476 | { |
| @@ -1233,6 +1558,13 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) | |||
| 1233 | return 0; | 1558 | return 0; |
| 1234 | } | 1559 | } |
| 1235 | 1560 | ||
| 1561 | /* | ||
| 1562 | * lockdep: we want to track each per-CPU base as a separate lock-class, | ||
| 1563 | * but timer-bases are kmalloc()-ed, so we need to attach separate | ||
| 1564 | * keys to them: | ||
| 1565 | */ | ||
| 1566 | static struct lock_class_key base_lock_keys[NR_CPUS]; | ||
| 1567 | |||
| 1236 | static int __devinit init_timers_cpu(int cpu) | 1568 | static int __devinit init_timers_cpu(int cpu) |
| 1237 | { | 1569 | { |
| 1238 | int j; | 1570 | int j; |
| @@ -1268,6 +1600,8 @@ static int __devinit init_timers_cpu(int cpu) | |||
| 1268 | } | 1600 | } |
| 1269 | 1601 | ||
| 1270 | spin_lock_init(&base->lock); | 1602 | spin_lock_init(&base->lock); |
| 1603 | lockdep_set_class(&base->lock, base_lock_keys + cpu); | ||
| 1604 | |||
| 1271 | for (j = 0; j < TVN_SIZE; j++) { | 1605 | for (j = 0; j < TVN_SIZE; j++) { |
| 1272 | INIT_LIST_HEAD(base->tv5.vec + j); | 1606 | INIT_LIST_HEAD(base->tv5.vec + j); |
| 1273 | INIT_LIST_HEAD(base->tv4.vec + j); | 1607 | INIT_LIST_HEAD(base->tv4.vec + j); |
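The lockdep hunks above give each kmalloc()-ed per-CPU timer base its own static lock class. A hedged sketch of the same idiom for any dynamically allocated lock (names illustrative):

static struct lock_class_key my_lock_keys[NR_CPUS];

static void my_base_init(spinlock_t *lock, int cpu)
{
	spin_lock_init(lock);
	/* a static key per CPU lets lockdep track each base as a separate class */
	lockdep_set_class(lock, &my_lock_keys[cpu]);
}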
| @@ -1326,7 +1660,7 @@ static void __devinit migrate_timers(int cpu) | |||
| 1326 | } | 1660 | } |
| 1327 | #endif /* CONFIG_HOTPLUG_CPU */ | 1661 | #endif /* CONFIG_HOTPLUG_CPU */ |
| 1328 | 1662 | ||
| 1329 | static int timer_cpu_notify(struct notifier_block *self, | 1663 | static int __cpuinit timer_cpu_notify(struct notifier_block *self, |
| 1330 | unsigned long action, void *hcpu) | 1664 | unsigned long action, void *hcpu) |
| 1331 | { | 1665 | { |
| 1332 | long cpu = (long)hcpu; | 1666 | long cpu = (long)hcpu; |
| @@ -1346,15 +1680,17 @@ static int timer_cpu_notify(struct notifier_block *self, | |||
| 1346 | return NOTIFY_OK; | 1680 | return NOTIFY_OK; |
| 1347 | } | 1681 | } |
| 1348 | 1682 | ||
| 1349 | static struct notifier_block timers_nb = { | 1683 | static struct notifier_block __cpuinitdata timers_nb = { |
| 1350 | .notifier_call = timer_cpu_notify, | 1684 | .notifier_call = timer_cpu_notify, |
| 1351 | }; | 1685 | }; |
| 1352 | 1686 | ||
| 1353 | 1687 | ||
| 1354 | void __init init_timers(void) | 1688 | void __init init_timers(void) |
| 1355 | { | 1689 | { |
| 1356 | timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, | 1690 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, |
| 1357 | (void *)(long)smp_processor_id()); | 1691 | (void *)(long)smp_processor_id()); |
| 1692 | |||
| 1693 | BUG_ON(err == NOTIFY_BAD); | ||
| 1358 | register_cpu_notifier(&timers_nb); | 1694 | register_cpu_notifier(&timers_nb); |
| 1359 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); | 1695 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); |
| 1360 | } | 1696 | } |
