diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-19 20:11:10 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-19 20:11:10 -0400 |
| commit | 164d44fd92e79d5bce54d0d62df9f856f7b23925 (patch) | |
| tree | 9f21607849b7e684b255578ffdf41951bc31787e | |
| parent | 5bfec46baa3a752393433b8d89d3b2c70820f61d (diff) | |
| parent | d7e81c269db899b800e0963dc4aceece1f82a680 (diff) | |
Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
clocksource: Add clocksource_register_hz/khz interface
posix-cpu-timers: Optimize run_posix_cpu_timers()
time: Remove xtime_cache
mqueue: Convert message queue timeout to use hrtimers
hrtimers: Provide schedule_hrtimeout for CLOCK_REALTIME
timers: Introduce the concept of timer slack for legacy timers
ntp: Remove tickadj
ntp: Make time_adjust static
time: Add xtime, wall_to_monotonic to feature-removal-schedule
timer: Try to survive timer callback preempt_count leak
timer: Split out timer function call
timer: Print function name for timer callbacks modifying preemption count
time: Clean up warp_clock()
cpu-timers: Avoid iterating over all threads in fastpath_timer_check()
cpu-timers: Change SIGEV_NONE timer implementation
cpu-timers: Return correct previous timer reload value
cpu-timers: Cleanup arm_timer()
cpu-timers: Simplify RLIMIT_CPU handling
| -rw-r--r-- | Documentation/feature-removal-schedule.txt | 10 | ||||
| -rw-r--r-- | include/linux/clocksource.h | 19 | ||||
| -rw-r--r-- | include/linux/hrtimer.h | 2 | ||||
| -rw-r--r-- | include/linux/time.h | 1 | ||||
| -rw-r--r-- | include/linux/timer.h | 10 | ||||
| -rw-r--r-- | include/linux/timex.h | 5 | ||||
| -rw-r--r-- | ipc/mqueue.c | 74 | ||||
| -rw-r--r-- | kernel/hrtimer.c | 67 | ||||
| -rw-r--r-- | kernel/posix-cpu-timers.c | 298 | ||||
| -rw-r--r-- | kernel/time.c | 11 | ||||
| -rw-r--r-- | kernel/time/clocksource.c | 48 | ||||
| -rw-r--r-- | kernel/time/ntp.c | 2 | ||||
| -rw-r--r-- | kernel/time/timekeeping.c | 35 | ||||
| -rw-r--r-- | kernel/timer.c | 137 |
14 files changed, 376 insertions, 343 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index d9d3fbcb705d..e7965f4a385a 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
| @@ -541,6 +541,16 @@ Who: Avi Kivity <avi@redhat.com> | |||
| 541 | 541 | ||
| 542 | ---------------------------- | 542 | ---------------------------- |
| 543 | 543 | ||
| 544 | What: xtime, wall_to_monotonic | ||
| 545 | When: 2.6.36+ | ||
| 546 | Files: kernel/time/timekeeping.c include/linux/time.h | ||
| 547 | Why: Cleaning up timekeeping internal values. Please use | ||
| 548 | existing timekeeping accessor functions to access | ||
| 549 | the equivalent functionality. | ||
| 550 | Who: John Stultz <johnstul@us.ibm.com> | ||
| 551 | |||
| 552 | ---------------------------- | ||
| 553 | |||
| 544 | What: KVM kernel-allocated memory slots | 554 | What: KVM kernel-allocated memory slots |
| 545 | When: July 2010 | 555 | When: July 2010 |
| 546 | Why: Since 2.6.25, kvm supports user-allocated memory slots, which are | 556 | Why: Since 2.6.25, kvm supports user-allocated memory slots, which are |
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 4bca8b60cdf7..5ea3c60c160c 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h | |||
| @@ -273,7 +273,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) | |||
| 273 | } | 273 | } |
| 274 | 274 | ||
| 275 | 275 | ||
| 276 | /* used to install a new clocksource */ | ||
| 277 | extern int clocksource_register(struct clocksource*); | 276 | extern int clocksource_register(struct clocksource*); |
| 278 | extern void clocksource_unregister(struct clocksource*); | 277 | extern void clocksource_unregister(struct clocksource*); |
| 279 | extern void clocksource_touch_watchdog(void); | 278 | extern void clocksource_touch_watchdog(void); |
| @@ -287,6 +286,24 @@ extern void clocksource_mark_unstable(struct clocksource *cs); | |||
| 287 | extern void | 286 | extern void |
| 288 | clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); | 287 | clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); |
| 289 | 288 | ||
| 289 | /* | ||
| 290 | * Don't call __clocksource_register_scale directly, use | ||
| 291 | * clocksource_register_hz/khz | ||
| 292 | */ | ||
| 293 | extern int | ||
| 294 | __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); | ||
| 295 | |||
| 296 | static inline int clocksource_register_hz(struct clocksource *cs, u32 hz) | ||
| 297 | { | ||
| 298 | return __clocksource_register_scale(cs, 1, hz); | ||
| 299 | } | ||
| 300 | |||
| 301 | static inline int clocksource_register_khz(struct clocksource *cs, u32 khz) | ||
| 302 | { | ||
| 303 | return __clocksource_register_scale(cs, 1000, khz); | ||
| 304 | } | ||
| 305 | |||
| 306 | |||
| 290 | static inline void | 307 | static inline void |
| 291 | clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec) | 308 | clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec) |
| 292 | { | 309 | { |
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5d86fb2309d2..fd0c1b857d3d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h | |||
| @@ -422,6 +422,8 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, | |||
| 422 | 422 | ||
| 423 | extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | 423 | extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, |
| 424 | const enum hrtimer_mode mode); | 424 | const enum hrtimer_mode mode); |
| 425 | extern int schedule_hrtimeout_range_clock(ktime_t *expires, | ||
| 426 | unsigned long delta, const enum hrtimer_mode mode, int clock); | ||
| 425 | extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); | 427 | extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); |
| 426 | 428 | ||
| 427 | /* Soft interrupt function to run the hrtimer queues: */ | 429 | /* Soft interrupt function to run the hrtimer queues: */ |
diff --git a/include/linux/time.h b/include/linux/time.h index 6e026e45a179..ea3559f0b3f2 100644 --- a/include/linux/time.h +++ b/include/linux/time.h | |||
| @@ -150,7 +150,6 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); | |||
| 150 | extern int timekeeping_valid_for_hres(void); | 150 | extern int timekeeping_valid_for_hres(void); |
| 151 | extern u64 timekeeping_max_deferment(void); | 151 | extern u64 timekeeping_max_deferment(void); |
| 152 | extern void update_wall_time(void); | 152 | extern void update_wall_time(void); |
| 153 | extern void update_xtime_cache(u64 nsec); | ||
| 154 | extern void timekeeping_leap_insert(int leapsecond); | 153 | extern void timekeeping_leap_insert(int leapsecond); |
| 155 | 154 | ||
| 156 | struct tms; | 155 | struct tms; |
diff --git a/include/linux/timer.h b/include/linux/timer.h index a2d1eb6cb3f0..ea965b857a50 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h | |||
| @@ -10,13 +10,19 @@ | |||
| 10 | struct tvec_base; | 10 | struct tvec_base; |
| 11 | 11 | ||
| 12 | struct timer_list { | 12 | struct timer_list { |
| 13 | /* | ||
| 14 | * All fields that change during normal runtime grouped to the | ||
| 15 | * same cacheline | ||
| 16 | */ | ||
| 13 | struct list_head entry; | 17 | struct list_head entry; |
| 14 | unsigned long expires; | 18 | unsigned long expires; |
| 19 | struct tvec_base *base; | ||
| 15 | 20 | ||
| 16 | void (*function)(unsigned long); | 21 | void (*function)(unsigned long); |
| 17 | unsigned long data; | 22 | unsigned long data; |
| 18 | 23 | ||
| 19 | struct tvec_base *base; | 24 | int slack; |
| 25 | |||
| 20 | #ifdef CONFIG_TIMER_STATS | 26 | #ifdef CONFIG_TIMER_STATS |
| 21 | void *start_site; | 27 | void *start_site; |
| 22 | char start_comm[16]; | 28 | char start_comm[16]; |
| @@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires); | |||
| 165 | extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); | 171 | extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); |
| 166 | extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); | 172 | extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); |
| 167 | 173 | ||
| 174 | extern void set_timer_slack(struct timer_list *time, int slack_hz); | ||
| 175 | |||
| 168 | #define TIMER_NOT_PINNED 0 | 176 | #define TIMER_NOT_PINNED 0 |
| 169 | #define TIMER_PINNED 1 | 177 | #define TIMER_PINNED 1 |
| 170 | /* | 178 | /* |
diff --git a/include/linux/timex.h b/include/linux/timex.h index 7a082b32d8e1..32d852f8cbe4 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h | |||
| @@ -232,13 +232,11 @@ struct timex { | |||
| 232 | */ | 232 | */ |
| 233 | extern unsigned long tick_usec; /* USER_HZ period (usec) */ | 233 | extern unsigned long tick_usec; /* USER_HZ period (usec) */ |
| 234 | extern unsigned long tick_nsec; /* ACTHZ period (nsec) */ | 234 | extern unsigned long tick_nsec; /* ACTHZ period (nsec) */ |
| 235 | extern int tickadj; /* amount of adjustment per tick */ | ||
| 236 | 235 | ||
| 237 | /* | 236 | /* |
| 238 | * phase-lock loop variables | 237 | * phase-lock loop variables |
| 239 | */ | 238 | */ |
| 240 | extern int time_status; /* clock synchronization status bits */ | 239 | extern int time_status; /* clock synchronization status bits */ |
| 241 | extern long time_adjust; /* The amount of adjtime left */ | ||
| 242 | 240 | ||
| 243 | extern void ntp_init(void); | 241 | extern void ntp_init(void); |
| 244 | extern void ntp_clear(void); | 242 | extern void ntp_clear(void); |
| @@ -271,9 +269,6 @@ extern void second_overflow(void); | |||
| 271 | extern void update_ntp_one_tick(void); | 269 | extern void update_ntp_one_tick(void); |
| 272 | extern int do_adjtimex(struct timex *); | 270 | extern int do_adjtimex(struct timex *); |
| 273 | 271 | ||
| 274 | /* Don't use! Compatibility define for existing users. */ | ||
| 275 | #define tickadj (500/HZ ? : 1) | ||
| 276 | |||
| 277 | int read_current_timer(unsigned long *timer_val); | 272 | int read_current_timer(unsigned long *timer_val); |
| 278 | 273 | ||
| 279 | /* The clock frequency of the i8253/i8254 PIT */ | 274 | /* The clock frequency of the i8253/i8254 PIT */ |
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 59a009dc54a8..5108232f93d4 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c | |||
| @@ -429,7 +429,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr, | |||
| 429 | * sr: SEND or RECV | 429 | * sr: SEND or RECV |
| 430 | */ | 430 | */ |
| 431 | static int wq_sleep(struct mqueue_inode_info *info, int sr, | 431 | static int wq_sleep(struct mqueue_inode_info *info, int sr, |
| 432 | long timeout, struct ext_wait_queue *ewp) | 432 | ktime_t *timeout, struct ext_wait_queue *ewp) |
| 433 | { | 433 | { |
| 434 | int retval; | 434 | int retval; |
| 435 | signed long time; | 435 | signed long time; |
| @@ -440,7 +440,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr, | |||
| 440 | set_current_state(TASK_INTERRUPTIBLE); | 440 | set_current_state(TASK_INTERRUPTIBLE); |
| 441 | 441 | ||
| 442 | spin_unlock(&info->lock); | 442 | spin_unlock(&info->lock); |
| 443 | time = schedule_timeout(timeout); | 443 | time = schedule_hrtimeout_range_clock(timeout, |
| 444 | HRTIMER_MODE_ABS, 0, CLOCK_REALTIME); | ||
| 444 | 445 | ||
| 445 | while (ewp->state == STATE_PENDING) | 446 | while (ewp->state == STATE_PENDING) |
| 446 | cpu_relax(); | 447 | cpu_relax(); |
| @@ -552,31 +553,16 @@ static void __do_notify(struct mqueue_inode_info *info) | |||
| 552 | wake_up(&info->wait_q); | 553 | wake_up(&info->wait_q); |
| 553 | } | 554 | } |
| 554 | 555 | ||
| 555 | static long prepare_timeout(struct timespec *p) | 556 | static int prepare_timeout(const struct timespec __user *u_abs_timeout, |
| 557 | ktime_t *expires, struct timespec *ts) | ||
| 556 | { | 558 | { |
| 557 | struct timespec nowts; | 559 | if (copy_from_user(ts, u_abs_timeout, sizeof(struct timespec))) |
| 558 | long timeout; | 560 | return -EFAULT; |
| 559 | 561 | if (!timespec_valid(ts)) | |
| 560 | if (p) { | 562 | return -EINVAL; |
| 561 | if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0 | ||
| 562 | || p->tv_nsec >= NSEC_PER_SEC)) | ||
| 563 | return -EINVAL; | ||
| 564 | nowts = CURRENT_TIME; | ||
| 565 | /* first subtract as jiffies can't be too big */ | ||
| 566 | p->tv_sec -= nowts.tv_sec; | ||
| 567 | if (p->tv_nsec < nowts.tv_nsec) { | ||
| 568 | p->tv_nsec += NSEC_PER_SEC; | ||
| 569 | p->tv_sec--; | ||
| 570 | } | ||
| 571 | p->tv_nsec -= nowts.tv_nsec; | ||
| 572 | if (p->tv_sec < 0) | ||
| 573 | return 0; | ||
| 574 | |||
| 575 | timeout = timespec_to_jiffies(p) + 1; | ||
| 576 | } else | ||
| 577 | return MAX_SCHEDULE_TIMEOUT; | ||
| 578 | 563 | ||
| 579 | return timeout; | 564 | *expires = timespec_to_ktime(*ts); |
| 565 | return 0; | ||
| 580 | } | 566 | } |
| 581 | 567 | ||
| 582 | static void remove_notification(struct mqueue_inode_info *info) | 568 | static void remove_notification(struct mqueue_inode_info *info) |
| @@ -862,22 +848,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
| 862 | struct ext_wait_queue *receiver; | 848 | struct ext_wait_queue *receiver; |
| 863 | struct msg_msg *msg_ptr; | 849 | struct msg_msg *msg_ptr; |
| 864 | struct mqueue_inode_info *info; | 850 | struct mqueue_inode_info *info; |
| 865 | struct timespec ts, *p = NULL; | 851 | ktime_t expires, *timeout = NULL; |
| 866 | long timeout; | 852 | struct timespec ts; |
| 867 | int ret; | 853 | int ret; |
| 868 | 854 | ||
| 869 | if (u_abs_timeout) { | 855 | if (u_abs_timeout) { |
| 870 | if (copy_from_user(&ts, u_abs_timeout, | 856 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); |
| 871 | sizeof(struct timespec))) | 857 | if (res) |
| 872 | return -EFAULT; | 858 | return res; |
| 873 | p = &ts; | 859 | timeout = &expires; |
| 874 | } | 860 | } |
| 875 | 861 | ||
| 876 | if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX)) | 862 | if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX)) |
| 877 | return -EINVAL; | 863 | return -EINVAL; |
| 878 | 864 | ||
| 879 | audit_mq_sendrecv(mqdes, msg_len, msg_prio, p); | 865 | audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL); |
| 880 | timeout = prepare_timeout(p); | ||
| 881 | 866 | ||
| 882 | filp = fget(mqdes); | 867 | filp = fget(mqdes); |
| 883 | if (unlikely(!filp)) { | 868 | if (unlikely(!filp)) { |
| @@ -919,9 +904,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
| 919 | if (filp->f_flags & O_NONBLOCK) { | 904 | if (filp->f_flags & O_NONBLOCK) { |
| 920 | spin_unlock(&info->lock); | 905 | spin_unlock(&info->lock); |
| 921 | ret = -EAGAIN; | 906 | ret = -EAGAIN; |
| 922 | } else if (unlikely(timeout < 0)) { | ||
| 923 | spin_unlock(&info->lock); | ||
| 924 | ret = timeout; | ||
| 925 | } else { | 907 | } else { |
| 926 | wait.task = current; | 908 | wait.task = current; |
| 927 | wait.msg = (void *) msg_ptr; | 909 | wait.msg = (void *) msg_ptr; |
| @@ -954,24 +936,23 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, | |||
| 954 | size_t, msg_len, unsigned int __user *, u_msg_prio, | 936 | size_t, msg_len, unsigned int __user *, u_msg_prio, |
| 955 | const struct timespec __user *, u_abs_timeout) | 937 | const struct timespec __user *, u_abs_timeout) |
| 956 | { | 938 | { |
| 957 | long timeout; | ||
| 958 | ssize_t ret; | 939 | ssize_t ret; |
| 959 | struct msg_msg *msg_ptr; | 940 | struct msg_msg *msg_ptr; |
| 960 | struct file *filp; | 941 | struct file *filp; |
| 961 | struct inode *inode; | 942 | struct inode *inode; |
| 962 | struct mqueue_inode_info *info; | 943 | struct mqueue_inode_info *info; |
| 963 | struct ext_wait_queue wait; | 944 | struct ext_wait_queue wait; |
| 964 | struct timespec ts, *p = NULL; | 945 | ktime_t expires, *timeout = NULL; |
| 946 | struct timespec ts; | ||
| 965 | 947 | ||
| 966 | if (u_abs_timeout) { | 948 | if (u_abs_timeout) { |
| 967 | if (copy_from_user(&ts, u_abs_timeout, | 949 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); |
| 968 | sizeof(struct timespec))) | 950 | if (res) |
| 969 | return -EFAULT; | 951 | return res; |
| 970 | p = &ts; | 952 | timeout = &expires; |
| 971 | } | 953 | } |
| 972 | 954 | ||
| 973 | audit_mq_sendrecv(mqdes, msg_len, 0, p); | 955 | audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL); |
| 974 | timeout = prepare_timeout(p); | ||
| 975 | 956 | ||
| 976 | filp = fget(mqdes); | 957 | filp = fget(mqdes); |
| 977 | if (unlikely(!filp)) { | 958 | if (unlikely(!filp)) { |
| @@ -1003,11 +984,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, | |||
| 1003 | if (filp->f_flags & O_NONBLOCK) { | 984 | if (filp->f_flags & O_NONBLOCK) { |
| 1004 | spin_unlock(&info->lock); | 985 | spin_unlock(&info->lock); |
| 1005 | ret = -EAGAIN; | 986 | ret = -EAGAIN; |
| 1006 | msg_ptr = NULL; | ||
| 1007 | } else if (unlikely(timeout < 0)) { | ||
| 1008 | spin_unlock(&info->lock); | ||
| 1009 | ret = timeout; | ||
| 1010 | msg_ptr = NULL; | ||
| 1011 | } else { | 987 | } else { |
| 1012 | wait.task = current; | 988 | wait.task = current; |
| 1013 | wait.state = STATE_NONE; | 989 | wait.state = STATE_NONE; |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0086628b6e97..b9b134b35088 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
| @@ -1749,35 +1749,15 @@ void __init hrtimers_init(void) | |||
| 1749 | } | 1749 | } |
| 1750 | 1750 | ||
| 1751 | /** | 1751 | /** |
| 1752 | * schedule_hrtimeout_range - sleep until timeout | 1752 | * schedule_hrtimeout_range_clock - sleep until timeout |
| 1753 | * @expires: timeout value (ktime_t) | 1753 | * @expires: timeout value (ktime_t) |
| 1754 | * @delta: slack in expires timeout (ktime_t) | 1754 | * @delta: slack in expires timeout (ktime_t) |
| 1755 | * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL | 1755 | * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL |
| 1756 | * | 1756 | * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME |
| 1757 | * Make the current task sleep until the given expiry time has | ||
| 1758 | * elapsed. The routine will return immediately unless | ||
| 1759 | * the current task state has been set (see set_current_state()). | ||
| 1760 | * | ||
| 1761 | * The @delta argument gives the kernel the freedom to schedule the | ||
| 1762 | * actual wakeup to a time that is both power and performance friendly. | ||
| 1763 | * The kernel give the normal best effort behavior for "@expires+@delta", | ||
| 1764 | * but may decide to fire the timer earlier, but no earlier than @expires. | ||
| 1765 | * | ||
| 1766 | * You can set the task state as follows - | ||
| 1767 | * | ||
| 1768 | * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to | ||
| 1769 | * pass before the routine returns. | ||
| 1770 | * | ||
| 1771 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is | ||
| 1772 | * delivered to the current task. | ||
| 1773 | * | ||
| 1774 | * The current task state is guaranteed to be TASK_RUNNING when this | ||
| 1775 | * routine returns. | ||
| 1776 | * | ||
| 1777 | * Returns 0 when the timer has expired otherwise -EINTR | ||
| 1778 | */ | 1757 | */ |
| 1779 | int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | 1758 | int __sched |
| 1780 | const enum hrtimer_mode mode) | 1759 | schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, |
| 1760 | const enum hrtimer_mode mode, int clock) | ||
| 1781 | { | 1761 | { |
| 1782 | struct hrtimer_sleeper t; | 1762 | struct hrtimer_sleeper t; |
| 1783 | 1763 | ||
| @@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | |||
| 1799 | return -EINTR; | 1779 | return -EINTR; |
| 1800 | } | 1780 | } |
| 1801 | 1781 | ||
| 1802 | hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); | 1782 | hrtimer_init_on_stack(&t.timer, clock, mode); |
| 1803 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); | 1783 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); |
| 1804 | 1784 | ||
| 1805 | hrtimer_init_sleeper(&t, current); | 1785 | hrtimer_init_sleeper(&t, current); |
| @@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | |||
| 1818 | 1798 | ||
| 1819 | return !t.task ? 0 : -EINTR; | 1799 | return !t.task ? 0 : -EINTR; |
| 1820 | } | 1800 | } |
| 1801 | |||
| 1802 | /** | ||
| 1803 | * schedule_hrtimeout_range - sleep until timeout | ||
| 1804 | * @expires: timeout value (ktime_t) | ||
| 1805 | * @delta: slack in expires timeout (ktime_t) | ||
| 1806 | * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL | ||
| 1807 | * | ||
| 1808 | * Make the current task sleep until the given expiry time has | ||
| 1809 | * elapsed. The routine will return immediately unless | ||
| 1810 | * the current task state has been set (see set_current_state()). | ||
| 1811 | * | ||
| 1812 | * The @delta argument gives the kernel the freedom to schedule the | ||
| 1813 | * actual wakeup to a time that is both power and performance friendly. | ||
| 1814 | * The kernel give the normal best effort behavior for "@expires+@delta", | ||
| 1815 | * but may decide to fire the timer earlier, but no earlier than @expires. | ||
| 1816 | * | ||
| 1817 | * You can set the task state as follows - | ||
| 1818 | * | ||
| 1819 | * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to | ||
| 1820 | * pass before the routine returns. | ||
| 1821 | * | ||
| 1822 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is | ||
| 1823 | * delivered to the current task. | ||
| 1824 | * | ||
| 1825 | * The current task state is guaranteed to be TASK_RUNNING when this | ||
| 1826 | * routine returns. | ||
| 1827 | * | ||
| 1828 | * Returns 0 when the timer has expired otherwise -EINTR | ||
| 1829 | */ | ||
| 1830 | int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | ||
| 1831 | const enum hrtimer_mode mode) | ||
| 1832 | { | ||
| 1833 | return schedule_hrtimeout_range_clock(expires, delta, mode, | ||
| 1834 | CLOCK_MONOTONIC); | ||
| 1835 | } | ||
| 1821 | EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); | 1836 | EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); |
| 1822 | 1837 | ||
| 1823 | /** | 1838 | /** |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bc7704b3a443..00bb252f29a2 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
| @@ -11,19 +11,18 @@ | |||
| 11 | #include <trace/events/timer.h> | 11 | #include <trace/events/timer.h> |
| 12 | 12 | ||
| 13 | /* | 13 | /* |
| 14 | * Called after updating RLIMIT_CPU to set timer expiration if necessary. | 14 | * Called after updating RLIMIT_CPU to run cpu timer and update |
| 15 | * tsk->signal->cputime_expires expiration cache if necessary. Needs | ||
| 16 | * siglock protection since other code may update expiration cache as | ||
| 17 | * well. | ||
| 15 | */ | 18 | */ |
| 16 | void update_rlimit_cpu(unsigned long rlim_new) | 19 | void update_rlimit_cpu(unsigned long rlim_new) |
| 17 | { | 20 | { |
| 18 | cputime_t cputime = secs_to_cputime(rlim_new); | 21 | cputime_t cputime = secs_to_cputime(rlim_new); |
| 19 | struct signal_struct *const sig = current->signal; | ||
| 20 | 22 | ||
| 21 | if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || | 23 | spin_lock_irq(&current->sighand->siglock); |
| 22 | cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { | 24 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); |
| 23 | spin_lock_irq(&current->sighand->siglock); | 25 | spin_unlock_irq(&current->sighand->siglock); |
| 24 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | ||
| 25 | spin_unlock_irq(&current->sighand->siglock); | ||
| 26 | } | ||
| 27 | } | 26 | } |
| 28 | 27 | ||
| 29 | static int check_clock(const clockid_t which_clock) | 28 | static int check_clock(const clockid_t which_clock) |
| @@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp) | |||
| 548 | cputime_gt(expires, new_exp); | 547 | cputime_gt(expires, new_exp); |
| 549 | } | 548 | } |
| 550 | 549 | ||
| 551 | static inline int expires_le(cputime_t expires, cputime_t new_exp) | ||
| 552 | { | ||
| 553 | return !cputime_eq(expires, cputime_zero) && | ||
| 554 | cputime_le(expires, new_exp); | ||
| 555 | } | ||
| 556 | /* | 550 | /* |
| 557 | * Insert the timer on the appropriate list before any timers that | 551 | * Insert the timer on the appropriate list before any timers that |
| 558 | * expire later. This must be called with the tasklist_lock held | 552 | * expire later. This must be called with the tasklist_lock held |
| 559 | * for reading, and interrupts disabled. | 553 | * for reading, interrupts disabled and p->sighand->siglock taken. |
| 560 | */ | 554 | */ |
| 561 | static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | 555 | static void arm_timer(struct k_itimer *timer) |
| 562 | { | 556 | { |
| 563 | struct task_struct *p = timer->it.cpu.task; | 557 | struct task_struct *p = timer->it.cpu.task; |
| 564 | struct list_head *head, *listpos; | 558 | struct list_head *head, *listpos; |
| 559 | struct task_cputime *cputime_expires; | ||
| 565 | struct cpu_timer_list *const nt = &timer->it.cpu; | 560 | struct cpu_timer_list *const nt = &timer->it.cpu; |
| 566 | struct cpu_timer_list *next; | 561 | struct cpu_timer_list *next; |
| 567 | unsigned long i; | ||
| 568 | 562 | ||
| 569 | head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? | 563 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { |
| 570 | p->cpu_timers : p->signal->cpu_timers); | 564 | head = p->cpu_timers; |
| 565 | cputime_expires = &p->cputime_expires; | ||
| 566 | } else { | ||
| 567 | head = p->signal->cpu_timers; | ||
| 568 | cputime_expires = &p->signal->cputime_expires; | ||
| 569 | } | ||
| 571 | head += CPUCLOCK_WHICH(timer->it_clock); | 570 | head += CPUCLOCK_WHICH(timer->it_clock); |
| 572 | 571 | ||
| 573 | BUG_ON(!irqs_disabled()); | ||
| 574 | spin_lock(&p->sighand->siglock); | ||
| 575 | |||
| 576 | listpos = head; | 572 | listpos = head; |
| 577 | if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { | 573 | list_for_each_entry(next, head, entry) { |
| 578 | list_for_each_entry(next, head, entry) { | 574 | if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) |
| 579 | if (next->expires.sched > nt->expires.sched) | 575 | break; |
| 580 | break; | 576 | listpos = &next->entry; |
| 581 | listpos = &next->entry; | ||
| 582 | } | ||
| 583 | } else { | ||
| 584 | list_for_each_entry(next, head, entry) { | ||
| 585 | if (cputime_gt(next->expires.cpu, nt->expires.cpu)) | ||
| 586 | break; | ||
| 587 | listpos = &next->entry; | ||
| 588 | } | ||
| 589 | } | 577 | } |
| 590 | list_add(&nt->entry, listpos); | 578 | list_add(&nt->entry, listpos); |
| 591 | 579 | ||
| 592 | if (listpos == head) { | 580 | if (listpos == head) { |
| 581 | union cpu_time_count *exp = &nt->expires; | ||
| 582 | |||
| 593 | /* | 583 | /* |
| 594 | * We are the new earliest-expiring timer. | 584 | * We are the new earliest-expiring POSIX 1.b timer, hence |
| 595 | * If we are a thread timer, there can always | 585 | * need to update expiration cache. Take into account that |
| 596 | * be a process timer telling us to stop earlier. | 586 | * for process timers we share expiration cache with itimers |
| 587 | * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. | ||
| 597 | */ | 588 | */ |
| 598 | 589 | ||
| 599 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 590 | switch (CPUCLOCK_WHICH(timer->it_clock)) { |
| 600 | union cpu_time_count *exp = &nt->expires; | 591 | case CPUCLOCK_PROF: |
| 601 | 592 | if (expires_gt(cputime_expires->prof_exp, exp->cpu)) | |
| 602 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 593 | cputime_expires->prof_exp = exp->cpu; |
| 603 | default: | 594 | break; |
| 604 | BUG(); | 595 | case CPUCLOCK_VIRT: |
| 605 | case CPUCLOCK_PROF: | 596 | if (expires_gt(cputime_expires->virt_exp, exp->cpu)) |
| 606 | if (expires_gt(p->cputime_expires.prof_exp, | 597 | cputime_expires->virt_exp = exp->cpu; |
| 607 | exp->cpu)) | 598 | break; |
| 608 | p->cputime_expires.prof_exp = exp->cpu; | 599 | case CPUCLOCK_SCHED: |
| 609 | break; | 600 | if (cputime_expires->sched_exp == 0 || |
| 610 | case CPUCLOCK_VIRT: | 601 | cputime_expires->sched_exp > exp->sched) |
| 611 | if (expires_gt(p->cputime_expires.virt_exp, | 602 | cputime_expires->sched_exp = exp->sched; |
| 612 | exp->cpu)) | 603 | break; |
| 613 | p->cputime_expires.virt_exp = exp->cpu; | ||
| 614 | break; | ||
| 615 | case CPUCLOCK_SCHED: | ||
| 616 | if (p->cputime_expires.sched_exp == 0 || | ||
| 617 | p->cputime_expires.sched_exp > exp->sched) | ||
| 618 | p->cputime_expires.sched_exp = | ||
| 619 | exp->sched; | ||
| 620 | break; | ||
| 621 | } | ||
| 622 | } else { | ||
| 623 | struct signal_struct *const sig = p->signal; | ||
| 624 | union cpu_time_count *exp = &timer->it.cpu.expires; | ||
| 625 | |||
| 626 | /* | ||
| 627 | * For a process timer, set the cached expiration time. | ||
| 628 | */ | ||
| 629 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | ||
| 630 | default: | ||
| 631 | BUG(); | ||
| 632 | case CPUCLOCK_VIRT: | ||
| 633 | if (expires_le(sig->it[CPUCLOCK_VIRT].expires, | ||
| 634 | exp->cpu)) | ||
| 635 | break; | ||
| 636 | sig->cputime_expires.virt_exp = exp->cpu; | ||
| 637 | break; | ||
| 638 | case CPUCLOCK_PROF: | ||
| 639 | if (expires_le(sig->it[CPUCLOCK_PROF].expires, | ||
| 640 | exp->cpu)) | ||
| 641 | break; | ||
| 642 | i = sig->rlim[RLIMIT_CPU].rlim_cur; | ||
| 643 | if (i != RLIM_INFINITY && | ||
| 644 | i <= cputime_to_secs(exp->cpu)) | ||
| 645 | break; | ||
| 646 | sig->cputime_expires.prof_exp = exp->cpu; | ||
| 647 | break; | ||
| 648 | case CPUCLOCK_SCHED: | ||
| 649 | sig->cputime_expires.sched_exp = exp->sched; | ||
| 650 | break; | ||
| 651 | } | ||
| 652 | } | 604 | } |
| 653 | } | 605 | } |
| 654 | |||
| 655 | spin_unlock(&p->sighand->siglock); | ||
| 656 | } | 606 | } |
| 657 | 607 | ||
| 658 | /* | 608 | /* |
| @@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
| 660 | */ | 610 | */ |
| 661 | static void cpu_timer_fire(struct k_itimer *timer) | 611 | static void cpu_timer_fire(struct k_itimer *timer) |
| 662 | { | 612 | { |
| 663 | if (unlikely(timer->sigq == NULL)) { | 613 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { |
| 614 | /* | ||
| 615 | * User don't want any signal. | ||
| 616 | */ | ||
| 617 | timer->it.cpu.expires.sched = 0; | ||
| 618 | } else if (unlikely(timer->sigq == NULL)) { | ||
| 664 | /* | 619 | /* |
| 665 | * This a special case for clock_nanosleep, | 620 | * This a special case for clock_nanosleep, |
| 666 | * not a normal timer from sys_timer_create. | 621 | * not a normal timer from sys_timer_create. |
| @@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 721 | struct itimerspec *new, struct itimerspec *old) | 676 | struct itimerspec *new, struct itimerspec *old) |
| 722 | { | 677 | { |
| 723 | struct task_struct *p = timer->it.cpu.task; | 678 | struct task_struct *p = timer->it.cpu.task; |
| 724 | union cpu_time_count old_expires, new_expires, val; | 679 | union cpu_time_count old_expires, new_expires, old_incr, val; |
| 725 | int ret; | 680 | int ret; |
| 726 | 681 | ||
| 727 | if (unlikely(p == NULL)) { | 682 | if (unlikely(p == NULL)) { |
| @@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 752 | BUG_ON(!irqs_disabled()); | 707 | BUG_ON(!irqs_disabled()); |
| 753 | 708 | ||
| 754 | ret = 0; | 709 | ret = 0; |
| 710 | old_incr = timer->it.cpu.incr; | ||
| 755 | spin_lock(&p->sighand->siglock); | 711 | spin_lock(&p->sighand->siglock); |
| 756 | old_expires = timer->it.cpu.expires; | 712 | old_expires = timer->it.cpu.expires; |
| 757 | if (unlikely(timer->it.cpu.firing)) { | 713 | if (unlikely(timer->it.cpu.firing)) { |
| @@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 759 | ret = TIMER_RETRY; | 715 | ret = TIMER_RETRY; |
| 760 | } else | 716 | } else |
| 761 | list_del_init(&timer->it.cpu.entry); | 717 | list_del_init(&timer->it.cpu.entry); |
| 762 | spin_unlock(&p->sighand->siglock); | ||
| 763 | 718 | ||
| 764 | /* | 719 | /* |
| 765 | * We need to sample the current value to convert the new | 720 | * We need to sample the current value to convert the new |
| @@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 813 | * disable this firing since we are already reporting | 768 | * disable this firing since we are already reporting |
| 814 | * it as an overrun (thanks to bump_cpu_timer above). | 769 | * it as an overrun (thanks to bump_cpu_timer above). |
| 815 | */ | 770 | */ |
| 771 | spin_unlock(&p->sighand->siglock); | ||
| 816 | read_unlock(&tasklist_lock); | 772 | read_unlock(&tasklist_lock); |
| 817 | goto out; | 773 | goto out; |
| 818 | } | 774 | } |
| @@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 828 | */ | 784 | */ |
| 829 | timer->it.cpu.expires = new_expires; | 785 | timer->it.cpu.expires = new_expires; |
| 830 | if (new_expires.sched != 0 && | 786 | if (new_expires.sched != 0 && |
| 831 | (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE && | ||
| 832 | cpu_time_before(timer->it_clock, val, new_expires)) { | 787 | cpu_time_before(timer->it_clock, val, new_expires)) { |
| 833 | arm_timer(timer, val); | 788 | arm_timer(timer); |
| 834 | } | 789 | } |
| 835 | 790 | ||
| 791 | spin_unlock(&p->sighand->siglock); | ||
| 836 | read_unlock(&tasklist_lock); | 792 | read_unlock(&tasklist_lock); |
| 837 | 793 | ||
| 838 | /* | 794 | /* |
| @@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 853 | timer->it_overrun = -1; | 809 | timer->it_overrun = -1; |
| 854 | 810 | ||
| 855 | if (new_expires.sched != 0 && | 811 | if (new_expires.sched != 0 && |
| 856 | (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE && | ||
| 857 | !cpu_time_before(timer->it_clock, val, new_expires)) { | 812 | !cpu_time_before(timer->it_clock, val, new_expires)) { |
| 858 | /* | 813 | /* |
| 859 | * The designated time already passed, so we notify | 814 | * The designated time already passed, so we notify |
| @@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 867 | out: | 822 | out: |
| 868 | if (old) { | 823 | if (old) { |
| 869 | sample_to_timespec(timer->it_clock, | 824 | sample_to_timespec(timer->it_clock, |
| 870 | timer->it.cpu.incr, &old->it_interval); | 825 | old_incr, &old->it_interval); |
| 871 | } | 826 | } |
| 872 | return ret; | 827 | return ret; |
| 873 | } | 828 | } |
| @@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
| 927 | read_unlock(&tasklist_lock); | 882 | read_unlock(&tasklist_lock); |
| 928 | } | 883 | } |
| 929 | 884 | ||
| 930 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { | ||
| 931 | if (timer->it.cpu.incr.sched == 0 && | ||
| 932 | cpu_time_before(timer->it_clock, | ||
| 933 | timer->it.cpu.expires, now)) { | ||
| 934 | /* | ||
| 935 | * Do-nothing timer expired and has no reload, | ||
| 936 | * so it's as if it was never set. | ||
| 937 | */ | ||
| 938 | timer->it.cpu.expires.sched = 0; | ||
| 939 | itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; | ||
| 940 | return; | ||
| 941 | } | ||
| 942 | /* | ||
| 943 | * Account for any expirations and reloads that should | ||
| 944 | * have happened. | ||
| 945 | */ | ||
| 946 | bump_cpu_timer(timer, now); | ||
| 947 | } | ||
| 948 | |||
| 949 | if (unlikely(clear_dead)) { | 885 | if (unlikely(clear_dead)) { |
| 950 | /* | 886 | /* |
| 951 | * We've noticed that the thread is dead, but | 887 | * We've noticed that the thread is dead, but |
| @@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig) | |||
| 1066 | struct thread_group_cputimer *cputimer = &sig->cputimer; | 1002 | struct thread_group_cputimer *cputimer = &sig->cputimer; |
| 1067 | unsigned long flags; | 1003 | unsigned long flags; |
| 1068 | 1004 | ||
| 1069 | if (!cputimer->running) | ||
| 1070 | return; | ||
| 1071 | |||
| 1072 | spin_lock_irqsave(&cputimer->lock, flags); | 1005 | spin_lock_irqsave(&cputimer->lock, flags); |
| 1073 | cputimer->running = 0; | 1006 | cputimer->running = 0; |
| 1074 | spin_unlock_irqrestore(&cputimer->lock, flags); | 1007 | spin_unlock_irqrestore(&cputimer->lock, flags); |
| 1075 | |||
| 1076 | sig->cputime_expires.prof_exp = cputime_zero; | ||
| 1077 | sig->cputime_expires.virt_exp = cputime_zero; | ||
| 1078 | sig->cputime_expires.sched_exp = 0; | ||
| 1079 | } | 1008 | } |
| 1080 | 1009 | ||
| 1081 | static u32 onecputick; | 1010 | static u32 onecputick; |
| @@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | |||
| 1112 | } | 1041 | } |
| 1113 | } | 1042 | } |
| 1114 | 1043 | ||
| 1044 | /** | ||
| 1045 | * task_cputime_zero - Check a task_cputime struct for all zero fields. | ||
| 1046 | * | ||
| 1047 | * @cputime: The struct to compare. | ||
| 1048 | * | ||
| 1049 | * Checks @cputime to see if all fields are zero. Returns true if all fields | ||
| 1050 | * are zero, false if any field is nonzero. | ||
| 1051 | */ | ||
| 1052 | static inline int task_cputime_zero(const struct task_cputime *cputime) | ||
| 1053 | { | ||
| 1054 | if (cputime_eq(cputime->utime, cputime_zero) && | ||
| 1055 | cputime_eq(cputime->stime, cputime_zero) && | ||
| 1056 | cputime->sum_exec_runtime == 0) | ||
| 1057 | return 1; | ||
| 1058 | return 0; | ||
| 1059 | } | ||
| 1060 | |||
| 1115 | /* | 1061 | /* |
| 1116 | * Check for any per-thread CPU timers that have fired and move them | 1062 | * Check for any per-thread CPU timers that have fired and move them |
| 1117 | * off the tsk->*_timers list onto the firing list. Per-thread timers | 1063 | * off the tsk->*_timers list onto the firing list. Per-thread timers |
| @@ -1129,19 +1075,6 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1129 | unsigned long soft; | 1075 | unsigned long soft; |
| 1130 | 1076 | ||
| 1131 | /* | 1077 | /* |
| 1132 | * Don't sample the current process CPU clocks if there are no timers. | ||
| 1133 | */ | ||
| 1134 | if (list_empty(&timers[CPUCLOCK_PROF]) && | ||
| 1135 | cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && | ||
| 1136 | sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && | ||
| 1137 | list_empty(&timers[CPUCLOCK_VIRT]) && | ||
| 1138 | cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && | ||
| 1139 | list_empty(&timers[CPUCLOCK_SCHED])) { | ||
| 1140 | stop_process_timers(sig); | ||
| 1141 | return; | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | /* | ||
| 1145 | * Collect the current process totals. | 1078 | * Collect the current process totals. |
| 1146 | */ | 1079 | */ |
| 1147 | thread_group_cputimer(tsk, &cputime); | 1080 | thread_group_cputimer(tsk, &cputime); |
| @@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk, | |||
| 1230 | } | 1163 | } |
| 1231 | } | 1164 | } |
| 1232 | 1165 | ||
| 1233 | if (!cputime_eq(prof_expires, cputime_zero) && | 1166 | sig->cputime_expires.prof_exp = prof_expires; |
| 1234 | (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || | 1167 | sig->cputime_expires.virt_exp = virt_expires; |
| 1235 | cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) | 1168 | sig->cputime_expires.sched_exp = sched_expires; |
| 1236 | sig->cputime_expires.prof_exp = prof_expires; | 1169 | if (task_cputime_zero(&sig->cputime_expires)) |
| 1237 | if (!cputime_eq(virt_expires, cputime_zero) && | 1170 | stop_process_timers(sig); |
| 1238 | (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) || | ||
| 1239 | cputime_gt(sig->cputime_expires.virt_exp, virt_expires))) | ||
| 1240 | sig->cputime_expires.virt_exp = virt_expires; | ||
| 1241 | if (sched_expires != 0 && | ||
| 1242 | (sig->cputime_expires.sched_exp == 0 || | ||
| 1243 | sig->cputime_expires.sched_exp > sched_expires)) | ||
| 1244 | sig->cputime_expires.sched_exp = sched_expires; | ||
| 1245 | } | 1171 | } |
| 1246 | 1172 | ||
| 1247 | /* | 1173 | /* |
| @@ -1270,6 +1196,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
| 1270 | goto out; | 1196 | goto out; |
| 1271 | } | 1197 | } |
| 1272 | read_lock(&tasklist_lock); /* arm_timer needs it. */ | 1198 | read_lock(&tasklist_lock); /* arm_timer needs it. */ |
| 1199 | spin_lock(&p->sighand->siglock); | ||
| 1273 | } else { | 1200 | } else { |
| 1274 | read_lock(&tasklist_lock); | 1201 | read_lock(&tasklist_lock); |
| 1275 | if (unlikely(p->signal == NULL)) { | 1202 | if (unlikely(p->signal == NULL)) { |
| @@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
| 1290 | clear_dead_task(timer, now); | 1217 | clear_dead_task(timer, now); |
| 1291 | goto out_unlock; | 1218 | goto out_unlock; |
| 1292 | } | 1219 | } |
| 1220 | spin_lock(&p->sighand->siglock); | ||
| 1293 | cpu_timer_sample_group(timer->it_clock, p, &now); | 1221 | cpu_timer_sample_group(timer->it_clock, p, &now); |
| 1294 | bump_cpu_timer(timer, now); | 1222 | bump_cpu_timer(timer, now); |
| 1295 | /* Leave the tasklist_lock locked for the call below. */ | 1223 | /* Leave the tasklist_lock locked for the call below. */ |
| @@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
| 1298 | /* | 1226 | /* |
| 1299 | * Now re-arm for the new expiry time. | 1227 | * Now re-arm for the new expiry time. |
| 1300 | */ | 1228 | */ |
| 1301 | arm_timer(timer, now); | 1229 | BUG_ON(!irqs_disabled()); |
| 1230 | arm_timer(timer); | ||
| 1231 | spin_unlock(&p->sighand->siglock); | ||
| 1302 | 1232 | ||
| 1303 | out_unlock: | 1233 | out_unlock: |
| 1304 | read_unlock(&tasklist_lock); | 1234 | read_unlock(&tasklist_lock); |
| @@ -1310,23 +1240,6 @@ out: | |||
| 1310 | } | 1240 | } |
| 1311 | 1241 | ||
| 1312 | /** | 1242 | /** |
| 1313 | * task_cputime_zero - Check a task_cputime struct for all zero fields. | ||
| 1314 | * | ||
| 1315 | * @cputime: The struct to compare. | ||
| 1316 | * | ||
| 1317 | * Checks @cputime to see if all fields are zero. Returns true if all fields | ||
| 1318 | * are zero, false if any field is nonzero. | ||
| 1319 | */ | ||
| 1320 | static inline int task_cputime_zero(const struct task_cputime *cputime) | ||
| 1321 | { | ||
| 1322 | if (cputime_eq(cputime->utime, cputime_zero) && | ||
| 1323 | cputime_eq(cputime->stime, cputime_zero) && | ||
| 1324 | cputime->sum_exec_runtime == 0) | ||
| 1325 | return 1; | ||
| 1326 | return 0; | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | /** | ||
| 1330 | * task_cputime_expired - Compare two task_cputime entities. | 1243 | * task_cputime_expired - Compare two task_cputime entities. |
| 1331 | * | 1244 | * |
| 1332 | * @sample: The task_cputime structure to be checked for expiration. | 1245 | * @sample: The task_cputime structure to be checked for expiration. |
| @@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
| 1382 | } | 1295 | } |
| 1383 | 1296 | ||
| 1384 | sig = tsk->signal; | 1297 | sig = tsk->signal; |
| 1385 | if (!task_cputime_zero(&sig->cputime_expires)) { | 1298 | if (sig->cputimer.running) { |
| 1386 | struct task_cputime group_sample; | 1299 | struct task_cputime group_sample; |
| 1387 | 1300 | ||
| 1388 | thread_group_cputimer(tsk, &group_sample); | 1301 | thread_group_cputimer(tsk, &group_sample); |
| @@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
| 1390 | return 1; | 1303 | return 1; |
| 1391 | } | 1304 | } |
| 1392 | 1305 | ||
| 1393 | return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; | 1306 | return 0; |
| 1394 | } | 1307 | } |
| 1395 | 1308 | ||
| 1396 | /* | 1309 | /* |
| @@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
| 1419 | * put them on the firing list. | 1332 | * put them on the firing list. |
| 1420 | */ | 1333 | */ |
| 1421 | check_thread_timers(tsk, &firing); | 1334 | check_thread_timers(tsk, &firing); |
| 1422 | check_process_timers(tsk, &firing); | 1335 | /* |
| 1336 | * If there are any active process wide timers (POSIX 1.b, itimers, | ||
| 1337 | * RLIMIT_CPU) cputimer must be running. | ||
| 1338 | */ | ||
| 1339 | if (tsk->signal->cputimer.running) | ||
| 1340 | check_process_timers(tsk, &firing); | ||
| 1423 | 1341 | ||
| 1424 | /* | 1342 | /* |
| 1425 | * We must release these locks before taking any timer's lock. | 1343 | * We must release these locks before taking any timer's lock. |
| @@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
| 1456 | } | 1374 | } |
| 1457 | 1375 | ||
| 1458 | /* | 1376 | /* |
| 1459 | * Set one of the process-wide special case CPU timers. | 1377 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. |
| 1460 | * The tsk->sighand->siglock must be held by the caller. | 1378 | * The tsk->sighand->siglock must be held by the caller. |
| 1461 | * The *newval argument is relative and we update it to be absolute, *oldval | ||
| 1462 | * is absolute and we update it to be relative. | ||
| 1463 | */ | 1379 | */ |
| 1464 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | 1380 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, |
| 1465 | cputime_t *newval, cputime_t *oldval) | 1381 | cputime_t *newval, cputime_t *oldval) |
| 1466 | { | 1382 | { |
| 1467 | union cpu_time_count now; | 1383 | union cpu_time_count now; |
| 1468 | struct list_head *head; | ||
| 1469 | 1384 | ||
| 1470 | BUG_ON(clock_idx == CPUCLOCK_SCHED); | 1385 | BUG_ON(clock_idx == CPUCLOCK_SCHED); |
| 1471 | cpu_timer_sample_group(clock_idx, tsk, &now); | 1386 | cpu_timer_sample_group(clock_idx, tsk, &now); |
| 1472 | 1387 | ||
| 1473 | if (oldval) { | 1388 | if (oldval) { |
| 1389 | /* | ||
| 1390 | * We are setting itimer. The *oldval is absolute and we update | ||
| 1391 | * it to be relative, *newval argument is relative and we update | ||
| 1392 | * it to be absolute. | ||
| 1393 | */ | ||
| 1474 | if (!cputime_eq(*oldval, cputime_zero)) { | 1394 | if (!cputime_eq(*oldval, cputime_zero)) { |
| 1475 | if (cputime_le(*oldval, now.cpu)) { | 1395 | if (cputime_le(*oldval, now.cpu)) { |
| 1476 | /* Just about to fire. */ | 1396 | /* Just about to fire. */ |
| @@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
| 1483 | if (cputime_eq(*newval, cputime_zero)) | 1403 | if (cputime_eq(*newval, cputime_zero)) |
| 1484 | return; | 1404 | return; |
| 1485 | *newval = cputime_add(*newval, now.cpu); | 1405 | *newval = cputime_add(*newval, now.cpu); |
| 1486 | |||
| 1487 | /* | ||
| 1488 | * If the RLIMIT_CPU timer will expire before the | ||
| 1489 | * ITIMER_PROF timer, we have nothing else to do. | ||
| 1490 | */ | ||
| 1491 | if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur | ||
| 1492 | < cputime_to_secs(*newval)) | ||
| 1493 | return; | ||
| 1494 | } | 1406 | } |
| 1495 | 1407 | ||
| 1496 | /* | 1408 | /* |
| 1497 | * Check whether there are any process timers already set to fire | 1409 | * Update expiration cache if we are the earliest timer, or eventually |
| 1498 | * before this one. If so, we don't have anything more to do. | 1410 | * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire. |
| 1499 | */ | 1411 | */ |
| 1500 | head = &tsk->signal->cpu_timers[clock_idx]; | 1412 | switch (clock_idx) { |
| 1501 | if (list_empty(head) || | 1413 | case CPUCLOCK_PROF: |
| 1502 | cputime_ge(list_first_entry(head, | 1414 | if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval)) |
| 1503 | struct cpu_timer_list, entry)->expires.cpu, | ||
| 1504 | *newval)) { | ||
| 1505 | switch (clock_idx) { | ||
| 1506 | case CPUCLOCK_PROF: | ||
| 1507 | tsk->signal->cputime_expires.prof_exp = *newval; | 1415 | tsk->signal->cputime_expires.prof_exp = *newval; |
| 1508 | break; | 1416 | break; |
| 1509 | case CPUCLOCK_VIRT: | 1417 | case CPUCLOCK_VIRT: |
| 1418 | if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval)) | ||
| 1510 | tsk->signal->cputime_expires.virt_exp = *newval; | 1419 | tsk->signal->cputime_expires.virt_exp = *newval; |
| 1511 | break; | 1420 | break; |
| 1512 | } | ||
| 1513 | } | 1421 | } |
| 1514 | } | 1422 | } |
| 1515 | 1423 | ||
diff --git a/kernel/time.c b/kernel/time.c index 656dccfe1cbb..50612faa9baf 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
| @@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, | |||
| 132 | */ | 132 | */ |
| 133 | static inline void warp_clock(void) | 133 | static inline void warp_clock(void) |
| 134 | { | 134 | { |
| 135 | write_seqlock_irq(&xtime_lock); | 135 | struct timespec delta, adjust; |
| 136 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; | 136 | delta.tv_sec = sys_tz.tz_minuteswest * 60; |
| 137 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; | 137 | delta.tv_nsec = 0; |
| 138 | update_xtime_cache(0); | 138 | adjust = timespec_add_safe(current_kernel_time(), delta); |
| 139 | write_sequnlock_irq(&xtime_lock); | 139 | do_settimeofday(&adjust); |
| 140 | clock_was_set(); | ||
| 141 | } | 140 | } |
| 142 | 141 | ||
| 143 | /* | 142 | /* |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1f5dde637457..f08e99c1d561 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs) | |||
| 625 | list_add(&cs->list, entry); | 625 | list_add(&cs->list, entry); |
| 626 | } | 626 | } |
| 627 | 627 | ||
| 628 | |||
| 629 | /* | ||
| 630 | * Maximum time we expect to go between ticks. This includes idle | ||
| 631 | * tickless time. It provides the trade off between selecting a | ||
| 632 | * mult/shift pair that is very precise but can only handle a short | ||
| 633 | * period of time, vs. a mult/shift pair that can handle long periods | ||
| 634 | * of time but isn't as precise. | ||
| 635 | * | ||
| 636 | * This is a subsystem constant, and actual hardware limitations | ||
| 637 | * may override it (ie: clocksources that wrap every 3 seconds). | ||
| 638 | */ | ||
| 639 | #define MAX_UPDATE_LENGTH 5 /* Seconds */ | ||
| 640 | |||
| 641 | /** | ||
| 642 | * __clocksource_register_scale - Used to install new clocksources | ||
| 643 | * @cs: clocksource to be registered | ||
| 644 | * @scale: Scale factor multiplied against freq to get clocksource hz | ||
| 645 | * @freq: clocksource frequency (cycles per second) divided by scale | ||
| 646 | * | ||
| 647 | * Returns -EBUSY if registration fails, zero otherwise. | ||
| 648 | * | ||
| 649 | * This *SHOULD NOT* be called directly! Please use the | ||
| 650 | * clocksource_register_hz() or clocksource_register_khz() helper functions. | ||
| 651 | */ | ||
| 652 | int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) | ||
| 653 | { | ||
| 654 | |||
| 655 | /* | ||
| 656 | * Ideally we want to use some of the limits used in | ||
| 657 | * clocksource_max_deferment, to provide a more informed | ||
| 658 | * MAX_UPDATE_LENGTH. But for now this just gets the | ||
| 659 | * register interface working properly. | ||
| 660 | */ | ||
| 661 | clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, | ||
| 662 | NSEC_PER_SEC/scale, | ||
| 663 | MAX_UPDATE_LENGTH*scale); | ||
| 664 | cs->max_idle_ns = clocksource_max_deferment(cs); | ||
| 665 | |||
| 666 | mutex_lock(&clocksource_mutex); | ||
| 667 | clocksource_enqueue(cs); | ||
| 668 | clocksource_select(); | ||
| 669 | clocksource_enqueue_watchdog(cs); | ||
| 670 | mutex_unlock(&clocksource_mutex); | ||
| 671 | return 0; | ||
| 672 | } | ||
| 673 | EXPORT_SYMBOL_GPL(__clocksource_register_scale); | ||
| 674 | |||
| 675 | |||
| 628 | /** | 676 | /** |
| 629 | * clocksource_register - Used to install new clocksources | 677 | * clocksource_register - Used to install new clocksources |
| 630 | * @t: clocksource to be registered | 678 | * @t: clocksource to be registered |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7c0f180d6e9d..c63116863a80 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -69,7 +69,7 @@ static s64 time_freq; | |||
| 69 | /* time at last adjustment (secs): */ | 69 | /* time at last adjustment (secs): */ |
| 70 | static long time_reftime; | 70 | static long time_reftime; |
| 71 | 71 | ||
| 72 | long time_adjust; | 72 | static long time_adjust; |
| 73 | 73 | ||
| 74 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ | 74 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ |
| 75 | static s64 ntp_tick_adj; | 75 | static s64 ntp_tick_adj; |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 39f6177fafac..caf8d4d4f5c8 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -165,13 +165,6 @@ struct timespec raw_time; | |||
| 165 | /* flag for if timekeeping is suspended */ | 165 | /* flag for if timekeeping is suspended */ |
| 166 | int __read_mostly timekeeping_suspended; | 166 | int __read_mostly timekeeping_suspended; |
| 167 | 167 | ||
| 168 | static struct timespec xtime_cache __attribute__ ((aligned (16))); | ||
| 169 | void update_xtime_cache(u64 nsec) | ||
| 170 | { | ||
| 171 | xtime_cache = xtime; | ||
| 172 | timespec_add_ns(&xtime_cache, nsec); | ||
| 173 | } | ||
| 174 | |||
| 175 | /* must hold xtime_lock */ | 168 | /* must hold xtime_lock */ |
| 176 | void timekeeping_leap_insert(int leapsecond) | 169 | void timekeeping_leap_insert(int leapsecond) |
| 177 | { | 170 | { |
| @@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv) | |||
| 332 | 325 | ||
| 333 | xtime = *tv; | 326 | xtime = *tv; |
| 334 | 327 | ||
| 335 | update_xtime_cache(0); | ||
| 336 | |||
| 337 | timekeeper.ntp_error = 0; | 328 | timekeeper.ntp_error = 0; |
| 338 | ntp_clear(); | 329 | ntp_clear(); |
| 339 | 330 | ||
| @@ -559,7 +550,6 @@ void __init timekeeping_init(void) | |||
| 559 | } | 550 | } |
| 560 | set_normalized_timespec(&wall_to_monotonic, | 551 | set_normalized_timespec(&wall_to_monotonic, |
| 561 | -boot.tv_sec, -boot.tv_nsec); | 552 | -boot.tv_sec, -boot.tv_nsec); |
| 562 | update_xtime_cache(0); | ||
| 563 | total_sleep_time.tv_sec = 0; | 553 | total_sleep_time.tv_sec = 0; |
| 564 | total_sleep_time.tv_nsec = 0; | 554 | total_sleep_time.tv_nsec = 0; |
| 565 | write_sequnlock_irqrestore(&xtime_lock, flags); | 555 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| @@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev) | |||
| 593 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); | 583 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); |
| 594 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); | 584 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); |
| 595 | } | 585 | } |
| 596 | update_xtime_cache(0); | ||
| 597 | /* re-base the last cycle value */ | 586 | /* re-base the last cycle value */ |
| 598 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); | 587 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
| 599 | timekeeper.ntp_error = 0; | 588 | timekeeper.ntp_error = 0; |
| @@ -788,7 +777,6 @@ void update_wall_time(void) | |||
| 788 | { | 777 | { |
| 789 | struct clocksource *clock; | 778 | struct clocksource *clock; |
| 790 | cycle_t offset; | 779 | cycle_t offset; |
| 791 | u64 nsecs; | ||
| 792 | int shift = 0, maxshift; | 780 | int shift = 0, maxshift; |
| 793 | 781 | ||
| 794 | /* Make sure we're fully resumed: */ | 782 | /* Make sure we're fully resumed: */ |
| @@ -847,7 +835,9 @@ void update_wall_time(void) | |||
| 847 | timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; | 835 | timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; |
| 848 | } | 836 | } |
| 849 | 837 | ||
| 850 | /* store full nanoseconds into xtime after rounding it up and | 838 | |
| 839 | /* | ||
| 840 | * Store full nanoseconds into xtime after rounding it up and | ||
| 851 | * add the remainder to the error difference. | 841 | * add the remainder to the error difference. |
| 852 | */ | 842 | */ |
| 853 | xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; | 843 | xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; |
| @@ -855,8 +845,15 @@ void update_wall_time(void) | |||
| 855 | timekeeper.ntp_error += timekeeper.xtime_nsec << | 845 | timekeeper.ntp_error += timekeeper.xtime_nsec << |
| 856 | timekeeper.ntp_error_shift; | 846 | timekeeper.ntp_error_shift; |
| 857 | 847 | ||
| 858 | nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); | 848 | /* |
| 859 | update_xtime_cache(nsecs); | 849 | * Finally, make sure that after the rounding |
| 850 | * xtime.tv_nsec isn't larger than NSEC_PER_SEC | ||
| 851 | */ | ||
| 852 | if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) { | ||
| 853 | xtime.tv_nsec -= NSEC_PER_SEC; | ||
| 854 | xtime.tv_sec++; | ||
| 855 | second_overflow(); | ||
| 856 | } | ||
| 860 | 857 | ||
| 861 | /* check to see if there is a new clocksource to use */ | 858 | /* check to see if there is a new clocksource to use */ |
| 862 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); | 859 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); |
| @@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); | |||
| 896 | 893 | ||
| 897 | unsigned long get_seconds(void) | 894 | unsigned long get_seconds(void) |
| 898 | { | 895 | { |
| 899 | return xtime_cache.tv_sec; | 896 | return xtime.tv_sec; |
| 900 | } | 897 | } |
| 901 | EXPORT_SYMBOL(get_seconds); | 898 | EXPORT_SYMBOL(get_seconds); |
| 902 | 899 | ||
| 903 | struct timespec __current_kernel_time(void) | 900 | struct timespec __current_kernel_time(void) |
| 904 | { | 901 | { |
| 905 | return xtime_cache; | 902 | return xtime; |
| 906 | } | 903 | } |
| 907 | 904 | ||
| 908 | struct timespec current_kernel_time(void) | 905 | struct timespec current_kernel_time(void) |
| @@ -913,7 +910,7 @@ struct timespec current_kernel_time(void) | |||
| 913 | do { | 910 | do { |
| 914 | seq = read_seqbegin(&xtime_lock); | 911 | seq = read_seqbegin(&xtime_lock); |
| 915 | 912 | ||
| 916 | now = xtime_cache; | 913 | now = xtime; |
| 917 | } while (read_seqretry(&xtime_lock, seq)); | 914 | } while (read_seqretry(&xtime_lock, seq)); |
| 918 | 915 | ||
| 919 | return now; | 916 | return now; |
| @@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void) | |||
| 928 | do { | 925 | do { |
| 929 | seq = read_seqbegin(&xtime_lock); | 926 | seq = read_seqbegin(&xtime_lock); |
| 930 | 927 | ||
| 931 | now = xtime_cache; | 928 | now = xtime; |
| 932 | mono = wall_to_monotonic; | 929 | mono = wall_to_monotonic; |
| 933 | } while (read_seqretry(&xtime_lock, seq)); | 930 | } while (read_seqretry(&xtime_lock, seq)); |
| 934 | 931 | ||
diff --git a/kernel/timer.c b/kernel/timer.c index aeb6a54f2771..9199f3c52215 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j) | |||
| 319 | } | 319 | } |
| 320 | EXPORT_SYMBOL_GPL(round_jiffies_up_relative); | 320 | EXPORT_SYMBOL_GPL(round_jiffies_up_relative); |
| 321 | 321 | ||
| 322 | /** | ||
| 323 | * set_timer_slack - set the allowed slack for a timer | ||
| 324 | * @slack_hz: the amount of time (in jiffies) allowed for rounding | ||
| 325 | * | ||
| 326 | * Set the amount of time, in jiffies, that a certain timer has | ||
| 327 | * in terms of slack. By setting this value, the timer subsystem | ||
| 328 | * will schedule the actual timer somewhere between | ||
| 329 | * the time mod_timer() asks for, and that time plus the slack. | ||
| 330 | * | ||
| 331 | * By setting the slack to -1, a percentage of the delay is used | ||
| 332 | * instead. | ||
| 333 | */ | ||
| 334 | void set_timer_slack(struct timer_list *timer, int slack_hz) | ||
| 335 | { | ||
| 336 | timer->slack = slack_hz; | ||
| 337 | } | ||
| 338 | EXPORT_SYMBOL_GPL(set_timer_slack); | ||
| 339 | |||
| 322 | 340 | ||
| 323 | static inline void set_running_timer(struct tvec_base *base, | 341 | static inline void set_running_timer(struct tvec_base *base, |
| 324 | struct timer_list *timer) | 342 | struct timer_list *timer) |
| @@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer, | |||
| 550 | { | 568 | { |
| 551 | timer->entry.next = NULL; | 569 | timer->entry.next = NULL; |
| 552 | timer->base = __raw_get_cpu_var(tvec_bases); | 570 | timer->base = __raw_get_cpu_var(tvec_bases); |
| 571 | timer->slack = -1; | ||
| 553 | #ifdef CONFIG_TIMER_STATS | 572 | #ifdef CONFIG_TIMER_STATS |
| 554 | timer->start_site = NULL; | 573 | timer->start_site = NULL; |
| 555 | timer->start_pid = -1; | 574 | timer->start_pid = -1; |
| @@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) | |||
| 715 | } | 734 | } |
| 716 | EXPORT_SYMBOL(mod_timer_pending); | 735 | EXPORT_SYMBOL(mod_timer_pending); |
| 717 | 736 | ||
| 737 | /* | ||
| 738 | * Decide where to put the timer while taking the slack into account | ||
| 739 | * | ||
| 740 | * Algorithm: | ||
| 741 | * 1) calculate the maximum (absolute) time | ||
| 742 | * 2) calculate the highest bit where the expires and new max are different | ||
| 743 | * 3) use this bit to make a mask | ||
| 744 | * 4) use the bitmask to round down the maximum time, so that all last | ||
| 745 | * bits are zeros | ||
| 746 | */ | ||
| 747 | static inline | ||
| 748 | unsigned long apply_slack(struct timer_list *timer, unsigned long expires) | ||
| 749 | { | ||
| 750 | unsigned long expires_limit, mask; | ||
| 751 | int bit; | ||
| 752 | |||
| 753 | expires_limit = expires + timer->slack; | ||
| 754 | |||
| 755 | if (timer->slack < 0) /* auto slack: use 0.4% */ | ||
| 756 | expires_limit = expires + (expires - jiffies)/256; | ||
| 757 | |||
| 758 | mask = expires ^ expires_limit; | ||
| 759 | |||
| 760 | if (mask == 0) | ||
| 761 | return expires; | ||
| 762 | |||
| 763 | bit = find_last_bit(&mask, BITS_PER_LONG); | ||
| 764 | |||
| 765 | mask = (1 << bit) - 1; | ||
| 766 | |||
| 767 | expires_limit = expires_limit & ~(mask); | ||
| 768 | |||
| 769 | return expires_limit; | ||
| 770 | } | ||
| 771 | |||
| 718 | /** | 772 | /** |
| 719 | * mod_timer - modify a timer's timeout | 773 | * mod_timer - modify a timer's timeout |
| 720 | * @timer: the timer to be modified | 774 | * @timer: the timer to be modified |
| @@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires) | |||
| 745 | if (timer_pending(timer) && timer->expires == expires) | 799 | if (timer_pending(timer) && timer->expires == expires) |
| 746 | return 1; | 800 | return 1; |
| 747 | 801 | ||
| 802 | expires = apply_slack(timer, expires); | ||
| 803 | |||
| 748 | return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); | 804 | return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); |
| 749 | } | 805 | } |
| 750 | EXPORT_SYMBOL(mod_timer); | 806 | EXPORT_SYMBOL(mod_timer); |
| @@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) | |||
| 955 | return index; | 1011 | return index; |
| 956 | } | 1012 | } |
| 957 | 1013 | ||
| 1014 | static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), | ||
| 1015 | unsigned long data) | ||
| 1016 | { | ||
| 1017 | int preempt_count = preempt_count(); | ||
| 1018 | |||
| 1019 | #ifdef CONFIG_LOCKDEP | ||
| 1020 | /* | ||
| 1021 | * It is permissible to free the timer from inside the | ||
| 1022 | * function that is called from it, this we need to take into | ||
| 1023 | * account for lockdep too. To avoid bogus "held lock freed" | ||
| 1024 | * warnings as well as problems when looking into | ||
| 1025 | * timer->lockdep_map, make a copy and use that here. | ||
| 1026 | */ | ||
| 1027 | struct lockdep_map lockdep_map = timer->lockdep_map; | ||
| 1028 | #endif | ||
| 1029 | /* | ||
| 1030 | * Couple the lock chain with the lock chain at | ||
| 1031 | * del_timer_sync() by acquiring the lock_map around the fn() | ||
| 1032 | * call here and in del_timer_sync(). | ||
| 1033 | */ | ||
| 1034 | lock_map_acquire(&lockdep_map); | ||
| 1035 | |||
| 1036 | trace_timer_expire_entry(timer); | ||
| 1037 | fn(data); | ||
| 1038 | trace_timer_expire_exit(timer); | ||
| 1039 | |||
| 1040 | lock_map_release(&lockdep_map); | ||
| 1041 | |||
| 1042 | if (preempt_count != preempt_count()) { | ||
| 1043 | WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", | ||
| 1044 | fn, preempt_count, preempt_count()); | ||
| 1045 | /* | ||
| 1046 | * Restore the preempt count. That gives us a decent | ||
| 1047 | * chance to survive and extract information. If the | ||
| 1048 | * callback kept a lock held, bad luck, but not worse | ||
| 1049 | * than the BUG() we had. | ||
| 1050 | */ | ||
| 1051 | preempt_count() = preempt_count; | ||
| 1052 | } | ||
| 1053 | } | ||
| 1054 | |||
| 958 | #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) | 1055 | #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) |
| 959 | 1056 | ||
| 960 | /** | 1057 | /** |
| @@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base) | |||
| 998 | detach_timer(timer, 1); | 1095 | detach_timer(timer, 1); |
| 999 | 1096 | ||
| 1000 | spin_unlock_irq(&base->lock); | 1097 | spin_unlock_irq(&base->lock); |
| 1001 | { | 1098 | call_timer_fn(timer, fn, data); |
| 1002 | int preempt_count = preempt_count(); | ||
| 1003 | |||
| 1004 | #ifdef CONFIG_LOCKDEP | ||
| 1005 | /* | ||
| 1006 | * It is permissible to free the timer from | ||
| 1007 | * inside the function that is called from | ||
| 1008 | * it, this we need to take into account for | ||
| 1009 | * lockdep too. To avoid bogus "held lock | ||
| 1010 | * freed" warnings as well as problems when | ||
| 1011 | * looking into timer->lockdep_map, make a | ||
| 1012 | * copy and use that here. | ||
| 1013 | */ | ||
| 1014 | struct lockdep_map lockdep_map = | ||
| 1015 | timer->lockdep_map; | ||
| 1016 | #endif | ||
| 1017 | /* | ||
| 1018 | * Couple the lock chain with the lock chain at | ||
| 1019 | * del_timer_sync() by acquiring the lock_map | ||
| 1020 | * around the fn() call here and in | ||
| 1021 | * del_timer_sync(). | ||
| 1022 | */ | ||
| 1023 | lock_map_acquire(&lockdep_map); | ||
| 1024 | |||
| 1025 | trace_timer_expire_entry(timer); | ||
| 1026 | fn(data); | ||
| 1027 | trace_timer_expire_exit(timer); | ||
| 1028 | |||
| 1029 | lock_map_release(&lockdep_map); | ||
| 1030 | |||
| 1031 | if (preempt_count != preempt_count()) { | ||
| 1032 | printk(KERN_ERR "huh, entered %p " | ||
| 1033 | "with preempt_count %08x, exited" | ||
| 1034 | " with %08x?\n", | ||
| 1035 | fn, preempt_count, | ||
| 1036 | preempt_count()); | ||
| 1037 | BUG(); | ||
| 1038 | } | ||
| 1039 | } | ||
| 1040 | spin_lock_irq(&base->lock); | 1099 | spin_lock_irq(&base->lock); |
| 1041 | } | 1100 | } |
| 1042 | } | 1101 | } |
