-rw-r--r--  Documentation/feature-removal-schedule.txt |  10
-rw-r--r--  include/linux/hrtimer.h                    |   2
-rw-r--r--  include/linux/time.h                       |   1
-rw-r--r--  include/linux/timer.h                      |  10
-rw-r--r--  include/linux/timex.h                      |   5
-rw-r--r--  ipc/mqueue.c                               |  74
-rw-r--r--  kernel/hrtimer.c                           |  67
-rw-r--r--  kernel/posix-cpu-timers.c                  | 218
-rw-r--r--  kernel/time.c                              |  11
-rw-r--r--  kernel/time/ntp.c                          |   2
-rw-r--r--  kernel/time/timekeeping.c                  |  35
-rw-r--r--  kernel/timer.c                             | 137
12 files changed, 281 insertions(+), 291 deletions(-)
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 05df0b7514b6..b93b7810c8bd 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
| @@ -564,6 +564,16 @@ Who: Avi Kivity <avi@redhat.com> | |||
| 564 | 564 | ||
| 565 | ---------------------------- | 565 | ---------------------------- |
| 566 | 566 | ||
| 567 | What: xtime, wall_to_monotonic | ||
| 568 | When: 2.6.36+ | ||
| 569 | Files: kernel/time/timekeeping.c include/linux/time.h | ||
| 570 | Why: Cleaning up timekeeping internal values. Please use | ||
| 571 | existing timekeeping accessor functions to access | ||
| 572 | the equivalent functionality. | ||
| 573 | Who: John Stultz <johnstul@us.ibm.com> | ||
| 574 | |||
| 575 | ---------------------------- | ||
| 576 | |||
| 567 | What: KVM kernel-allocated memory slots | 577 | What: KVM kernel-allocated memory slots |
| 568 | When: July 2010 | 578 | When: July 2010 |
| 569 | Why: Since 2.6.25, kvm supports user-allocated memory slots, which are | 579 | Why: Since 2.6.25, kvm supports user-allocated memory slots, which are |
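The new removal entry points users of xtime and wall_to_monotonic at the existing timekeeping accessors. As a rough illustration (not part of this patch; the function shown is hypothetical), code that used to read those variables directly would switch to the accessor calls:

#include <linux/kernel.h>
#include <linux/time.h>

/* Hypothetical example: read wall-clock and monotonic time via accessors. */
static void example_read_clocks(void)
{
	struct timespec wall, mono;

	getnstimeofday(&wall);	/* replaces reading xtime directly */
	ktime_get_ts(&mono);	/* replaces combining xtime with wall_to_monotonic */

	pr_info("wall %ld.%09ld, mono %ld.%09ld\n",
		wall.tv_sec, wall.tv_nsec, mono.tv_sec, mono.tv_nsec);
}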
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5d86fb2309d2..fd0c1b857d3d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
| @@ -422,6 +422,8 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, | |||
| 422 | 422 | ||
| 423 | extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | 423 | extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, |
| 424 | const enum hrtimer_mode mode); | 424 | const enum hrtimer_mode mode); |
| 425 | extern int schedule_hrtimeout_range_clock(ktime_t *expires, | ||
| 426 | unsigned long delta, const enum hrtimer_mode mode, int clock); | ||
| 425 | extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); | 427 | extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); |
| 426 | 428 | ||
| 427 | /* Soft interrupt function to run the hrtimer queues: */ | 429 | /* Soft interrupt function to run the hrtimer queues: */ |
diff --git a/include/linux/time.h b/include/linux/time.h
index 6e026e45a179..ea3559f0b3f2 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
| @@ -150,7 +150,6 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); | |||
| 150 | extern int timekeeping_valid_for_hres(void); | 150 | extern int timekeeping_valid_for_hres(void); |
| 151 | extern u64 timekeeping_max_deferment(void); | 151 | extern u64 timekeeping_max_deferment(void); |
| 152 | extern void update_wall_time(void); | 152 | extern void update_wall_time(void); |
| 153 | extern void update_xtime_cache(u64 nsec); | ||
| 154 | extern void timekeeping_leap_insert(int leapsecond); | 153 | extern void timekeeping_leap_insert(int leapsecond); |
| 155 | 154 | ||
| 156 | struct tms; | 155 | struct tms; |
diff --git a/include/linux/timer.h b/include/linux/timer.h
index a2d1eb6cb3f0..ea965b857a50 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
| @@ -10,13 +10,19 @@ | |||
| 10 | struct tvec_base; | 10 | struct tvec_base; |
| 11 | 11 | ||
| 12 | struct timer_list { | 12 | struct timer_list { |
| 13 | /* | ||
| 14 | * All fields that change during normal runtime grouped to the | ||
| 15 | * same cacheline | ||
| 16 | */ | ||
| 13 | struct list_head entry; | 17 | struct list_head entry; |
| 14 | unsigned long expires; | 18 | unsigned long expires; |
| 19 | struct tvec_base *base; | ||
| 15 | 20 | ||
| 16 | void (*function)(unsigned long); | 21 | void (*function)(unsigned long); |
| 17 | unsigned long data; | 22 | unsigned long data; |
| 18 | 23 | ||
| 19 | struct tvec_base *base; | 24 | int slack; |
| 25 | |||
| 20 | #ifdef CONFIG_TIMER_STATS | 26 | #ifdef CONFIG_TIMER_STATS |
| 21 | void *start_site; | 27 | void *start_site; |
| 22 | char start_comm[16]; | 28 | char start_comm[16]; |
| @@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires); | |||
| 165 | extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); | 171 | extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); |
| 166 | extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); | 172 | extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); |
| 167 | 173 | ||
| 174 | extern void set_timer_slack(struct timer_list *time, int slack_hz); | ||
| 175 | |||
| 168 | #define TIMER_NOT_PINNED 0 | 176 | #define TIMER_NOT_PINNED 0 |
| 169 | #define TIMER_PINNED 1 | 177 | #define TIMER_PINNED 1 |
| 170 | /* | 178 | /* |
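For context, a sketch of how a driver might use the new slack field and helper (hypothetical driver code, assuming a timer whose expiry is not latency sensitive):

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list example_timer;		/* hypothetical timer */

static void example_timer_fn(unsigned long data)
{
	/* periodic housekeeping that tolerates coarse expiry */
}

static void example_timer_setup(void)
{
	init_timer(&example_timer);
	example_timer.function = example_timer_fn;
	example_timer.data = 0;

	/* allow the expiry to be deferred by up to HZ jiffies (~1 second) */
	set_timer_slack(&example_timer, HZ);

	mod_timer(&example_timer, jiffies + 10 * HZ);
}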
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 7a082b32d8e1..32d852f8cbe4 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
| @@ -232,13 +232,11 @@ struct timex { | |||
| 232 | */ | 232 | */ |
| 233 | extern unsigned long tick_usec; /* USER_HZ period (usec) */ | 233 | extern unsigned long tick_usec; /* USER_HZ period (usec) */ |
| 234 | extern unsigned long tick_nsec; /* ACTHZ period (nsec) */ | 234 | extern unsigned long tick_nsec; /* ACTHZ period (nsec) */ |
| 235 | extern int tickadj; /* amount of adjustment per tick */ | ||
| 236 | 235 | ||
| 237 | /* | 236 | /* |
| 238 | * phase-lock loop variables | 237 | * phase-lock loop variables |
| 239 | */ | 238 | */ |
| 240 | extern int time_status; /* clock synchronization status bits */ | 239 | extern int time_status; /* clock synchronization status bits */ |
| 241 | extern long time_adjust; /* The amount of adjtime left */ | ||
| 242 | 240 | ||
| 243 | extern void ntp_init(void); | 241 | extern void ntp_init(void); |
| 244 | extern void ntp_clear(void); | 242 | extern void ntp_clear(void); |
| @@ -271,9 +269,6 @@ extern void second_overflow(void); | |||
| 271 | extern void update_ntp_one_tick(void); | 269 | extern void update_ntp_one_tick(void); |
| 272 | extern int do_adjtimex(struct timex *); | 270 | extern int do_adjtimex(struct timex *); |
| 273 | 271 | ||
| 274 | /* Don't use! Compatibility define for existing users. */ | ||
| 275 | #define tickadj (500/HZ ? : 1) | ||
| 276 | |||
| 277 | int read_current_timer(unsigned long *timer_val); | 272 | int read_current_timer(unsigned long *timer_val); |
| 278 | 273 | ||
| 279 | /* The clock frequency of the i8253/i8254 PIT */ | 274 | /* The clock frequency of the i8253/i8254 PIT */ |
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 722b0130aa94..d6c09c46ad06 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
| @@ -429,7 +429,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr, | |||
| 429 | * sr: SEND or RECV | 429 | * sr: SEND or RECV |
| 430 | */ | 430 | */ |
| 431 | static int wq_sleep(struct mqueue_inode_info *info, int sr, | 431 | static int wq_sleep(struct mqueue_inode_info *info, int sr, |
| 432 | long timeout, struct ext_wait_queue *ewp) | 432 | ktime_t *timeout, struct ext_wait_queue *ewp) |
| 433 | { | 433 | { |
| 434 | int retval; | 434 | int retval; |
| 435 | signed long time; | 435 | signed long time; |
| @@ -440,7 +440,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr, | |||
| 440 | set_current_state(TASK_INTERRUPTIBLE); | 440 | set_current_state(TASK_INTERRUPTIBLE); |
| 441 | 441 | ||
| 442 | spin_unlock(&info->lock); | 442 | spin_unlock(&info->lock); |
| 443 | time = schedule_timeout(timeout); | 443 | time = schedule_hrtimeout_range_clock(timeout, |
| 444 | 0, HRTIMER_MODE_ABS, CLOCK_REALTIME); ||
| 444 | 445 | ||
| 445 | while (ewp->state == STATE_PENDING) | 446 | while (ewp->state == STATE_PENDING) |
| 446 | cpu_relax(); | 447 | cpu_relax(); |
| @@ -552,31 +553,16 @@ static void __do_notify(struct mqueue_inode_info *info) | |||
| 552 | wake_up(&info->wait_q); | 553 | wake_up(&info->wait_q); |
| 553 | } | 554 | } |
| 554 | 555 | ||
| 555 | static long prepare_timeout(struct timespec *p) | 556 | static int prepare_timeout(const struct timespec __user *u_abs_timeout, |
| 557 | ktime_t *expires, struct timespec *ts) | ||
| 556 | { | 558 | { |
| 557 | struct timespec nowts; | 559 | if (copy_from_user(ts, u_abs_timeout, sizeof(struct timespec))) |
| 558 | long timeout; | 560 | return -EFAULT; |
| 559 | 561 | if (!timespec_valid(ts)) | |
| 560 | if (p) { | 562 | return -EINVAL; |
| 561 | if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0 | ||
| 562 | || p->tv_nsec >= NSEC_PER_SEC)) | ||
| 563 | return -EINVAL; | ||
| 564 | nowts = CURRENT_TIME; | ||
| 565 | /* first subtract as jiffies can't be too big */ | ||
| 566 | p->tv_sec -= nowts.tv_sec; | ||
| 567 | if (p->tv_nsec < nowts.tv_nsec) { | ||
| 568 | p->tv_nsec += NSEC_PER_SEC; | ||
| 569 | p->tv_sec--; | ||
| 570 | } | ||
| 571 | p->tv_nsec -= nowts.tv_nsec; | ||
| 572 | if (p->tv_sec < 0) | ||
| 573 | return 0; | ||
| 574 | |||
| 575 | timeout = timespec_to_jiffies(p) + 1; | ||
| 576 | } else | ||
| 577 | return MAX_SCHEDULE_TIMEOUT; | ||
| 578 | 563 | ||
| 579 | return timeout; | 564 | *expires = timespec_to_ktime(*ts); |
| 565 | return 0; | ||
| 580 | } | 566 | } |
| 581 | 567 | ||
| 582 | static void remove_notification(struct mqueue_inode_info *info) | 568 | static void remove_notification(struct mqueue_inode_info *info) |
| @@ -862,22 +848,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
| 862 | struct ext_wait_queue *receiver; | 848 | struct ext_wait_queue *receiver; |
| 863 | struct msg_msg *msg_ptr; | 849 | struct msg_msg *msg_ptr; |
| 864 | struct mqueue_inode_info *info; | 850 | struct mqueue_inode_info *info; |
| 865 | struct timespec ts, *p = NULL; | 851 | ktime_t expires, *timeout = NULL; |
| 866 | long timeout; | 852 | struct timespec ts; |
| 867 | int ret; | 853 | int ret; |
| 868 | 854 | ||
| 869 | if (u_abs_timeout) { | 855 | if (u_abs_timeout) { |
| 870 | if (copy_from_user(&ts, u_abs_timeout, | 856 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); |
| 871 | sizeof(struct timespec))) | 857 | if (res) |
| 872 | return -EFAULT; | 858 | return res; |
| 873 | p = &ts; | 859 | timeout = &expires; |
| 874 | } | 860 | } |
| 875 | 861 | ||
| 876 | if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX)) | 862 | if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX)) |
| 877 | return -EINVAL; | 863 | return -EINVAL; |
| 878 | 864 | ||
| 879 | audit_mq_sendrecv(mqdes, msg_len, msg_prio, p); | 865 | audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL); |
| 880 | timeout = prepare_timeout(p); | ||
| 881 | 866 | ||
| 882 | filp = fget(mqdes); | 867 | filp = fget(mqdes); |
| 883 | if (unlikely(!filp)) { | 868 | if (unlikely(!filp)) { |
| @@ -919,9 +904,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
| 919 | if (filp->f_flags & O_NONBLOCK) { | 904 | if (filp->f_flags & O_NONBLOCK) { |
| 920 | spin_unlock(&info->lock); | 905 | spin_unlock(&info->lock); |
| 921 | ret = -EAGAIN; | 906 | ret = -EAGAIN; |
| 922 | } else if (unlikely(timeout < 0)) { | ||
| 923 | spin_unlock(&info->lock); | ||
| 924 | ret = timeout; | ||
| 925 | } else { | 907 | } else { |
| 926 | wait.task = current; | 908 | wait.task = current; |
| 927 | wait.msg = (void *) msg_ptr; | 909 | wait.msg = (void *) msg_ptr; |
| @@ -954,24 +936,23 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, | |||
| 954 | size_t, msg_len, unsigned int __user *, u_msg_prio, | 936 | size_t, msg_len, unsigned int __user *, u_msg_prio, |
| 955 | const struct timespec __user *, u_abs_timeout) | 937 | const struct timespec __user *, u_abs_timeout) |
| 956 | { | 938 | { |
| 957 | long timeout; | ||
| 958 | ssize_t ret; | 939 | ssize_t ret; |
| 959 | struct msg_msg *msg_ptr; | 940 | struct msg_msg *msg_ptr; |
| 960 | struct file *filp; | 941 | struct file *filp; |
| 961 | struct inode *inode; | 942 | struct inode *inode; |
| 962 | struct mqueue_inode_info *info; | 943 | struct mqueue_inode_info *info; |
| 963 | struct ext_wait_queue wait; | 944 | struct ext_wait_queue wait; |
| 964 | struct timespec ts, *p = NULL; | 945 | ktime_t expires, *timeout = NULL; |
| 946 | struct timespec ts; | ||
| 965 | 947 | ||
| 966 | if (u_abs_timeout) { | 948 | if (u_abs_timeout) { |
| 967 | if (copy_from_user(&ts, u_abs_timeout, | 949 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); |
| 968 | sizeof(struct timespec))) | 950 | if (res) |
| 969 | return -EFAULT; | 951 | return res; |
| 970 | p = &ts; | 952 | timeout = &expires; |
| 971 | } | 953 | } |
| 972 | 954 | ||
| 973 | audit_mq_sendrecv(mqdes, msg_len, 0, p); | 955 | audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL); |
| 974 | timeout = prepare_timeout(p); | ||
| 975 | 956 | ||
| 976 | filp = fget(mqdes); | 957 | filp = fget(mqdes); |
| 977 | if (unlikely(!filp)) { | 958 | if (unlikely(!filp)) { |
| @@ -1003,11 +984,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, | |||
| 1003 | if (filp->f_flags & O_NONBLOCK) { | 984 | if (filp->f_flags & O_NONBLOCK) { |
| 1004 | spin_unlock(&info->lock); | 985 | spin_unlock(&info->lock); |
| 1005 | ret = -EAGAIN; | 986 | ret = -EAGAIN; |
| 1006 | msg_ptr = NULL; | ||
| 1007 | } else if (unlikely(timeout < 0)) { | ||
| 1008 | spin_unlock(&info->lock); | ||
| 1009 | ret = timeout; | ||
| 1010 | msg_ptr = NULL; | ||
| 1011 | } else { | 987 | } else { |
| 1012 | wait.task = current; | 988 | wait.task = current; |
| 1013 | wait.state = STATE_NONE; | 989 | wait.state = STATE_NONE; |
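The net effect of the mqueue changes is that a user-supplied absolute CLOCK_REALTIME deadline is handed to the hrtimer core as-is instead of being converted to a relative jiffies count. A condensed sketch of the resulting sleep pattern (mqueue locking and wait-queue handling omitted; the function name is illustrative):

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/* Sleep until an absolute CLOCK_REALTIME deadline, or indefinitely if NULL. */
static int example_wait_until(ktime_t *abs_deadline)
{
	set_current_state(TASK_INTERRUPTIBLE);
	return schedule_hrtimeout_range_clock(abs_deadline, 0,
					      HRTIMER_MODE_ABS, CLOCK_REALTIME);
}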
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0086628b6e97..b9b134b35088 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
| @@ -1749,35 +1749,15 @@ void __init hrtimers_init(void) | |||
| 1749 | } | 1749 | } |
| 1750 | 1750 | ||
| 1751 | /** | 1751 | /** |
| 1752 | * schedule_hrtimeout_range - sleep until timeout | 1752 | * schedule_hrtimeout_range_clock - sleep until timeout |
| 1753 | * @expires: timeout value (ktime_t) | 1753 | * @expires: timeout value (ktime_t) |
| 1754 | * @delta: slack in expires timeout (ktime_t) | 1754 | * @delta: slack in expires timeout (ktime_t) |
| 1755 | * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL | 1755 | * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL |
| 1756 | * | 1756 | * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME |
| 1757 | * Make the current task sleep until the given expiry time has | ||
| 1758 | * elapsed. The routine will return immediately unless | ||
| 1759 | * the current task state has been set (see set_current_state()). | ||
| 1760 | * | ||
| 1761 | * The @delta argument gives the kernel the freedom to schedule the | ||
| 1762 | * actual wakeup to a time that is both power and performance friendly. | ||
| 1763 | * The kernel give the normal best effort behavior for "@expires+@delta", | ||
| 1764 | * but may decide to fire the timer earlier, but no earlier than @expires. | ||
| 1765 | * | ||
| 1766 | * You can set the task state as follows - | ||
| 1767 | * | ||
| 1768 | * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to | ||
| 1769 | * pass before the routine returns. | ||
| 1770 | * | ||
| 1771 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is | ||
| 1772 | * delivered to the current task. | ||
| 1773 | * | ||
| 1774 | * The current task state is guaranteed to be TASK_RUNNING when this | ||
| 1775 | * routine returns. | ||
| 1776 | * | ||
| 1777 | * Returns 0 when the timer has expired otherwise -EINTR | ||
| 1778 | */ | 1757 | */ |
| 1779 | int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | 1758 | int __sched |
| 1780 | const enum hrtimer_mode mode) | 1759 | schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, |
| 1760 | const enum hrtimer_mode mode, int clock) | ||
| 1781 | { | 1761 | { |
| 1782 | struct hrtimer_sleeper t; | 1762 | struct hrtimer_sleeper t; |
| 1783 | 1763 | ||
| @@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | |||
| 1799 | return -EINTR; | 1779 | return -EINTR; |
| 1800 | } | 1780 | } |
| 1801 | 1781 | ||
| 1802 | hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); | 1782 | hrtimer_init_on_stack(&t.timer, clock, mode); |
| 1803 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); | 1783 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); |
| 1804 | 1784 | ||
| 1805 | hrtimer_init_sleeper(&t, current); | 1785 | hrtimer_init_sleeper(&t, current); |
| @@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | |||
| 1818 | 1798 | ||
| 1819 | return !t.task ? 0 : -EINTR; | 1799 | return !t.task ? 0 : -EINTR; |
| 1820 | } | 1800 | } |
| 1801 | |||
| 1802 | /** | ||
| 1803 | * schedule_hrtimeout_range - sleep until timeout | ||
| 1804 | * @expires: timeout value (ktime_t) | ||
| 1805 | * @delta: slack in expires timeout (ktime_t) | ||
| 1806 | * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL | ||
| 1807 | * | ||
| 1808 | * Make the current task sleep until the given expiry time has | ||
| 1809 | * elapsed. The routine will return immediately unless | ||
| 1810 | * the current task state has been set (see set_current_state()). | ||
| 1811 | * | ||
| 1812 | * The @delta argument gives the kernel the freedom to schedule the | ||
| 1813 | * actual wakeup to a time that is both power and performance friendly. | ||
| 1814 | * The kernel give the normal best effort behavior for "@expires+@delta", | ||
| 1815 | * but may decide to fire the timer earlier, but no earlier than @expires. | ||
| 1816 | * | ||
| 1817 | * You can set the task state as follows - | ||
| 1818 | * | ||
| 1819 | * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to | ||
| 1820 | * pass before the routine returns. | ||
| 1821 | * | ||
| 1822 | * %TASK_INTERRUPTIBLE - the routine may return early if a signal is | ||
| 1823 | * delivered to the current task. | ||
| 1824 | * | ||
| 1825 | * The current task state is guaranteed to be TASK_RUNNING when this | ||
| 1826 | * routine returns. | ||
| 1827 | * | ||
| 1828 | * Returns 0 when the timer has expired otherwise -EINTR | ||
| 1829 | */ | ||
| 1830 | int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, | ||
| 1831 | const enum hrtimer_mode mode) | ||
| 1832 | { | ||
| 1833 | return schedule_hrtimeout_range_clock(expires, delta, mode, | ||
| 1834 | CLOCK_MONOTONIC); | ||
| 1835 | } | ||
| 1821 | EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); | 1836 | EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); |
| 1822 | 1837 | ||
| 1823 | /** | 1838 | /** |
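For comparison with the new clock-selectable variant above, an assumed caller of the unchanged schedule_hrtimeout_range() wrapper, sleeping for a relative 10 ms while granting the kernel 1 ms of slack:

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sched.h>

static int example_relative_sleep(void)
{
	ktime_t t = ktime_set(0, 10 * NSEC_PER_MSEC);	/* 10 ms from now */

	set_current_state(TASK_INTERRUPTIBLE);
	/* delta is in nanoseconds; 1 ms of slack lets wakeups coalesce */
	return schedule_hrtimeout_range(&t, NSEC_PER_MSEC, HRTIMER_MODE_REL);
}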
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bc7704b3a443..799f360d1475 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
| @@ -11,19 +11,18 @@ | |||
| 11 | #include <trace/events/timer.h> | 11 | #include <trace/events/timer.h> |
| 12 | 12 | ||
| 13 | /* | 13 | /* |
| 14 | * Called after updating RLIMIT_CPU to set timer expiration if necessary. | 14 | * Called after updating RLIMIT_CPU to run cpu timer and update |
| 15 | * tsk->signal->cputime_expires expiration cache if necessary. Needs | ||
| 16 | * siglock protection since other code may update expiration cache as | ||
| 17 | * well. | ||
| 15 | */ | 18 | */ |
| 16 | void update_rlimit_cpu(unsigned long rlim_new) | 19 | void update_rlimit_cpu(unsigned long rlim_new) |
| 17 | { | 20 | { |
| 18 | cputime_t cputime = secs_to_cputime(rlim_new); | 21 | cputime_t cputime = secs_to_cputime(rlim_new); |
| 19 | struct signal_struct *const sig = current->signal; | ||
| 20 | 22 | ||
| 21 | if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || | 23 | spin_lock_irq(¤t->sighand->siglock); |
| 22 | cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { | 24 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); |
| 23 | spin_lock_irq(¤t->sighand->siglock); | 25 | spin_unlock_irq(¤t->sighand->siglock); |
| 24 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | ||
| 25 | spin_unlock_irq(¤t->sighand->siglock); | ||
| 26 | } | ||
| 27 | } | 26 | } |
| 28 | 27 | ||
| 29 | static int check_clock(const clockid_t which_clock) | 28 | static int check_clock(const clockid_t which_clock) |
| @@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp) | |||
| 548 | cputime_gt(expires, new_exp); | 547 | cputime_gt(expires, new_exp); |
| 549 | } | 548 | } |
| 550 | 549 | ||
| 551 | static inline int expires_le(cputime_t expires, cputime_t new_exp) | ||
| 552 | { | ||
| 553 | return !cputime_eq(expires, cputime_zero) && | ||
| 554 | cputime_le(expires, new_exp); | ||
| 555 | } | ||
| 556 | /* | 550 | /* |
| 557 | * Insert the timer on the appropriate list before any timers that | 551 | * Insert the timer on the appropriate list before any timers that |
| 558 | * expire later. This must be called with the tasklist_lock held | 552 | * expire later. This must be called with the tasklist_lock held |
| 559 | * for reading, and interrupts disabled. | 553 | * for reading, interrupts disabled and p->sighand->siglock taken. |
| 560 | */ | 554 | */ |
| 561 | static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | 555 | static void arm_timer(struct k_itimer *timer) |
| 562 | { | 556 | { |
| 563 | struct task_struct *p = timer->it.cpu.task; | 557 | struct task_struct *p = timer->it.cpu.task; |
| 564 | struct list_head *head, *listpos; | 558 | struct list_head *head, *listpos; |
| 559 | struct task_cputime *cputime_expires; | ||
| 565 | struct cpu_timer_list *const nt = &timer->it.cpu; | 560 | struct cpu_timer_list *const nt = &timer->it.cpu; |
| 566 | struct cpu_timer_list *next; | 561 | struct cpu_timer_list *next; |
| 567 | unsigned long i; | ||
| 568 | 562 | ||
| 569 | head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? | 563 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { |
| 570 | p->cpu_timers : p->signal->cpu_timers); | 564 | head = p->cpu_timers; |
| 565 | cputime_expires = &p->cputime_expires; | ||
| 566 | } else { | ||
| 567 | head = p->signal->cpu_timers; | ||
| 568 | cputime_expires = &p->signal->cputime_expires; | ||
| 569 | } | ||
| 571 | head += CPUCLOCK_WHICH(timer->it_clock); | 570 | head += CPUCLOCK_WHICH(timer->it_clock); |
| 572 | 571 | ||
| 573 | BUG_ON(!irqs_disabled()); | ||
| 574 | spin_lock(&p->sighand->siglock); | ||
| 575 | |||
| 576 | listpos = head; | 572 | listpos = head; |
| 577 | if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { | 573 | list_for_each_entry(next, head, entry) { |
| 578 | list_for_each_entry(next, head, entry) { | 574 | if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) |
| 579 | if (next->expires.sched > nt->expires.sched) | 575 | break; |
| 580 | break; | 576 | listpos = &next->entry; |
| 581 | listpos = &next->entry; | ||
| 582 | } | ||
| 583 | } else { | ||
| 584 | list_for_each_entry(next, head, entry) { | ||
| 585 | if (cputime_gt(next->expires.cpu, nt->expires.cpu)) | ||
| 586 | break; | ||
| 587 | listpos = &next->entry; | ||
| 588 | } | ||
| 589 | } | 577 | } |
| 590 | list_add(&nt->entry, listpos); | 578 | list_add(&nt->entry, listpos); |
| 591 | 579 | ||
| 592 | if (listpos == head) { | 580 | if (listpos == head) { |
| 581 | union cpu_time_count *exp = &nt->expires; | ||
| 582 | |||
| 593 | /* | 583 | /* |
| 594 | * We are the new earliest-expiring timer. | 584 | * We are the new earliest-expiring POSIX 1.b timer, hence |
| 595 | * If we are a thread timer, there can always | 585 | * need to update expiration cache. Take into account that |
| 596 | * be a process timer telling us to stop earlier. | 586 | * for process timers we share expiration cache with itimers |
| 587 | * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. | ||
| 597 | */ | 588 | */ |
| 598 | 589 | ||
| 599 | if (CPUCLOCK_PERTHREAD(timer->it_clock)) { | 590 | switch (CPUCLOCK_WHICH(timer->it_clock)) { |
| 600 | union cpu_time_count *exp = &nt->expires; | 591 | case CPUCLOCK_PROF: |
| 601 | 592 | if (expires_gt(cputime_expires->prof_exp, exp->cpu)) | |
| 602 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 593 | cputime_expires->prof_exp = exp->cpu; |
| 603 | default: | 594 | break; |
| 604 | BUG(); | 595 | case CPUCLOCK_VIRT: |
| 605 | case CPUCLOCK_PROF: | 596 | if (expires_gt(cputime_expires->virt_exp, exp->cpu)) |
| 606 | if (expires_gt(p->cputime_expires.prof_exp, | 597 | cputime_expires->virt_exp = exp->cpu; |
| 607 | exp->cpu)) | 598 | break; |
| 608 | p->cputime_expires.prof_exp = exp->cpu; | 599 | case CPUCLOCK_SCHED: |
| 609 | break; | 600 | if (cputime_expires->sched_exp == 0 || |
| 610 | case CPUCLOCK_VIRT: | 601 | cputime_expires->sched_exp > exp->sched) |
| 611 | if (expires_gt(p->cputime_expires.virt_exp, | 602 | cputime_expires->sched_exp = exp->sched; |
| 612 | exp->cpu)) | 603 | break; |
| 613 | p->cputime_expires.virt_exp = exp->cpu; | ||
| 614 | break; | ||
| 615 | case CPUCLOCK_SCHED: | ||
| 616 | if (p->cputime_expires.sched_exp == 0 || | ||
| 617 | p->cputime_expires.sched_exp > exp->sched) | ||
| 618 | p->cputime_expires.sched_exp = | ||
| 619 | exp->sched; | ||
| 620 | break; | ||
| 621 | } | ||
| 622 | } else { | ||
| 623 | struct signal_struct *const sig = p->signal; | ||
| 624 | union cpu_time_count *exp = &timer->it.cpu.expires; | ||
| 625 | |||
| 626 | /* | ||
| 627 | * For a process timer, set the cached expiration time. | ||
| 628 | */ | ||
| 629 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | ||
| 630 | default: | ||
| 631 | BUG(); | ||
| 632 | case CPUCLOCK_VIRT: | ||
| 633 | if (expires_le(sig->it[CPUCLOCK_VIRT].expires, | ||
| 634 | exp->cpu)) | ||
| 635 | break; | ||
| 636 | sig->cputime_expires.virt_exp = exp->cpu; | ||
| 637 | break; | ||
| 638 | case CPUCLOCK_PROF: | ||
| 639 | if (expires_le(sig->it[CPUCLOCK_PROF].expires, | ||
| 640 | exp->cpu)) | ||
| 641 | break; | ||
| 642 | i = sig->rlim[RLIMIT_CPU].rlim_cur; | ||
| 643 | if (i != RLIM_INFINITY && | ||
| 644 | i <= cputime_to_secs(exp->cpu)) | ||
| 645 | break; | ||
| 646 | sig->cputime_expires.prof_exp = exp->cpu; | ||
| 647 | break; | ||
| 648 | case CPUCLOCK_SCHED: | ||
| 649 | sig->cputime_expires.sched_exp = exp->sched; | ||
| 650 | break; | ||
| 651 | } | ||
| 652 | } | 604 | } |
| 653 | } | 605 | } |
| 654 | |||
| 655 | spin_unlock(&p->sighand->siglock); | ||
| 656 | } | 606 | } |
| 657 | 607 | ||
| 658 | /* | 608 | /* |
| @@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) | |||
| 660 | */ | 610 | */ |
| 661 | static void cpu_timer_fire(struct k_itimer *timer) | 611 | static void cpu_timer_fire(struct k_itimer *timer) |
| 662 | { | 612 | { |
| 663 | if (unlikely(timer->sigq == NULL)) { | 613 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { |
| 614 | /* | ||
| 615 | * User don't want any signal. | ||
| 616 | */ | ||
| 617 | timer->it.cpu.expires.sched = 0; | ||
| 618 | } else if (unlikely(timer->sigq == NULL)) { | ||
| 664 | /* | 619 | /* |
| 665 | * This a special case for clock_nanosleep, | 620 | * This a special case for clock_nanosleep, |
| 666 | * not a normal timer from sys_timer_create. | 621 | * not a normal timer from sys_timer_create. |
| @@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 721 | struct itimerspec *new, struct itimerspec *old) | 676 | struct itimerspec *new, struct itimerspec *old) |
| 722 | { | 677 | { |
| 723 | struct task_struct *p = timer->it.cpu.task; | 678 | struct task_struct *p = timer->it.cpu.task; |
| 724 | union cpu_time_count old_expires, new_expires, val; | 679 | union cpu_time_count old_expires, new_expires, old_incr, val; |
| 725 | int ret; | 680 | int ret; |
| 726 | 681 | ||
| 727 | if (unlikely(p == NULL)) { | 682 | if (unlikely(p == NULL)) { |
| @@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 752 | BUG_ON(!irqs_disabled()); | 707 | BUG_ON(!irqs_disabled()); |
| 753 | 708 | ||
| 754 | ret = 0; | 709 | ret = 0; |
| 710 | old_incr = timer->it.cpu.incr; | ||
| 755 | spin_lock(&p->sighand->siglock); | 711 | spin_lock(&p->sighand->siglock); |
| 756 | old_expires = timer->it.cpu.expires; | 712 | old_expires = timer->it.cpu.expires; |
| 757 | if (unlikely(timer->it.cpu.firing)) { | 713 | if (unlikely(timer->it.cpu.firing)) { |
| @@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 759 | ret = TIMER_RETRY; | 715 | ret = TIMER_RETRY; |
| 760 | } else | 716 | } else |
| 761 | list_del_init(&timer->it.cpu.entry); | 717 | list_del_init(&timer->it.cpu.entry); |
| 762 | spin_unlock(&p->sighand->siglock); | ||
| 763 | 718 | ||
| 764 | /* | 719 | /* |
| 765 | * We need to sample the current value to convert the new | 720 | * We need to sample the current value to convert the new |
| @@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 813 | * disable this firing since we are already reporting | 768 | * disable this firing since we are already reporting |
| 814 | * it as an overrun (thanks to bump_cpu_timer above). | 769 | * it as an overrun (thanks to bump_cpu_timer above). |
| 815 | */ | 770 | */ |
| 771 | spin_unlock(&p->sighand->siglock); | ||
| 816 | read_unlock(&tasklist_lock); | 772 | read_unlock(&tasklist_lock); |
| 817 | goto out; | 773 | goto out; |
| 818 | } | 774 | } |
| @@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 828 | */ | 784 | */ |
| 829 | timer->it.cpu.expires = new_expires; | 785 | timer->it.cpu.expires = new_expires; |
| 830 | if (new_expires.sched != 0 && | 786 | if (new_expires.sched != 0 && |
| 831 | (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE && | ||
| 832 | cpu_time_before(timer->it_clock, val, new_expires)) { | 787 | cpu_time_before(timer->it_clock, val, new_expires)) { |
| 833 | arm_timer(timer, val); | 788 | arm_timer(timer); |
| 834 | } | 789 | } |
| 835 | 790 | ||
| 791 | spin_unlock(&p->sighand->siglock); | ||
| 836 | read_unlock(&tasklist_lock); | 792 | read_unlock(&tasklist_lock); |
| 837 | 793 | ||
| 838 | /* | 794 | /* |
| @@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 853 | timer->it_overrun = -1; | 809 | timer->it_overrun = -1; |
| 854 | 810 | ||
| 855 | if (new_expires.sched != 0 && | 811 | if (new_expires.sched != 0 && |
| 856 | (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE && | ||
| 857 | !cpu_time_before(timer->it_clock, val, new_expires)) { | 812 | !cpu_time_before(timer->it_clock, val, new_expires)) { |
| 858 | /* | 813 | /* |
| 859 | * The designated time already passed, so we notify | 814 | * The designated time already passed, so we notify |
| @@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
| 867 | out: | 822 | out: |
| 868 | if (old) { | 823 | if (old) { |
| 869 | sample_to_timespec(timer->it_clock, | 824 | sample_to_timespec(timer->it_clock, |
| 870 | timer->it.cpu.incr, &old->it_interval); | 825 | old_incr, &old->it_interval); |
| 871 | } | 826 | } |
| 872 | return ret; | 827 | return ret; |
| 873 | } | 828 | } |
| @@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
| 927 | read_unlock(&tasklist_lock); | 882 | read_unlock(&tasklist_lock); |
| 928 | } | 883 | } |
| 929 | 884 | ||
| 930 | if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { | ||
| 931 | if (timer->it.cpu.incr.sched == 0 && | ||
| 932 | cpu_time_before(timer->it_clock, | ||
| 933 | timer->it.cpu.expires, now)) { | ||
| 934 | /* | ||
| 935 | * Do-nothing timer expired and has no reload, | ||
| 936 | * so it's as if it was never set. | ||
| 937 | */ | ||
| 938 | timer->it.cpu.expires.sched = 0; | ||
| 939 | itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; | ||
| 940 | return; | ||
| 941 | } | ||
| 942 | /* | ||
| 943 | * Account for any expirations and reloads that should | ||
| 944 | * have happened. | ||
| 945 | */ | ||
| 946 | bump_cpu_timer(timer, now); | ||
| 947 | } | ||
| 948 | |||
| 949 | if (unlikely(clear_dead)) { | 885 | if (unlikely(clear_dead)) { |
| 950 | /* | 886 | /* |
| 951 | * We've noticed that the thread is dead, but | 887 | * We've noticed that the thread is dead, but |
| @@ -1270,6 +1206,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
| 1270 | goto out; | 1206 | goto out; |
| 1271 | } | 1207 | } |
| 1272 | read_lock(&tasklist_lock); /* arm_timer needs it. */ | 1208 | read_lock(&tasklist_lock); /* arm_timer needs it. */ |
| 1209 | spin_lock(&p->sighand->siglock); | ||
| 1273 | } else { | 1210 | } else { |
| 1274 | read_lock(&tasklist_lock); | 1211 | read_lock(&tasklist_lock); |
| 1275 | if (unlikely(p->signal == NULL)) { | 1212 | if (unlikely(p->signal == NULL)) { |
| @@ -1290,6 +1227,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
| 1290 | clear_dead_task(timer, now); | 1227 | clear_dead_task(timer, now); |
| 1291 | goto out_unlock; | 1228 | goto out_unlock; |
| 1292 | } | 1229 | } |
| 1230 | spin_lock(&p->sighand->siglock); | ||
| 1293 | cpu_timer_sample_group(timer->it_clock, p, &now); | 1231 | cpu_timer_sample_group(timer->it_clock, p, &now); |
| 1294 | bump_cpu_timer(timer, now); | 1232 | bump_cpu_timer(timer, now); |
| 1295 | /* Leave the tasklist_lock locked for the call below. */ | 1233 | /* Leave the tasklist_lock locked for the call below. */ |
| @@ -1298,7 +1236,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
| 1298 | /* | 1236 | /* |
| 1299 | * Now re-arm for the new expiry time. | 1237 | * Now re-arm for the new expiry time. |
| 1300 | */ | 1238 | */ |
| 1301 | arm_timer(timer, now); | 1239 | BUG_ON(!irqs_disabled()); |
| 1240 | arm_timer(timer); | ||
| 1241 | spin_unlock(&p->sighand->siglock); | ||
| 1302 | 1242 | ||
| 1303 | out_unlock: | 1243 | out_unlock: |
| 1304 | read_unlock(&tasklist_lock); | 1244 | read_unlock(&tasklist_lock); |
| @@ -1390,7 +1330,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |||
| 1390 | return 1; | 1330 | return 1; |
| 1391 | } | 1331 | } |
| 1392 | 1332 | ||
| 1393 | return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; | 1333 | return 0; |
| 1394 | } | 1334 | } |
| 1395 | 1335 | ||
| 1396 | /* | 1336 | /* |
| @@ -1456,21 +1396,23 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
| 1456 | } | 1396 | } |
| 1457 | 1397 | ||
| 1458 | /* | 1398 | /* |
| 1459 | * Set one of the process-wide special case CPU timers. | 1399 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. |
| 1460 | * The tsk->sighand->siglock must be held by the caller. | 1400 | * The tsk->sighand->siglock must be held by the caller. |
| 1461 | * The *newval argument is relative and we update it to be absolute, *oldval | ||
| 1462 | * is absolute and we update it to be relative. | ||
| 1463 | */ | 1401 | */ |
| 1464 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | 1402 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, |
| 1465 | cputime_t *newval, cputime_t *oldval) | 1403 | cputime_t *newval, cputime_t *oldval) |
| 1466 | { | 1404 | { |
| 1467 | union cpu_time_count now; | 1405 | union cpu_time_count now; |
| 1468 | struct list_head *head; | ||
| 1469 | 1406 | ||
| 1470 | BUG_ON(clock_idx == CPUCLOCK_SCHED); | 1407 | BUG_ON(clock_idx == CPUCLOCK_SCHED); |
| 1471 | cpu_timer_sample_group(clock_idx, tsk, &now); | 1408 | cpu_timer_sample_group(clock_idx, tsk, &now); |
| 1472 | 1409 | ||
| 1473 | if (oldval) { | 1410 | if (oldval) { |
| 1411 | /* | ||
| 1412 | * We are setting itimer. The *oldval is absolute and we update | ||
| 1413 | * it to be relative, *newval argument is relative and we update | ||
| 1414 | * it to be absolute. | ||
| 1415 | */ | ||
| 1474 | if (!cputime_eq(*oldval, cputime_zero)) { | 1416 | if (!cputime_eq(*oldval, cputime_zero)) { |
| 1475 | if (cputime_le(*oldval, now.cpu)) { | 1417 | if (cputime_le(*oldval, now.cpu)) { |
| 1476 | /* Just about to fire. */ | 1418 | /* Just about to fire. */ |
| @@ -1483,33 +1425,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
| 1483 | if (cputime_eq(*newval, cputime_zero)) | 1425 | if (cputime_eq(*newval, cputime_zero)) |
| 1484 | return; | 1426 | return; |
| 1485 | *newval = cputime_add(*newval, now.cpu); | 1427 | *newval = cputime_add(*newval, now.cpu); |
| 1486 | |||
| 1487 | /* | ||
| 1488 | * If the RLIMIT_CPU timer will expire before the | ||
| 1489 | * ITIMER_PROF timer, we have nothing else to do. | ||
| 1490 | */ | ||
| 1491 | if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur | ||
| 1492 | < cputime_to_secs(*newval)) | ||
| 1493 | return; | ||
| 1494 | } | 1428 | } |
| 1495 | 1429 | ||
| 1496 | /* | 1430 | /* |
| 1497 | * Check whether there are any process timers already set to fire | 1431 | * Update expiration cache if we are the earliest timer, or eventually |
| 1498 | * before this one. If so, we don't have anything more to do. | 1432 | * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire. |
| 1499 | */ | 1433 | */ |
| 1500 | head = &tsk->signal->cpu_timers[clock_idx]; | 1434 | switch (clock_idx) { |
| 1501 | if (list_empty(head) || | 1435 | case CPUCLOCK_PROF: |
| 1502 | cputime_ge(list_first_entry(head, | 1436 | if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval)) |
| 1503 | struct cpu_timer_list, entry)->expires.cpu, | ||
| 1504 | *newval)) { | ||
| 1505 | switch (clock_idx) { | ||
| 1506 | case CPUCLOCK_PROF: | ||
| 1507 | tsk->signal->cputime_expires.prof_exp = *newval; | 1437 | tsk->signal->cputime_expires.prof_exp = *newval; |
| 1508 | break; | 1438 | break; |
| 1509 | case CPUCLOCK_VIRT: | 1439 | case CPUCLOCK_VIRT: |
| 1440 | if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval)) | ||
| 1510 | tsk->signal->cputime_expires.virt_exp = *newval; | 1441 | tsk->signal->cputime_expires.virt_exp = *newval; |
| 1511 | break; | 1442 | break; |
| 1512 | } | ||
| 1513 | } | 1443 | } |
| 1514 | } | 1444 | } |
| 1515 | 1445 | ||
diff --git a/kernel/time.c b/kernel/time.c
index 656dccfe1cbb..50612faa9baf 100644
--- a/kernel/time.c
+++ b/kernel/time.c
| @@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, | |||
| 132 | */ | 132 | */ |
| 133 | static inline void warp_clock(void) | 133 | static inline void warp_clock(void) |
| 134 | { | 134 | { |
| 135 | write_seqlock_irq(&xtime_lock); | 135 | struct timespec delta, adjust; |
| 136 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; | 136 | delta.tv_sec = sys_tz.tz_minuteswest * 60; |
| 137 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; | 137 | delta.tv_nsec = 0; |
| 138 | update_xtime_cache(0); | 138 | adjust = timespec_add_safe(current_kernel_time(), delta); |
| 139 | write_sequnlock_irq(&xtime_lock); | 139 | do_settimeofday(&adjust); |
| 140 | clock_was_set(); | ||
| 141 | } | 140 | } |
| 142 | 141 | ||
| 143 | /* | 142 | /* |
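A worked pass through the new warp_clock() body, with an assumed timezone one hour east of UTC (sys_tz.tz_minuteswest == -60); the warp now goes through do_settimeofday() and the normal timekeeping update path instead of editing xtime and wall_to_monotonic under xtime_lock by hand:

	delta.tv_sec = sys_tz.tz_minuteswest * 60;	/* -60 * 60 = -3600 */
	delta.tv_nsec = 0;
	adjust = timespec_add_safe(current_kernel_time(), delta);
	do_settimeofday(&adjust);			/* wall clock moves back one hour */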
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7c0f180d6e9d..c63116863a80 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
| @@ -69,7 +69,7 @@ static s64 time_freq; | |||
| 69 | /* time at last adjustment (secs): */ | 69 | /* time at last adjustment (secs): */ |
| 70 | static long time_reftime; | 70 | static long time_reftime; |
| 71 | 71 | ||
| 72 | long time_adjust; | 72 | static long time_adjust; |
| 73 | 73 | ||
| 74 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ | 74 | /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ |
| 75 | static s64 ntp_tick_adj; | 75 | static s64 ntp_tick_adj; |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 39f6177fafac..caf8d4d4f5c8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
| @@ -165,13 +165,6 @@ struct timespec raw_time; | |||
| 165 | /* flag for if timekeeping is suspended */ | 165 | /* flag for if timekeeping is suspended */ |
| 166 | int __read_mostly timekeeping_suspended; | 166 | int __read_mostly timekeeping_suspended; |
| 167 | 167 | ||
| 168 | static struct timespec xtime_cache __attribute__ ((aligned (16))); | ||
| 169 | void update_xtime_cache(u64 nsec) | ||
| 170 | { | ||
| 171 | xtime_cache = xtime; | ||
| 172 | timespec_add_ns(&xtime_cache, nsec); | ||
| 173 | } | ||
| 174 | |||
| 175 | /* must hold xtime_lock */ | 168 | /* must hold xtime_lock */ |
| 176 | void timekeeping_leap_insert(int leapsecond) | 169 | void timekeeping_leap_insert(int leapsecond) |
| 177 | { | 170 | { |
| @@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv) | |||
| 332 | 325 | ||
| 333 | xtime = *tv; | 326 | xtime = *tv; |
| 334 | 327 | ||
| 335 | update_xtime_cache(0); | ||
| 336 | |||
| 337 | timekeeper.ntp_error = 0; | 328 | timekeeper.ntp_error = 0; |
| 338 | ntp_clear(); | 329 | ntp_clear(); |
| 339 | 330 | ||
| @@ -559,7 +550,6 @@ void __init timekeeping_init(void) | |||
| 559 | } | 550 | } |
| 560 | set_normalized_timespec(&wall_to_monotonic, | 551 | set_normalized_timespec(&wall_to_monotonic, |
| 561 | -boot.tv_sec, -boot.tv_nsec); | 552 | -boot.tv_sec, -boot.tv_nsec); |
| 562 | update_xtime_cache(0); | ||
| 563 | total_sleep_time.tv_sec = 0; | 553 | total_sleep_time.tv_sec = 0; |
| 564 | total_sleep_time.tv_nsec = 0; | 554 | total_sleep_time.tv_nsec = 0; |
| 565 | write_sequnlock_irqrestore(&xtime_lock, flags); | 555 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| @@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev) | |||
| 593 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); | 583 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); |
| 594 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); | 584 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); |
| 595 | } | 585 | } |
| 596 | update_xtime_cache(0); | ||
| 597 | /* re-base the last cycle value */ | 586 | /* re-base the last cycle value */ |
| 598 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); | 587 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
| 599 | timekeeper.ntp_error = 0; | 588 | timekeeper.ntp_error = 0; |
| @@ -788,7 +777,6 @@ void update_wall_time(void) | |||
| 788 | { | 777 | { |
| 789 | struct clocksource *clock; | 778 | struct clocksource *clock; |
| 790 | cycle_t offset; | 779 | cycle_t offset; |
| 791 | u64 nsecs; | ||
| 792 | int shift = 0, maxshift; | 780 | int shift = 0, maxshift; |
| 793 | 781 | ||
| 794 | /* Make sure we're fully resumed: */ | 782 | /* Make sure we're fully resumed: */ |
| @@ -847,7 +835,9 @@ void update_wall_time(void) | |||
| 847 | timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; | 835 | timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; |
| 848 | } | 836 | } |
| 849 | 837 | ||
| 850 | /* store full nanoseconds into xtime after rounding it up and | 838 | |
| 839 | /* | ||
| 840 | * Store full nanoseconds into xtime after rounding it up and | ||
| 851 | * add the remainder to the error difference. | 841 | * add the remainder to the error difference. |
| 852 | */ | 842 | */ |
| 853 | xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; | 843 | xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; |
| @@ -855,8 +845,15 @@ void update_wall_time(void) | |||
| 855 | timekeeper.ntp_error += timekeeper.xtime_nsec << | 845 | timekeeper.ntp_error += timekeeper.xtime_nsec << |
| 856 | timekeeper.ntp_error_shift; | 846 | timekeeper.ntp_error_shift; |
| 857 | 847 | ||
| 858 | nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); | 848 | /* |
| 859 | update_xtime_cache(nsecs); | 849 | * Finally, make sure that after the rounding |
| 850 | * xtime.tv_nsec isn't larger then NSEC_PER_SEC | ||
| 851 | */ | ||
| 852 | if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) { | ||
| 853 | xtime.tv_nsec -= NSEC_PER_SEC; | ||
| 854 | xtime.tv_sec++; | ||
| 855 | second_overflow(); | ||
| 856 | } | ||
| 860 | 857 | ||
| 861 | /* check to see if there is a new clocksource to use */ | 858 | /* check to see if there is a new clocksource to use */ |
| 862 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); | 859 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); |
| @@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); | |||
| 896 | 893 | ||
| 897 | unsigned long get_seconds(void) | 894 | unsigned long get_seconds(void) |
| 898 | { | 895 | { |
| 899 | return xtime_cache.tv_sec; | 896 | return xtime.tv_sec; |
| 900 | } | 897 | } |
| 901 | EXPORT_SYMBOL(get_seconds); | 898 | EXPORT_SYMBOL(get_seconds); |
| 902 | 899 | ||
| 903 | struct timespec __current_kernel_time(void) | 900 | struct timespec __current_kernel_time(void) |
| 904 | { | 901 | { |
| 905 | return xtime_cache; | 902 | return xtime; |
| 906 | } | 903 | } |
| 907 | 904 | ||
| 908 | struct timespec current_kernel_time(void) | 905 | struct timespec current_kernel_time(void) |
| @@ -913,7 +910,7 @@ struct timespec current_kernel_time(void) | |||
| 913 | do { | 910 | do { |
| 914 | seq = read_seqbegin(&xtime_lock); | 911 | seq = read_seqbegin(&xtime_lock); |
| 915 | 912 | ||
| 916 | now = xtime_cache; | 913 | now = xtime; |
| 917 | } while (read_seqretry(&xtime_lock, seq)); | 914 | } while (read_seqretry(&xtime_lock, seq)); |
| 918 | 915 | ||
| 919 | return now; | 916 | return now; |
| @@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void) | |||
| 928 | do { | 925 | do { |
| 929 | seq = read_seqbegin(&xtime_lock); | 926 | seq = read_seqbegin(&xtime_lock); |
| 930 | 927 | ||
| 931 | now = xtime_cache; | 928 | now = xtime; |
| 932 | mono = wall_to_monotonic; | 929 | mono = wall_to_monotonic; |
| 933 | } while (read_seqretry(&xtime_lock, seq)); | 930 | } while (read_seqretry(&xtime_lock, seq)); |
| 934 | 931 | ||
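With xtime_cache removed, the coarse accessors above return xtime directly (current_kernel_time() under the xtime seqlock, get_seconds() as a plain word read). A small caller-side sketch of the tick-granularity interfaces they back (the function is hypothetical):

#include <linux/kernel.h>
#include <linux/time.h>

static void example_coarse_time(void)
{
	struct timespec now = current_kernel_time();	/* tick-granularity wall time */
	unsigned long secs = get_seconds();		/* seconds only */

	pr_info("coarse wall time %ld.%09ld (%lu s)\n",
		now.tv_sec, now.tv_nsec, secs);
}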
diff --git a/kernel/timer.c b/kernel/timer.c
index aeb6a54f2771..9199f3c52215 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
| @@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j) | |||
| 319 | } | 319 | } |
| 320 | EXPORT_SYMBOL_GPL(round_jiffies_up_relative); | 320 | EXPORT_SYMBOL_GPL(round_jiffies_up_relative); |
| 321 | 321 | ||
| 322 | /** | ||
| 323 | * set_timer_slack - set the allowed slack for a timer | ||
| 324 | * @slack_hz: the amount of time (in jiffies) allowed for rounding | ||
| 325 | * | ||
| 326 | * Set the amount of time, in jiffies, that a certain timer has | ||
| 327 | * in terms of slack. By setting this value, the timer subsystem | ||
| 328 | * will schedule the actual timer somewhere between | ||
| 329 | * the time mod_timer() asks for, and that time plus the slack. | ||
| 330 | * | ||
| 331 | * By setting the slack to -1, a percentage of the delay is used | ||
| 332 | * instead. | ||
| 333 | */ | ||
| 334 | void set_timer_slack(struct timer_list *timer, int slack_hz) | ||
| 335 | { | ||
| 336 | timer->slack = slack_hz; | ||
| 337 | } | ||
| 338 | EXPORT_SYMBOL_GPL(set_timer_slack); | ||
| 339 | |||
| 322 | 340 | ||
| 323 | static inline void set_running_timer(struct tvec_base *base, | 341 | static inline void set_running_timer(struct tvec_base *base, |
| 324 | struct timer_list *timer) | 342 | struct timer_list *timer) |
| @@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer, | |||
| 550 | { | 568 | { |
| 551 | timer->entry.next = NULL; | 569 | timer->entry.next = NULL; |
| 552 | timer->base = __raw_get_cpu_var(tvec_bases); | 570 | timer->base = __raw_get_cpu_var(tvec_bases); |
| 571 | timer->slack = -1; | ||
| 553 | #ifdef CONFIG_TIMER_STATS | 572 | #ifdef CONFIG_TIMER_STATS |
| 554 | timer->start_site = NULL; | 573 | timer->start_site = NULL; |
| 555 | timer->start_pid = -1; | 574 | timer->start_pid = -1; |
| @@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) | |||
| 715 | } | 734 | } |
| 716 | EXPORT_SYMBOL(mod_timer_pending); | 735 | EXPORT_SYMBOL(mod_timer_pending); |
| 717 | 736 | ||
| 737 | /* | ||
| 738 | * Decide where to put the timer while taking the slack into account | ||
| 739 | * | ||
| 740 | * Algorithm: | ||
| 741 | * 1) calculate the maximum (absolute) time | ||
| 742 | * 2) calculate the highest bit where the expires and new max are different | ||
| 743 | * 3) use this bit to make a mask | ||
| 744 | * 4) use the bitmask to round down the maximum time, so that all last | ||
| 745 | * bits are zeros | ||
| 746 | */ | ||
| 747 | static inline | ||
| 748 | unsigned long apply_slack(struct timer_list *timer, unsigned long expires) | ||
| 749 | { | ||
| 750 | unsigned long expires_limit, mask; | ||
| 751 | int bit; | ||
| 752 | |||
| 753 | expires_limit = expires + timer->slack; | ||
| 754 | |||
| 755 | if (timer->slack < 0) /* auto slack: use 0.4% */ | ||
| 756 | expires_limit = expires + (expires - jiffies)/256; | ||
| 757 | |||
| 758 | mask = expires ^ expires_limit; | ||
| 759 | |||
| 760 | if (mask == 0) | ||
| 761 | return expires; | ||
| 762 | |||
| 763 | bit = find_last_bit(&mask, BITS_PER_LONG); | ||
| 764 | |||
| 765 | mask = (1 << bit) - 1; | ||
| 766 | |||
| 767 | expires_limit = expires_limit & ~(mask); | ||
| 768 | |||
| 769 | return expires_limit; | ||
| 770 | } | ||
| 771 | |||
| 718 | /** | 772 | /** |
| 719 | * mod_timer - modify a timer's timeout | 773 | * mod_timer - modify a timer's timeout |
| 720 | * @timer: the timer to be modified | 774 | * @timer: the timer to be modified |
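A worked instance of apply_slack() above, with assumed values jiffies = 0, expires = 1000 and the default slack of -1:

	expires_limit = 1000 + 1000/256                     = 1003
	mask          = 1000 ^ 1003                         = 0x3
	bit           = find_last_bit(&mask, BITS_PER_LONG) = 1
	mask          = (1 << 1) - 1                        = 0x1
	result        = 1003 & ~0x1                         = 1002

So the timer may fire up to two jiffies after the requested expiry, which lets timers with nearby deadlines round to a common value and be served by a single wakeup.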
| @@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires) | |||
| 745 | if (timer_pending(timer) && timer->expires == expires) | 799 | if (timer_pending(timer) && timer->expires == expires) |
| 746 | return 1; | 800 | return 1; |
| 747 | 801 | ||
| 802 | expires = apply_slack(timer, expires); | ||
| 803 | |||
| 748 | return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); | 804 | return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); |
| 749 | } | 805 | } |
| 750 | EXPORT_SYMBOL(mod_timer); | 806 | EXPORT_SYMBOL(mod_timer); |
| @@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) | |||
| 955 | return index; | 1011 | return index; |
| 956 | } | 1012 | } |
| 957 | 1013 | ||
| 1014 | static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), | ||
| 1015 | unsigned long data) | ||
| 1016 | { | ||
| 1017 | int preempt_count = preempt_count(); | ||
| 1018 | |||
| 1019 | #ifdef CONFIG_LOCKDEP | ||
| 1020 | /* | ||
| 1021 | * It is permissible to free the timer from inside the | ||
| 1022 | * function that is called from it, this we need to take into | ||
| 1023 | * account for lockdep too. To avoid bogus "held lock freed" | ||
| 1024 | * warnings as well as problems when looking into | ||
| 1025 | * timer->lockdep_map, make a copy and use that here. | ||
| 1026 | */ | ||
| 1027 | struct lockdep_map lockdep_map = timer->lockdep_map; | ||
| 1028 | #endif | ||
| 1029 | /* | ||
| 1030 | * Couple the lock chain with the lock chain at | ||
| 1031 | * del_timer_sync() by acquiring the lock_map around the fn() | ||
| 1032 | * call here and in del_timer_sync(). | ||
| 1033 | */ | ||
| 1034 | lock_map_acquire(&lockdep_map); | ||
| 1035 | |||
| 1036 | trace_timer_expire_entry(timer); | ||
| 1037 | fn(data); | ||
| 1038 | trace_timer_expire_exit(timer); | ||
| 1039 | |||
| 1040 | lock_map_release(&lockdep_map); | ||
| 1041 | |||
| 1042 | if (preempt_count != preempt_count()) { | ||
| 1043 | WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", | ||
| 1044 | fn, preempt_count, preempt_count()); | ||
| 1045 | /* | ||
| 1046 | * Restore the preempt count. That gives us a decent | ||
| 1047 | * chance to survive and extract information. If the | ||
| 1048 | * callback kept a lock held, bad luck, but not worse | ||
| 1049 | * than the BUG() we had. | ||
| 1050 | */ | ||
| 1051 | preempt_count() = preempt_count; | ||
| 1052 | } | ||
| 1053 | } | ||
| 1054 | |||
| 958 | #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) | 1055 | #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) |
| 959 | 1056 | ||
| 960 | /** | 1057 | /** |
| @@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base) | |||
| 998 | detach_timer(timer, 1); | 1095 | detach_timer(timer, 1); |
| 999 | 1096 | ||
| 1000 | spin_unlock_irq(&base->lock); | 1097 | spin_unlock_irq(&base->lock); |
| 1001 | { | 1098 | call_timer_fn(timer, fn, data); |
| 1002 | int preempt_count = preempt_count(); | ||
| 1003 | |||
| 1004 | #ifdef CONFIG_LOCKDEP | ||
| 1005 | /* | ||
| 1006 | * It is permissible to free the timer from | ||
| 1007 | * inside the function that is called from | ||
| 1008 | * it, this we need to take into account for | ||
| 1009 | * lockdep too. To avoid bogus "held lock | ||
| 1010 | * freed" warnings as well as problems when | ||
| 1011 | * looking into timer->lockdep_map, make a | ||
| 1012 | * copy and use that here. | ||
| 1013 | */ | ||
| 1014 | struct lockdep_map lockdep_map = | ||
| 1015 | timer->lockdep_map; | ||
| 1016 | #endif | ||
| 1017 | /* | ||
| 1018 | * Couple the lock chain with the lock chain at | ||
| 1019 | * del_timer_sync() by acquiring the lock_map | ||
| 1020 | * around the fn() call here and in | ||
| 1021 | * del_timer_sync(). | ||
| 1022 | */ | ||
| 1023 | lock_map_acquire(&lockdep_map); | ||
| 1024 | |||
| 1025 | trace_timer_expire_entry(timer); | ||
| 1026 | fn(data); | ||
| 1027 | trace_timer_expire_exit(timer); | ||
| 1028 | |||
| 1029 | lock_map_release(&lockdep_map); | ||
| 1030 | |||
| 1031 | if (preempt_count != preempt_count()) { | ||
| 1032 | printk(KERN_ERR "huh, entered %p " | ||
| 1033 | "with preempt_count %08x, exited" | ||
| 1034 | " with %08x?\n", | ||
| 1035 | fn, preempt_count, | ||
| 1036 | preempt_count()); | ||
| 1037 | BUG(); | ||
| 1038 | } | ||
| 1039 | } | ||
| 1040 | spin_lock_irq(&base->lock); | 1099 | spin_lock_irq(&base->lock); |
| 1041 | } | 1100 | } |
| 1042 | } | 1101 | } |
