about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/feature-removal-schedule.txt 10
-rw-r--r-- include/linux/clocksource.h 19
-rw-r--r-- include/linux/hrtimer.h 2
-rw-r--r-- include/linux/time.h 1
-rw-r--r-- include/linux/timer.h 10
-rw-r--r-- include/linux/timex.h 5
-rw-r--r-- ipc/mqueue.c 74
-rw-r--r-- kernel/hrtimer.c 67
-rw-r--r-- kernel/posix-cpu-timers.c 298
-rw-r--r-- kernel/time.c 11
-rw-r--r-- kernel/time/clocksource.c 48
-rw-r--r-- kernel/time/ntp.c 2
-rw-r--r-- kernel/time/timekeeping.c 35
-rw-r--r-- kernel/timer.c 137
14 files changed, 376 insertions, 343 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index d9d3fbcb705d..e7965f4a385a 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -541,6 +541,16 @@ Who: Avi Kivity <avi@redhat.com>
541 541
542---------------------------- 542----------------------------
543 543
544What: xtime, wall_to_monotonic
545When: 2.6.36+
546Files: kernel/time/timekeeping.c include/linux/time.h
547Why: Cleaning up timekeeping internal values. Please use
548 existing timekeeping accessor functions to access
549 the equivalent functionality.
550Who: John Stultz <johnstul@us.ibm.com>
551
552----------------------------
553
544What: KVM kernel-allocated memory slots 554What: KVM kernel-allocated memory slots
545When: July 2010 555When: July 2010
546Why: Since 2.6.25, kvm supports user-allocated memory slots, which are 556Why: Since 2.6.25, kvm supports user-allocated memory slots, which are
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 4bca8b60cdf7..5ea3c60c160c 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -273,7 +273,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
273} 273}
274 274
275 275
276/* used to install a new clocksource */
277extern int clocksource_register(struct clocksource*); 276extern int clocksource_register(struct clocksource*);
278extern void clocksource_unregister(struct clocksource*); 277extern void clocksource_unregister(struct clocksource*);
279extern void clocksource_touch_watchdog(void); 278extern void clocksource_touch_watchdog(void);
@@ -287,6 +286,24 @@ extern void clocksource_mark_unstable(struct clocksource *cs);
287extern void 286extern void
288clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); 287clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
289 288
289/*
290 * Don't call __clocksource_register_scale directly, use
291 * clocksource_register_hz/khz
292 */
293extern int
294__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
295
296static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
297{
298 return __clocksource_register_scale(cs, 1, hz);
299}
300
301static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
302{
303 return __clocksource_register_scale(cs, 1000, khz);
304}
305
306
290static inline void 307static inline void
291clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec) 308clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
292{ 309{
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5d86fb2309d2..fd0c1b857d3d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -422,6 +422,8 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
422 422
423extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, 423extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
424 const enum hrtimer_mode mode); 424 const enum hrtimer_mode mode);
425extern int schedule_hrtimeout_range_clock(ktime_t *expires,
426 unsigned long delta, const enum hrtimer_mode mode, int clock);
425extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); 427extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
426 428
427/* Soft interrupt function to run the hrtimer queues: */ 429/* Soft interrupt function to run the hrtimer queues: */
diff --git a/include/linux/time.h b/include/linux/time.h
index 6e026e45a179..ea3559f0b3f2 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -150,7 +150,6 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
150extern int timekeeping_valid_for_hres(void); 150extern int timekeeping_valid_for_hres(void);
151extern u64 timekeeping_max_deferment(void); 151extern u64 timekeeping_max_deferment(void);
152extern void update_wall_time(void); 152extern void update_wall_time(void);
153extern void update_xtime_cache(u64 nsec);
154extern void timekeeping_leap_insert(int leapsecond); 153extern void timekeeping_leap_insert(int leapsecond);
155 154
156struct tms; 155struct tms;
diff --git a/include/linux/timer.h b/include/linux/timer.h
index a2d1eb6cb3f0..ea965b857a50 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -10,13 +10,19 @@
10struct tvec_base; 10struct tvec_base;
11 11
12struct timer_list { 12struct timer_list {
13 /*
14 * All fields that change during normal runtime grouped to the
15 * same cacheline
16 */
13 struct list_head entry; 17 struct list_head entry;
14 unsigned long expires; 18 unsigned long expires;
19 struct tvec_base *base;
15 20
16 void (*function)(unsigned long); 21 void (*function)(unsigned long);
17 unsigned long data; 22 unsigned long data;
18 23
19 struct tvec_base *base; 24 int slack;
25
20#ifdef CONFIG_TIMER_STATS 26#ifdef CONFIG_TIMER_STATS
21 void *start_site; 27 void *start_site;
22 char start_comm[16]; 28 char start_comm[16];
@@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires);
165extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); 171extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
166extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); 172extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
167 173
174extern void set_timer_slack(struct timer_list *time, int slack_hz);
175
168#define TIMER_NOT_PINNED 0 176#define TIMER_NOT_PINNED 0
169#define TIMER_PINNED 1 177#define TIMER_PINNED 1
170/* 178/*
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 7a082b32d8e1..32d852f8cbe4 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -232,13 +232,11 @@ struct timex {
232 */ 232 */
233extern unsigned long tick_usec; /* USER_HZ period (usec) */ 233extern unsigned long tick_usec; /* USER_HZ period (usec) */
234extern unsigned long tick_nsec; /* ACTHZ period (nsec) */ 234extern unsigned long tick_nsec; /* ACTHZ period (nsec) */
235extern int tickadj; /* amount of adjustment per tick */
236 235
237/* 236/*
238 * phase-lock loop variables 237 * phase-lock loop variables
239 */ 238 */
240extern int time_status; /* clock synchronization status bits */ 239extern int time_status; /* clock synchronization status bits */
241extern long time_adjust; /* The amount of adjtime left */
242 240
243extern void ntp_init(void); 241extern void ntp_init(void);
244extern void ntp_clear(void); 242extern void ntp_clear(void);
@@ -271,9 +269,6 @@ extern void second_overflow(void);
271extern void update_ntp_one_tick(void); 269extern void update_ntp_one_tick(void);
272extern int do_adjtimex(struct timex *); 270extern int do_adjtimex(struct timex *);
273 271
274/* Don't use! Compatibility define for existing users. */
275#define tickadj (500/HZ ? : 1)
276
277int read_current_timer(unsigned long *timer_val); 272int read_current_timer(unsigned long *timer_val);
278 273
279/* The clock frequency of the i8253/i8254 PIT */ 274/* The clock frequency of the i8253/i8254 PIT */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 59a009dc54a8..5108232f93d4 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -429,7 +429,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr,
429 * sr: SEND or RECV 429 * sr: SEND or RECV
430 */ 430 */
431static int wq_sleep(struct mqueue_inode_info *info, int sr, 431static int wq_sleep(struct mqueue_inode_info *info, int sr,
432 long timeout, struct ext_wait_queue *ewp) 432 ktime_t *timeout, struct ext_wait_queue *ewp)
433{ 433{
434 int retval; 434 int retval;
435 signed long time; 435 signed long time;
@@ -440,7 +440,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
440 set_current_state(TASK_INTERRUPTIBLE); 440 set_current_state(TASK_INTERRUPTIBLE);
441 441
442 spin_unlock(&info->lock); 442 spin_unlock(&info->lock);
443 time = schedule_timeout(timeout); 443 time = schedule_hrtimeout_range_clock(timeout,
444 HRTIMER_MODE_ABS, 0, CLOCK_REALTIME);
444 445
445 while (ewp->state == STATE_PENDING) 446 while (ewp->state == STATE_PENDING)
446 cpu_relax(); 447 cpu_relax();
@@ -552,31 +553,16 @@ static void __do_notify(struct mqueue_inode_info *info)
552 wake_up(&info->wait_q); 553 wake_up(&info->wait_q);
553} 554}
554 555
555static long prepare_timeout(struct timespec *p) 556static int prepare_timeout(const struct timespec __user *u_abs_timeout,
557 ktime_t *expires, struct timespec *ts)
556{ 558{
557 struct timespec nowts; 559 if (copy_from_user(ts, u_abs_timeout, sizeof(struct timespec)))
558 long timeout; 560 return -EFAULT;
559 561 if (!timespec_valid(ts))
560 if (p) { 562 return -EINVAL;
561 if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0
562 || p->tv_nsec >= NSEC_PER_SEC))
563 return -EINVAL;
564 nowts = CURRENT_TIME;
565 /* first subtract as jiffies can't be too big */
566 p->tv_sec -= nowts.tv_sec;
567 if (p->tv_nsec < nowts.tv_nsec) {
568 p->tv_nsec += NSEC_PER_SEC;
569 p->tv_sec--;
570 }
571 p->tv_nsec -= nowts.tv_nsec;
572 if (p->tv_sec < 0)
573 return 0;
574
575 timeout = timespec_to_jiffies(p) + 1;
576 } else
577 return MAX_SCHEDULE_TIMEOUT;
578 563
579 return timeout; 564 *expires = timespec_to_ktime(*ts);
565 return 0;
580} 566}
581 567
582static void remove_notification(struct mqueue_inode_info *info) 568static void remove_notification(struct mqueue_inode_info *info)
@@ -862,22 +848,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
862 struct ext_wait_queue *receiver; 848 struct ext_wait_queue *receiver;
863 struct msg_msg *msg_ptr; 849 struct msg_msg *msg_ptr;
864 struct mqueue_inode_info *info; 850 struct mqueue_inode_info *info;
865 struct timespec ts, *p = NULL; 851 ktime_t expires, *timeout = NULL;
866 long timeout; 852 struct timespec ts;
867 int ret; 853 int ret;
868 854
869 if (u_abs_timeout) { 855 if (u_abs_timeout) {
870 if (copy_from_user(&ts, u_abs_timeout, 856 int res = prepare_timeout(u_abs_timeout, &expires, &ts);
871 sizeof(struct timespec))) 857 if (res)
872 return -EFAULT; 858 return res;
873 p = &ts; 859 timeout = &expires;
874 } 860 }
875 861
876 if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX)) 862 if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
877 return -EINVAL; 863 return -EINVAL;
878 864
879 audit_mq_sendrecv(mqdes, msg_len, msg_prio, p); 865 audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL);
880 timeout = prepare_timeout(p);
881 866
882 filp = fget(mqdes); 867 filp = fget(mqdes);
883 if (unlikely(!filp)) { 868 if (unlikely(!filp)) {
@@ -919,9 +904,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
919 if (filp->f_flags & O_NONBLOCK) { 904 if (filp->f_flags & O_NONBLOCK) {
920 spin_unlock(&info->lock); 905 spin_unlock(&info->lock);
921 ret = -EAGAIN; 906 ret = -EAGAIN;
922 } else if (unlikely(timeout < 0)) {
923 spin_unlock(&info->lock);
924 ret = timeout;
925 } else { 907 } else {
926 wait.task = current; 908 wait.task = current;
927 wait.msg = (void *) msg_ptr; 909 wait.msg = (void *) msg_ptr;
@@ -954,24 +936,23 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
954 size_t, msg_len, unsigned int __user *, u_msg_prio, 936 size_t, msg_len, unsigned int __user *, u_msg_prio,
955 const struct timespec __user *, u_abs_timeout) 937 const struct timespec __user *, u_abs_timeout)
956{ 938{
957 long timeout;
958 ssize_t ret; 939 ssize_t ret;
959 struct msg_msg *msg_ptr; 940 struct msg_msg *msg_ptr;
960 struct file *filp; 941 struct file *filp;
961 struct inode *inode; 942 struct inode *inode;
962 struct mqueue_inode_info *info; 943 struct mqueue_inode_info *info;
963 struct ext_wait_queue wait; 944 struct ext_wait_queue wait;
964 struct timespec ts, *p = NULL; 945 ktime_t expires, *timeout = NULL;
946 struct timespec ts;
965 947
966 if (u_abs_timeout) { 948 if (u_abs_timeout) {
967 if (copy_from_user(&ts, u_abs_timeout, 949 int res = prepare_timeout(u_abs_timeout, &expires, &ts);
968 sizeof(struct timespec))) 950 if (res)
969 return -EFAULT; 951 return res;
970 p = &ts; 952 timeout = &expires;
971 } 953 }
972 954
973 audit_mq_sendrecv(mqdes, msg_len, 0, p); 955 audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL);
974 timeout = prepare_timeout(p);
975 956
976 filp = fget(mqdes); 957 filp = fget(mqdes);
977 if (unlikely(!filp)) { 958 if (unlikely(!filp)) {
@@ -1003,11 +984,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1003 if (filp->f_flags & O_NONBLOCK) { 984 if (filp->f_flags & O_NONBLOCK) {
1004 spin_unlock(&info->lock); 985 spin_unlock(&info->lock);
1005 ret = -EAGAIN; 986 ret = -EAGAIN;
1006 msg_ptr = NULL;
1007 } else if (unlikely(timeout < 0)) {
1008 spin_unlock(&info->lock);
1009 ret = timeout;
1010 msg_ptr = NULL;
1011 } else { 987 } else {
1012 wait.task = current; 988 wait.task = current;
1013 wait.state = STATE_NONE; 989 wait.state = STATE_NONE;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0086628b6e97..b9b134b35088 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
1749} 1749}
1750 1750
1751/** 1751/**
1752 * schedule_hrtimeout_range - sleep until timeout 1752 * schedule_hrtimeout_range_clock - sleep until timeout
1753 * @expires: timeout value (ktime_t) 1753 * @expires: timeout value (ktime_t)
1754 * @delta: slack in expires timeout (ktime_t) 1754 * @delta: slack in expires timeout (ktime_t)
1755 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL 1755 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1756 * 1756 * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
1757 * Make the current task sleep until the given expiry time has
1758 * elapsed. The routine will return immediately unless
1759 * the current task state has been set (see set_current_state()).
1760 *
1761 * The @delta argument gives the kernel the freedom to schedule the
1762 * actual wakeup to a time that is both power and performance friendly.
1763 * The kernel give the normal best effort behavior for "@expires+@delta",
1764 * but may decide to fire the timer earlier, but no earlier than @expires.
1765 *
1766 * You can set the task state as follows -
1767 *
1768 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1769 * pass before the routine returns.
1770 *
1771 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1772 * delivered to the current task.
1773 *
1774 * The current task state is guaranteed to be TASK_RUNNING when this
1775 * routine returns.
1776 *
1777 * Returns 0 when the timer has expired otherwise -EINTR
1778 */ 1757 */
1779int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, 1758int __sched
1780 const enum hrtimer_mode mode) 1759schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
1760 const enum hrtimer_mode mode, int clock)
1781{ 1761{
1782 struct hrtimer_sleeper t; 1762 struct hrtimer_sleeper t;
1783 1763
@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1799 return -EINTR; 1779 return -EINTR;
1800 } 1780 }
1801 1781
1802 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); 1782 hrtimer_init_on_stack(&t.timer, clock, mode);
1803 hrtimer_set_expires_range_ns(&t.timer, *expires, delta); 1783 hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
1804 1784
1805 hrtimer_init_sleeper(&t, current); 1785 hrtimer_init_sleeper(&t, current);
@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1818 1798
1819 return !t.task ? 0 : -EINTR; 1799 return !t.task ? 0 : -EINTR;
1820} 1800}
1801
1802/**
1803 * schedule_hrtimeout_range - sleep until timeout
1804 * @expires: timeout value (ktime_t)
1805 * @delta: slack in expires timeout (ktime_t)
1806 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1807 *
1808 * Make the current task sleep until the given expiry time has
1809 * elapsed. The routine will return immediately unless
1810 * the current task state has been set (see set_current_state()).
1811 *
1812 * The @delta argument gives the kernel the freedom to schedule the
1813 * actual wakeup to a time that is both power and performance friendly.
1814 * The kernel give the normal best effort behavior for "@expires+@delta",
1815 * but may decide to fire the timer earlier, but no earlier than @expires.
1816 *
1817 * You can set the task state as follows -
1818 *
1819 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1820 * pass before the routine returns.
1821 *
1822 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1823 * delivered to the current task.
1824 *
1825 * The current task state is guaranteed to be TASK_RUNNING when this
1826 * routine returns.
1827 *
1828 * Returns 0 when the timer has expired otherwise -EINTR
1829 */
1830int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1831 const enum hrtimer_mode mode)
1832{
1833 return schedule_hrtimeout_range_clock(expires, delta, mode,
1834 CLOCK_MONOTONIC);
1835}
1821EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); 1836EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
1822 1837
1823/** 1838/**
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bc7704b3a443..00bb252f29a2 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -11,19 +11,18 @@
11#include <trace/events/timer.h> 11#include <trace/events/timer.h>
12 12
13/* 13/*
14 * Called after updating RLIMIT_CPU to set timer expiration if necessary. 14 * Called after updating RLIMIT_CPU to run cpu timer and update
15 * tsk->signal->cputime_expires expiration cache if necessary. Needs
16 * siglock protection since other code may update expiration cache as
17 * well.
15 */ 18 */
16void update_rlimit_cpu(unsigned long rlim_new) 19void update_rlimit_cpu(unsigned long rlim_new)
17{ 20{
18 cputime_t cputime = secs_to_cputime(rlim_new); 21 cputime_t cputime = secs_to_cputime(rlim_new);
19 struct signal_struct *const sig = current->signal;
20 22
21 if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || 23 spin_lock_irq(&current->sighand->siglock);
22 cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { 24 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
23 spin_lock_irq(&current->sighand->siglock); 25 spin_unlock_irq(&current->sighand->siglock);
24 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
25 spin_unlock_irq(&current->sighand->siglock);
26 }
27} 26}
28 27
29static int check_clock(const clockid_t which_clock) 28static int check_clock(const clockid_t which_clock)
@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
548 cputime_gt(expires, new_exp); 547 cputime_gt(expires, new_exp);
549} 548}
550 549
551static inline int expires_le(cputime_t expires, cputime_t new_exp)
552{
553 return !cputime_eq(expires, cputime_zero) &&
554 cputime_le(expires, new_exp);
555}
556/* 550/*
557 * Insert the timer on the appropriate list before any timers that 551 * Insert the timer on the appropriate list before any timers that
558 * expire later. This must be called with the tasklist_lock held 552 * expire later. This must be called with the tasklist_lock held
559 * for reading, and interrupts disabled. 553 * for reading, interrupts disabled and p->sighand->siglock taken.
560 */ 554 */
561static void arm_timer(struct k_itimer *timer, union cpu_time_count now) 555static void arm_timer(struct k_itimer *timer)
562{ 556{
563 struct task_struct *p = timer->it.cpu.task; 557 struct task_struct *p = timer->it.cpu.task;
564 struct list_head *head, *listpos; 558 struct list_head *head, *listpos;
559 struct task_cputime *cputime_expires;
565 struct cpu_timer_list *const nt = &timer->it.cpu; 560 struct cpu_timer_list *const nt = &timer->it.cpu;
566 struct cpu_timer_list *next; 561 struct cpu_timer_list *next;
567 unsigned long i;
568 562
569 head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? 563 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
570 p->cpu_timers : p->signal->cpu_timers); 564 head = p->cpu_timers;
565 cputime_expires = &p->cputime_expires;
566 } else {
567 head = p->signal->cpu_timers;
568 cputime_expires = &p->signal->cputime_expires;
569 }
571 head += CPUCLOCK_WHICH(timer->it_clock); 570 head += CPUCLOCK_WHICH(timer->it_clock);
572 571
573 BUG_ON(!irqs_disabled());
574 spin_lock(&p->sighand->siglock);
575
576 listpos = head; 572 listpos = head;
577 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { 573 list_for_each_entry(next, head, entry) {
578 list_for_each_entry(next, head, entry) { 574 if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
579 if (next->expires.sched > nt->expires.sched) 575 break;
580 break; 576 listpos = &next->entry;
581 listpos = &next->entry;
582 }
583 } else {
584 list_for_each_entry(next, head, entry) {
585 if (cputime_gt(next->expires.cpu, nt->expires.cpu))
586 break;
587 listpos = &next->entry;
588 }
589 } 577 }
590 list_add(&nt->entry, listpos); 578 list_add(&nt->entry, listpos);
591 579
592 if (listpos == head) { 580 if (listpos == head) {
581 union cpu_time_count *exp = &nt->expires;
582
593 /* 583 /*
594 * We are the new earliest-expiring timer. 584 * We are the new earliest-expiring POSIX 1.b timer, hence
595 * If we are a thread timer, there can always 585 * need to update expiration cache. Take into account that
596 * be a process timer telling us to stop earlier. 586 * for process timers we share expiration cache with itimers
587 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
597 */ 588 */
598 589
599 if (CPUCLOCK_PERTHREAD(timer->it_clock)) { 590 switch (CPUCLOCK_WHICH(timer->it_clock)) {
600 union cpu_time_count *exp = &nt->expires; 591 case CPUCLOCK_PROF:
601 592 if (expires_gt(cputime_expires->prof_exp, exp->cpu))
602 switch (CPUCLOCK_WHICH(timer->it_clock)) { 593 cputime_expires->prof_exp = exp->cpu;
603 default: 594 break;
604 BUG(); 595 case CPUCLOCK_VIRT:
605 case CPUCLOCK_PROF: 596 if (expires_gt(cputime_expires->virt_exp, exp->cpu))
606 if (expires_gt(p->cputime_expires.prof_exp, 597 cputime_expires->virt_exp = exp->cpu;
607 exp->cpu)) 598 break;
608 p->cputime_expires.prof_exp = exp->cpu; 599 case CPUCLOCK_SCHED:
609 break; 600 if (cputime_expires->sched_exp == 0 ||
610 case CPUCLOCK_VIRT: 601 cputime_expires->sched_exp > exp->sched)
611 if (expires_gt(p->cputime_expires.virt_exp, 602 cputime_expires->sched_exp = exp->sched;
612 exp->cpu)) 603 break;
613 p->cputime_expires.virt_exp = exp->cpu;
614 break;
615 case CPUCLOCK_SCHED:
616 if (p->cputime_expires.sched_exp == 0 ||
617 p->cputime_expires.sched_exp > exp->sched)
618 p->cputime_expires.sched_exp =
619 exp->sched;
620 break;
621 }
622 } else {
623 struct signal_struct *const sig = p->signal;
624 union cpu_time_count *exp = &timer->it.cpu.expires;
625
626 /*
627 * For a process timer, set the cached expiration time.
628 */
629 switch (CPUCLOCK_WHICH(timer->it_clock)) {
630 default:
631 BUG();
632 case CPUCLOCK_VIRT:
633 if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
634 exp->cpu))
635 break;
636 sig->cputime_expires.virt_exp = exp->cpu;
637 break;
638 case CPUCLOCK_PROF:
639 if (expires_le(sig->it[CPUCLOCK_PROF].expires,
640 exp->cpu))
641 break;
642 i = sig->rlim[RLIMIT_CPU].rlim_cur;
643 if (i != RLIM_INFINITY &&
644 i <= cputime_to_secs(exp->cpu))
645 break;
646 sig->cputime_expires.prof_exp = exp->cpu;
647 break;
648 case CPUCLOCK_SCHED:
649 sig->cputime_expires.sched_exp = exp->sched;
650 break;
651 }
652 } 604 }
653 } 605 }
654
655 spin_unlock(&p->sighand->siglock);
656} 606}
657 607
658/* 608/*
@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
660 */ 610 */
661static void cpu_timer_fire(struct k_itimer *timer) 611static void cpu_timer_fire(struct k_itimer *timer)
662{ 612{
663 if (unlikely(timer->sigq == NULL)) { 613 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
614 /*
615 * User don't want any signal.
616 */
617 timer->it.cpu.expires.sched = 0;
618 } else if (unlikely(timer->sigq == NULL)) {
664 /* 619 /*
665 * This is a special case for clock_nanosleep, 620 * This is a special case for clock_nanosleep,
666 * not a normal timer from sys_timer_create. 621 * not a normal timer from sys_timer_create.
@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
721 struct itimerspec *new, struct itimerspec *old) 676 struct itimerspec *new, struct itimerspec *old)
722{ 677{
723 struct task_struct *p = timer->it.cpu.task; 678 struct task_struct *p = timer->it.cpu.task;
724 union cpu_time_count old_expires, new_expires, val; 679 union cpu_time_count old_expires, new_expires, old_incr, val;
725 int ret; 680 int ret;
726 681
727 if (unlikely(p == NULL)) { 682 if (unlikely(p == NULL)) {
@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
752 BUG_ON(!irqs_disabled()); 707 BUG_ON(!irqs_disabled());
753 708
754 ret = 0; 709 ret = 0;
710 old_incr = timer->it.cpu.incr;
755 spin_lock(&p->sighand->siglock); 711 spin_lock(&p->sighand->siglock);
756 old_expires = timer->it.cpu.expires; 712 old_expires = timer->it.cpu.expires;
757 if (unlikely(timer->it.cpu.firing)) { 713 if (unlikely(timer->it.cpu.firing)) {
@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
759 ret = TIMER_RETRY; 715 ret = TIMER_RETRY;
760 } else 716 } else
761 list_del_init(&timer->it.cpu.entry); 717 list_del_init(&timer->it.cpu.entry);
762 spin_unlock(&p->sighand->siglock);
763 718
764 /* 719 /*
765 * We need to sample the current value to convert the new 720 * We need to sample the current value to convert the new
@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
813 * disable this firing since we are already reporting 768 * disable this firing since we are already reporting
814 * it as an overrun (thanks to bump_cpu_timer above). 769 * it as an overrun (thanks to bump_cpu_timer above).
815 */ 770 */
771 spin_unlock(&p->sighand->siglock);
816 read_unlock(&tasklist_lock); 772 read_unlock(&tasklist_lock);
817 goto out; 773 goto out;
818 } 774 }
@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
828 */ 784 */
829 timer->it.cpu.expires = new_expires; 785 timer->it.cpu.expires = new_expires;
830 if (new_expires.sched != 0 && 786 if (new_expires.sched != 0 &&
831 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
832 cpu_time_before(timer->it_clock, val, new_expires)) { 787 cpu_time_before(timer->it_clock, val, new_expires)) {
833 arm_timer(timer, val); 788 arm_timer(timer);
834 } 789 }
835 790
791 spin_unlock(&p->sighand->siglock);
836 read_unlock(&tasklist_lock); 792 read_unlock(&tasklist_lock);
837 793
838 /* 794 /*
@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
853 timer->it_overrun = -1; 809 timer->it_overrun = -1;
854 810
855 if (new_expires.sched != 0 && 811 if (new_expires.sched != 0 &&
856 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
857 !cpu_time_before(timer->it_clock, val, new_expires)) { 812 !cpu_time_before(timer->it_clock, val, new_expires)) {
858 /* 813 /*
859 * The designated time already passed, so we notify 814 * The designated time already passed, so we notify
@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
867 out: 822 out:
868 if (old) { 823 if (old) {
869 sample_to_timespec(timer->it_clock, 824 sample_to_timespec(timer->it_clock,
870 timer->it.cpu.incr, &old->it_interval); 825 old_incr, &old->it_interval);
871 } 826 }
872 return ret; 827 return ret;
873} 828}
@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
927 read_unlock(&tasklist_lock); 882 read_unlock(&tasklist_lock);
928 } 883 }
929 884
930 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
931 if (timer->it.cpu.incr.sched == 0 &&
932 cpu_time_before(timer->it_clock,
933 timer->it.cpu.expires, now)) {
934 /*
935 * Do-nothing timer expired and has no reload,
936 * so it's as if it was never set.
937 */
938 timer->it.cpu.expires.sched = 0;
939 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
940 return;
941 }
942 /*
943 * Account for any expirations and reloads that should
944 * have happened.
945 */
946 bump_cpu_timer(timer, now);
947 }
948
949 if (unlikely(clear_dead)) { 885 if (unlikely(clear_dead)) {
950 /* 886 /*
951 * We've noticed that the thread is dead, but 887 * We've noticed that the thread is dead, but
@@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig)
1066 struct thread_group_cputimer *cputimer = &sig->cputimer; 1002 struct thread_group_cputimer *cputimer = &sig->cputimer;
1067 unsigned long flags; 1003 unsigned long flags;
1068 1004
1069 if (!cputimer->running)
1070 return;
1071
1072 spin_lock_irqsave(&cputimer->lock, flags); 1005 spin_lock_irqsave(&cputimer->lock, flags);
1073 cputimer->running = 0; 1006 cputimer->running = 0;
1074 spin_unlock_irqrestore(&cputimer->lock, flags); 1007 spin_unlock_irqrestore(&cputimer->lock, flags);
1075
1076 sig->cputime_expires.prof_exp = cputime_zero;
1077 sig->cputime_expires.virt_exp = cputime_zero;
1078 sig->cputime_expires.sched_exp = 0;
1079} 1008}
1080 1009
1081static u32 onecputick; 1010static u32 onecputick;
@@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
1112 } 1041 }
1113} 1042}
1114 1043
1044/**
1045 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1046 *
1047 * @cputime: The struct to compare.
1048 *
1049 * Checks @cputime to see if all fields are zero. Returns true if all fields
1050 * are zero, false if any field is nonzero.
1051 */
1052static inline int task_cputime_zero(const struct task_cputime *cputime)
1053{
1054 if (cputime_eq(cputime->utime, cputime_zero) &&
1055 cputime_eq(cputime->stime, cputime_zero) &&
1056 cputime->sum_exec_runtime == 0)
1057 return 1;
1058 return 0;
1059}
1060
1115/* 1061/*
1116 * Check for any per-thread CPU timers that have fired and move them 1062 * Check for any per-thread CPU timers that have fired and move them
1117 * off the tsk->*_timers list onto the firing list. Per-thread timers 1063 * off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -1129,19 +1075,6 @@ static void check_process_timers(struct task_struct *tsk,
1129 unsigned long soft; 1075 unsigned long soft;
1130 1076
1131 /* 1077 /*
1132 * Don't sample the current process CPU clocks if there are no timers.
1133 */
1134 if (list_empty(&timers[CPUCLOCK_PROF]) &&
1135 cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
1136 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
1137 list_empty(&timers[CPUCLOCK_VIRT]) &&
1138 cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
1139 list_empty(&timers[CPUCLOCK_SCHED])) {
1140 stop_process_timers(sig);
1141 return;
1142 }
1143
1144 /*
1145 * Collect the current process totals. 1078 * Collect the current process totals.
1146 */ 1079 */
1147 thread_group_cputimer(tsk, &cputime); 1080 thread_group_cputimer(tsk, &cputime);
@@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk,
1230 } 1163 }
1231 } 1164 }
1232 1165
1233 if (!cputime_eq(prof_expires, cputime_zero) && 1166 sig->cputime_expires.prof_exp = prof_expires;
1234 (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || 1167 sig->cputime_expires.virt_exp = virt_expires;
1235 cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) 1168 sig->cputime_expires.sched_exp = sched_expires;
1236 sig->cputime_expires.prof_exp = prof_expires; 1169 if (task_cputime_zero(&sig->cputime_expires))
1237 if (!cputime_eq(virt_expires, cputime_zero) && 1170 stop_process_timers(sig);
1238 (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
1239 cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
1240 sig->cputime_expires.virt_exp = virt_expires;
1241 if (sched_expires != 0 &&
1242 (sig->cputime_expires.sched_exp == 0 ||
1243 sig->cputime_expires.sched_exp > sched_expires))
1244 sig->cputime_expires.sched_exp = sched_expires;
1245} 1171}
1246 1172
1247/* 1173/*
@@ -1270,6 +1196,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1270 goto out; 1196 goto out;
1271 } 1197 }
1272 read_lock(&tasklist_lock); /* arm_timer needs it. */ 1198 read_lock(&tasklist_lock); /* arm_timer needs it. */
1199 spin_lock(&p->sighand->siglock);
1273 } else { 1200 } else {
1274 read_lock(&tasklist_lock); 1201 read_lock(&tasklist_lock);
1275 if (unlikely(p->signal == NULL)) { 1202 if (unlikely(p->signal == NULL)) {
@@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1290 clear_dead_task(timer, now); 1217 clear_dead_task(timer, now);
1291 goto out_unlock; 1218 goto out_unlock;
1292 } 1219 }
1220 spin_lock(&p->sighand->siglock);
1293 cpu_timer_sample_group(timer->it_clock, p, &now); 1221 cpu_timer_sample_group(timer->it_clock, p, &now);
1294 bump_cpu_timer(timer, now); 1222 bump_cpu_timer(timer, now);
1295 /* Leave the tasklist_lock locked for the call below. */ 1223 /* Leave the tasklist_lock locked for the call below. */
@@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1298 /* 1226 /*
1299 * Now re-arm for the new expiry time. 1227 * Now re-arm for the new expiry time.
1300 */ 1228 */
1301 arm_timer(timer, now); 1229 BUG_ON(!irqs_disabled());
1230 arm_timer(timer);
1231 spin_unlock(&p->sighand->siglock);
1302 1232
1303out_unlock: 1233out_unlock:
1304 read_unlock(&tasklist_lock); 1234 read_unlock(&tasklist_lock);
@@ -1310,23 +1240,6 @@ out:
1310} 1240}
1311 1241
1312/** 1242/**
1313 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1314 *
1315 * @cputime: The struct to compare.
1316 *
1317 * Checks @cputime to see if all fields are zero. Returns true if all fields
1318 * are zero, false if any field is nonzero.
1319 */
1320static inline int task_cputime_zero(const struct task_cputime *cputime)
1321{
1322 if (cputime_eq(cputime->utime, cputime_zero) &&
1323 cputime_eq(cputime->stime, cputime_zero) &&
1324 cputime->sum_exec_runtime == 0)
1325 return 1;
1326 return 0;
1327}
1328
1329/**
1330 * task_cputime_expired - Compare two task_cputime entities. 1243 * task_cputime_expired - Compare two task_cputime entities.
1331 * 1244 *
1332 * @sample: The task_cputime structure to be checked for expiration. 1245 * @sample: The task_cputime structure to be checked for expiration.
@@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
1382 } 1295 }
1383 1296
1384 sig = tsk->signal; 1297 sig = tsk->signal;
1385 if (!task_cputime_zero(&sig->cputime_expires)) { 1298 if (sig->cputimer.running) {
1386 struct task_cputime group_sample; 1299 struct task_cputime group_sample;
1387 1300
1388 thread_group_cputimer(tsk, &group_sample); 1301 thread_group_cputimer(tsk, &group_sample);
@@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
1390 return 1; 1303 return 1;
1391 } 1304 }
1392 1305
1393 return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; 1306 return 0;
1394} 1307}
1395 1308
1396/* 1309/*
@@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1419 * put them on the firing list. 1332 * put them on the firing list.
1420 */ 1333 */
1421 check_thread_timers(tsk, &firing); 1334 check_thread_timers(tsk, &firing);
1422 check_process_timers(tsk, &firing); 1335 /*
1336 * If there are any active process wide timers (POSIX 1.b, itimers,
1337 * RLIMIT_CPU) cputimer must be running.
1338 */
1339 if (tsk->signal->cputimer.running)
1340 check_process_timers(tsk, &firing);
1423 1341
1424 /* 1342 /*
1425 * We must release these locks before taking any timer's lock. 1343 * We must release these locks before taking any timer's lock.
@@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1456} 1374}
1457 1375
1458/* 1376/*
1459 * Set one of the process-wide special case CPU timers. 1377 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
1460 * The tsk->sighand->siglock must be held by the caller. 1378 * The tsk->sighand->siglock must be held by the caller.
1461 * The *newval argument is relative and we update it to be absolute, *oldval
1462 * is absolute and we update it to be relative.
1463 */ 1379 */
1464void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, 1380void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1465 cputime_t *newval, cputime_t *oldval) 1381 cputime_t *newval, cputime_t *oldval)
1466{ 1382{
1467 union cpu_time_count now; 1383 union cpu_time_count now;
1468 struct list_head *head;
1469 1384
1470 BUG_ON(clock_idx == CPUCLOCK_SCHED); 1385 BUG_ON(clock_idx == CPUCLOCK_SCHED);
1471 cpu_timer_sample_group(clock_idx, tsk, &now); 1386 cpu_timer_sample_group(clock_idx, tsk, &now);
1472 1387
1473 if (oldval) { 1388 if (oldval) {
1389 /*
1390 * We are setting itimer. The *oldval is absolute and we update
1391 * it to be relative, *newval argument is relative and we update
1392 * it to be absolute.
1393 */
1474 if (!cputime_eq(*oldval, cputime_zero)) { 1394 if (!cputime_eq(*oldval, cputime_zero)) {
1475 if (cputime_le(*oldval, now.cpu)) { 1395 if (cputime_le(*oldval, now.cpu)) {
1476 /* Just about to fire. */ 1396 /* Just about to fire. */
@@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1483 if (cputime_eq(*newval, cputime_zero)) 1403 if (cputime_eq(*newval, cputime_zero))
1484 return; 1404 return;
1485 *newval = cputime_add(*newval, now.cpu); 1405 *newval = cputime_add(*newval, now.cpu);
1486
1487 /*
1488 * If the RLIMIT_CPU timer will expire before the
1489 * ITIMER_PROF timer, we have nothing else to do.
1490 */
1491 if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
1492 < cputime_to_secs(*newval))
1493 return;
1494 } 1406 }
1495 1407
1496 /* 1408 /*
1497 * Check whether there are any process timers already set to fire 1409 * Update expiration cache if we are the earliest timer, or eventually
1498 * before this one. If so, we don't have anything more to do. 1410 * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
1499 */ 1411 */
1500 head = &tsk->signal->cpu_timers[clock_idx]; 1412 switch (clock_idx) {
1501 if (list_empty(head) || 1413 case CPUCLOCK_PROF:
1502 cputime_ge(list_first_entry(head, 1414 if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
1503 struct cpu_timer_list, entry)->expires.cpu,
1504 *newval)) {
1505 switch (clock_idx) {
1506 case CPUCLOCK_PROF:
1507 tsk->signal->cputime_expires.prof_exp = *newval; 1415 tsk->signal->cputime_expires.prof_exp = *newval;
1508 break; 1416 break;
1509 case CPUCLOCK_VIRT: 1417 case CPUCLOCK_VIRT:
1418 if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
1510 tsk->signal->cputime_expires.virt_exp = *newval; 1419 tsk->signal->cputime_expires.virt_exp = *newval;
1511 break; 1420 break;
1512 }
1513 } 1421 }
1514} 1422}
1515 1423
diff --git a/kernel/time.c b/kernel/time.c
index 656dccfe1cbb..50612faa9baf 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
132 */ 132 */
133static inline void warp_clock(void) 133static inline void warp_clock(void)
134{ 134{
135 write_seqlock_irq(&xtime_lock); 135 struct timespec delta, adjust;
136 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; 136 delta.tv_sec = sys_tz.tz_minuteswest * 60;
137 xtime.tv_sec += sys_tz.tz_minuteswest * 60; 137 delta.tv_nsec = 0;
138 update_xtime_cache(0); 138 adjust = timespec_add_safe(current_kernel_time(), delta);
139 write_sequnlock_irq(&xtime_lock); 139 do_settimeofday(&adjust);
140 clock_was_set();
141} 140}
142 141
143/* 142/*
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1f5dde637457..f08e99c1d561 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs)
625 list_add(&cs->list, entry); 625 list_add(&cs->list, entry);
626} 626}
627 627
628
629/*
630 * Maximum time we expect to go between ticks. This includes idle
631 * tickless time. It provides the trade off between selecting a
632 * mult/shift pair that is very precise but can only handle a short
633 * period of time, vs. a mult/shift pair that can handle long periods
634 * of time but isn't as precise.
635 *
636 * This is a subsystem constant, and actual hardware limitations
637 * may override it (ie: clocksources that wrap every 3 seconds).
638 */
639#define MAX_UPDATE_LENGTH 5 /* Seconds */
640
641/**
642 * __clocksource_register_scale - Used to install new clocksources
643 * @t: clocksource to be registered
644 * @scale: Scale factor multiplied against freq to get clocksource hz
645 * @freq: clocksource frequency (cycles per second) divided by scale
646 *
647 * Returns -EBUSY if registration fails, zero otherwise.
648 *
649 * This *SHOULD NOT* be called directly! Please use the
650 * clocksource_register_khz() helper functions.
651 */
652int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
653{
654
655 /*
656 * Ideally we want to use some of the limits used in
657 * clocksource_max_deferment, to provide a more informed
658 * MAX_UPDATE_LENGTH. But for now this just gets the
659 * register interface working properly.
660 */
661 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
662 NSEC_PER_SEC/scale,
663 MAX_UPDATE_LENGTH*scale);
664 cs->max_idle_ns = clocksource_max_deferment(cs);
665
666 mutex_lock(&clocksource_mutex);
667 clocksource_enqueue(cs);
668 clocksource_select();
669 clocksource_enqueue_watchdog(cs);
670 mutex_unlock(&clocksource_mutex);
671 return 0;
672}
673EXPORT_SYMBOL_GPL(__clocksource_register_scale);
674
675
628/** 676/**
629 * clocksource_register - Used to install new clocksources 677 * clocksource_register - Used to install new clocksources
630 * @t: clocksource to be registered 678 * @t: clocksource to be registered
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7c0f180d6e9d..c63116863a80 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -69,7 +69,7 @@ static s64 time_freq;
69/* time at last adjustment (secs): */ 69/* time at last adjustment (secs): */
70static long time_reftime; 70static long time_reftime;
71 71
72long time_adjust; 72static long time_adjust;
73 73
74/* constant (boot-param configurable) NTP tick adjustment (upscaled) */ 74/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
75static s64 ntp_tick_adj; 75static s64 ntp_tick_adj;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 39f6177fafac..caf8d4d4f5c8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,13 +165,6 @@ struct timespec raw_time;
165/* flag for if timekeeping is suspended */ 165/* flag for if timekeeping is suspended */
166int __read_mostly timekeeping_suspended; 166int __read_mostly timekeeping_suspended;
167 167
168static struct timespec xtime_cache __attribute__ ((aligned (16)));
169void update_xtime_cache(u64 nsec)
170{
171 xtime_cache = xtime;
172 timespec_add_ns(&xtime_cache, nsec);
173}
174
175/* must hold xtime_lock */ 168/* must hold xtime_lock */
176void timekeeping_leap_insert(int leapsecond) 169void timekeeping_leap_insert(int leapsecond)
177{ 170{
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
332 325
333 xtime = *tv; 326 xtime = *tv;
334 327
335 update_xtime_cache(0);
336
337 timekeeper.ntp_error = 0; 328 timekeeper.ntp_error = 0;
338 ntp_clear(); 329 ntp_clear();
339 330
@@ -559,7 +550,6 @@ void __init timekeeping_init(void)
559 } 550 }
560 set_normalized_timespec(&wall_to_monotonic, 551 set_normalized_timespec(&wall_to_monotonic,
561 -boot.tv_sec, -boot.tv_nsec); 552 -boot.tv_sec, -boot.tv_nsec);
562 update_xtime_cache(0);
563 total_sleep_time.tv_sec = 0; 553 total_sleep_time.tv_sec = 0;
564 total_sleep_time.tv_nsec = 0; 554 total_sleep_time.tv_nsec = 0;
565 write_sequnlock_irqrestore(&xtime_lock, flags); 555 write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
593 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); 583 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
594 total_sleep_time = timespec_add_safe(total_sleep_time, ts); 584 total_sleep_time = timespec_add_safe(total_sleep_time, ts);
595 } 585 }
596 update_xtime_cache(0);
597 /* re-base the last cycle value */ 586 /* re-base the last cycle value */
598 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); 587 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
599 timekeeper.ntp_error = 0; 588 timekeeper.ntp_error = 0;
@@ -788,7 +777,6 @@ void update_wall_time(void)
788{ 777{
789 struct clocksource *clock; 778 struct clocksource *clock;
790 cycle_t offset; 779 cycle_t offset;
791 u64 nsecs;
792 int shift = 0, maxshift; 780 int shift = 0, maxshift;
793 781
794 /* Make sure we're fully resumed: */ 782 /* Make sure we're fully resumed: */
@@ -847,7 +835,9 @@ void update_wall_time(void)
847 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; 835 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
848 } 836 }
849 837
850 /* store full nanoseconds into xtime after rounding it up and 838
839 /*
840 * Store full nanoseconds into xtime after rounding it up and
851 * add the remainder to the error difference. 841 * add the remainder to the error difference.
852 */ 842 */
853 xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; 843 xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
@@ -855,8 +845,15 @@ void update_wall_time(void)
855 timekeeper.ntp_error += timekeeper.xtime_nsec << 845 timekeeper.ntp_error += timekeeper.xtime_nsec <<
856 timekeeper.ntp_error_shift; 846 timekeeper.ntp_error_shift;
857 847
858 nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); 848 /*
859 update_xtime_cache(nsecs); 849 * Finally, make sure that after the rounding
850 * xtime.tv_nsec isn't larger than NSEC_PER_SEC
851 */
852 if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
853 xtime.tv_nsec -= NSEC_PER_SEC;
854 xtime.tv_sec++;
855 second_overflow();
856 }
860 857
861 /* check to see if there is a new clocksource to use */ 858 /* check to see if there is a new clocksource to use */
862 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); 859 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
@@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
896 893
897unsigned long get_seconds(void) 894unsigned long get_seconds(void)
898{ 895{
899 return xtime_cache.tv_sec; 896 return xtime.tv_sec;
900} 897}
901EXPORT_SYMBOL(get_seconds); 898EXPORT_SYMBOL(get_seconds);
902 899
903struct timespec __current_kernel_time(void) 900struct timespec __current_kernel_time(void)
904{ 901{
905 return xtime_cache; 902 return xtime;
906} 903}
907 904
908struct timespec current_kernel_time(void) 905struct timespec current_kernel_time(void)
@@ -913,7 +910,7 @@ struct timespec current_kernel_time(void)
913 do { 910 do {
914 seq = read_seqbegin(&xtime_lock); 911 seq = read_seqbegin(&xtime_lock);
915 912
916 now = xtime_cache; 913 now = xtime;
917 } while (read_seqretry(&xtime_lock, seq)); 914 } while (read_seqretry(&xtime_lock, seq));
918 915
919 return now; 916 return now;
@@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void)
928 do { 925 do {
929 seq = read_seqbegin(&xtime_lock); 926 seq = read_seqbegin(&xtime_lock);
930 927
931 now = xtime_cache; 928 now = xtime;
932 mono = wall_to_monotonic; 929 mono = wall_to_monotonic;
933 } while (read_seqretry(&xtime_lock, seq)); 930 } while (read_seqretry(&xtime_lock, seq));
934 931
diff --git a/kernel/timer.c b/kernel/timer.c
index aeb6a54f2771..9199f3c52215 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
319} 319}
320EXPORT_SYMBOL_GPL(round_jiffies_up_relative); 320EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
321 321
322/**
323 * set_timer_slack - set the allowed slack for a timer
324 * @slack_hz: the amount of time (in jiffies) allowed for rounding
325 *
326 * Set the amount of time, in jiffies, that a certain timer has
327 * in terms of slack. By setting this value, the timer subsystem
328 * will schedule the actual timer somewhere between
329 * the time mod_timer() asks for, and that time plus the slack.
330 *
331 * By setting the slack to -1, a percentage of the delay is used
332 * instead.
333 */
334void set_timer_slack(struct timer_list *timer, int slack_hz)
335{
336 timer->slack = slack_hz;
337}
338EXPORT_SYMBOL_GPL(set_timer_slack);
339
322 340
323static inline void set_running_timer(struct tvec_base *base, 341static inline void set_running_timer(struct tvec_base *base,
324 struct timer_list *timer) 342 struct timer_list *timer)
@@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer,
550{ 568{
551 timer->entry.next = NULL; 569 timer->entry.next = NULL;
552 timer->base = __raw_get_cpu_var(tvec_bases); 570 timer->base = __raw_get_cpu_var(tvec_bases);
571 timer->slack = -1;
553#ifdef CONFIG_TIMER_STATS 572#ifdef CONFIG_TIMER_STATS
554 timer->start_site = NULL; 573 timer->start_site = NULL;
555 timer->start_pid = -1; 574 timer->start_pid = -1;
@@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
715} 734}
716EXPORT_SYMBOL(mod_timer_pending); 735EXPORT_SYMBOL(mod_timer_pending);
717 736
737/*
738 * Decide where to put the timer while taking the slack into account
739 *
740 * Algorithm:
741 * 1) calculate the maximum (absolute) time
742 * 2) calculate the highest bit where the expires and new max are different
743 * 3) use this bit to make a mask
744 * 4) use the bitmask to round down the maximum time, so that all last
745 * bits are zeros
746 */
747static inline
748unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
749{
750 unsigned long expires_limit, mask;
751 int bit;
752
753 expires_limit = expires + timer->slack;
754
755 if (timer->slack < 0) /* auto slack: use 0.4% */
756 expires_limit = expires + (expires - jiffies)/256;
757
758 mask = expires ^ expires_limit;
759
760 if (mask == 0)
761 return expires;
762
763 bit = find_last_bit(&mask, BITS_PER_LONG);
764
765 mask = (1 << bit) - 1;
766
767 expires_limit = expires_limit & ~(mask);
768
769 return expires_limit;
770}
771
718/** 772/**
719 * mod_timer - modify a timer's timeout 773 * mod_timer - modify a timer's timeout
720 * @timer: the timer to be modified 774 * @timer: the timer to be modified
@@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
745 if (timer_pending(timer) && timer->expires == expires) 799 if (timer_pending(timer) && timer->expires == expires)
746 return 1; 800 return 1;
747 801
802 expires = apply_slack(timer, expires);
803
748 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); 804 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
749} 805}
750EXPORT_SYMBOL(mod_timer); 806EXPORT_SYMBOL(mod_timer);
@@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
955 return index; 1011 return index;
956} 1012}
957 1013
1014static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
1015 unsigned long data)
1016{
1017 int preempt_count = preempt_count();
1018
1019#ifdef CONFIG_LOCKDEP
1020 /*
1021 * It is permissible to free the timer from inside the
1022 * function that is called from it, this we need to take into
1023 * account for lockdep too. To avoid bogus "held lock freed"
1024 * warnings as well as problems when looking into
1025 * timer->lockdep_map, make a copy and use that here.
1026 */
1027 struct lockdep_map lockdep_map = timer->lockdep_map;
1028#endif
1029 /*
1030 * Couple the lock chain with the lock chain at
1031 * del_timer_sync() by acquiring the lock_map around the fn()
1032 * call here and in del_timer_sync().
1033 */
1034 lock_map_acquire(&lockdep_map);
1035
1036 trace_timer_expire_entry(timer);
1037 fn(data);
1038 trace_timer_expire_exit(timer);
1039
1040 lock_map_release(&lockdep_map);
1041
1042 if (preempt_count != preempt_count()) {
1043 WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
1044 fn, preempt_count, preempt_count());
1045 /*
1046 * Restore the preempt count. That gives us a decent
1047 * chance to survive and extract information. If the
1048 * callback kept a lock held, bad luck, but not worse
1049 * than the BUG() we had.
1050 */
1051 preempt_count() = preempt_count;
1052 }
1053}
1054
958#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) 1055#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
959 1056
960/** 1057/**
@@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base)
998 detach_timer(timer, 1); 1095 detach_timer(timer, 1);
999 1096
1000 spin_unlock_irq(&base->lock); 1097 spin_unlock_irq(&base->lock);
1001 { 1098 call_timer_fn(timer, fn, data);
1002 int preempt_count = preempt_count();
1003
1004#ifdef CONFIG_LOCKDEP
1005 /*
1006 * It is permissible to free the timer from
1007 * inside the function that is called from
1008 * it, this we need to take into account for
1009 * lockdep too. To avoid bogus "held lock
1010 * freed" warnings as well as problems when
1011 * looking into timer->lockdep_map, make a
1012 * copy and use that here.
1013 */
1014 struct lockdep_map lockdep_map =
1015 timer->lockdep_map;
1016#endif
1017 /*
1018 * Couple the lock chain with the lock chain at
1019 * del_timer_sync() by acquiring the lock_map
1020 * around the fn() call here and in
1021 * del_timer_sync().
1022 */
1023 lock_map_acquire(&lockdep_map);
1024
1025 trace_timer_expire_entry(timer);
1026 fn(data);
1027 trace_timer_expire_exit(timer);
1028
1029 lock_map_release(&lockdep_map);
1030
1031 if (preempt_count != preempt_count()) {
1032 printk(KERN_ERR "huh, entered %p "
1033 "with preempt_count %08x, exited"
1034 " with %08x?\n",
1035 fn, preempt_count,
1036 preempt_count());
1037 BUG();
1038 }
1039 }
1040 spin_lock_irq(&base->lock); 1099 spin_lock_irq(&base->lock);
1041 } 1100 }
1042 } 1101 }