author     Linus Torvalds <torvalds@linux-foundation.org>   2010-05-19 20:11:10 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2010-05-19 20:11:10 -0400
commit     164d44fd92e79d5bce54d0d62df9f856f7b23925
tree       9f21607849b7e684b255578ffdf41951bc31787e /kernel
parent     5bfec46baa3a752393433b8d89d3b2c70820f61d
parent     d7e81c269db899b800e0963dc4aceece1f82a680
Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
clocksource: Add clocksource_register_hz/khz interface
posix-cpu-timers: Optimize run_posix_cpu_timers()
time: Remove xtime_cache
mqueue: Convert message queue timeout to use hrtimers
hrtimers: Provide schedule_hrtimeout for CLOCK_REALTIME
timers: Introduce the concept of timer slack for legacy timers
ntp: Remove tickadj
ntp: Make time_adjust static
time: Add xtime, wall_to_monotonic to feature-removal-schedule
timer: Try to survive timer callback preempt_count leak
timer: Split out timer function call
timer: Print function name for timer callbacks modifying preemption count
time: Clean up warp_clock()
cpu-timers: Avoid iterating over all threads in fastpath_timer_check()
cpu-timers: Change SIGEV_NONE timer implementation
cpu-timers: Return correct previous timer reload value
cpu-timers: Cleanup arm_timer()
cpu-timers: Simplify RLIMIT_CPU handling
Diffstat (limited to 'kernel')
 kernel/hrtimer.c          |  67
 kernel/posix-cpu-timers.c | 298
 kernel/time.c             |  11
 kernel/time/clocksource.c |  48
 kernel/time/ntp.c         |   2
 kernel/time/timekeeping.c |  35
 kernel/timer.c            | 137
 7 files changed, 312 insertions(+), 286 deletions(-)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0086628b6e97..b9b134b35088 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
 }
 
 /**
- * schedule_hrtimeout_range - sleep until timeout
+ * schedule_hrtimeout_range_clock - sleep until timeout
  * @expires: timeout value (ktime_t)
  * @delta: slack in expires timeout (ktime_t)
  * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- *
- * Make the current task sleep until the given expiry time has
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * The @delta argument gives the kernel the freedom to schedule the
- * actual wakeup to a time that is both power and performance friendly.
- * The kernel give the normal best effort behavior for "@expires+@delta",
- * but may decide to fire the timer earlier, but no earlier than @expires.
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Returns 0 when the timer has expired otherwise -EINTR
+ * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
  */
-int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
-			       const enum hrtimer_mode mode)
+int __sched
+schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
+			       const enum hrtimer_mode mode, int clock)
 {
 	struct hrtimer_sleeper t;
 
@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
 		return -EINTR;
 	}
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_init_on_stack(&t.timer, clock, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 
 	hrtimer_init_sleeper(&t, current);
@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
 
 	return !t.task ? 0 : -EINTR;
 }
+
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires: timeout value (ktime_t)
+ * @delta: slack in expires timeout (ktime_t)
+ * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly.
+ * The kernel give the normal best effort behavior for "@expires+@delta",
+ * but may decide to fire the timer earlier, but no earlier than @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
+ * pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+				     const enum hrtimer_mode mode)
+{
+	return schedule_hrtimeout_range_clock(expires, delta, mode,
+					      CLOCK_MONOTONIC);
+}
 EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
 
 /**
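The hrtimer.c hunk above only relocates the documented behaviour: schedule_hrtimeout_range() keeps its old semantics and becomes a thin wrapper, while the new schedule_hrtimeout_range_clock() takes the clock as a parameter. A minimal sketch, not part of this merge, of how a caller such as the converted mqueue code might sleep against CLOCK_REALTIME with the new interface; the wrapper function name here is hypothetical:

static int example_wait_until(ktime_t *abs_expiry)
{
	/* As the comment above notes, the task state must be set first. */
	set_current_state(TASK_INTERRUPTIBLE);

	/* Returns 0 once the absolute expiry has passed, -EINTR on a signal. */
	return schedule_hrtimeout_range_clock(abs_expiry, 0, HRTIMER_MODE_ABS,
					      CLOCK_REALTIME);
}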
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bc7704b3a443..00bb252f29a2 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -11,19 +11,18 @@
 #include <trace/events/timer.h>
 
 /*
- * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ * Called after updating RLIMIT_CPU to run cpu timer and update
+ * tsk->signal->cputime_expires expiration cache if necessary. Needs
+ * siglock protection since other code may update expiration cache as
+ * well.
  */
 void update_rlimit_cpu(unsigned long rlim_new)
 {
 	cputime_t cputime = secs_to_cputime(rlim_new);
-	struct signal_struct *const sig = current->signal;
 
-	if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) ||
-	    cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) {
-		spin_lock_irq(&current->sighand->siglock);
-		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	spin_lock_irq(&current->sighand->siglock);
+	set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 static int check_clock(const clockid_t which_clock)
@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 		cputime_gt(expires, new_exp);
 }
 
-static inline int expires_le(cputime_t expires, cputime_t new_exp)
-{
-	return !cputime_eq(expires, cputime_zero) &&
-		cputime_le(expires, new_exp);
-}
 /*
  * Insert the timer on the appropriate list before any timers that
  * expire later.  This must be called with the tasklist_lock held
- * for reading, and interrupts disabled.
+ * for reading, interrupts disabled and p->sighand->siglock taken.
  */
-static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
+static void arm_timer(struct k_itimer *timer)
 {
 	struct task_struct *p = timer->it.cpu.task;
 	struct list_head *head, *listpos;
+	struct task_cputime *cputime_expires;
 	struct cpu_timer_list *const nt = &timer->it.cpu;
 	struct cpu_timer_list *next;
-	unsigned long i;
 
-	head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
-		p->cpu_timers : p->signal->cpu_timers);
+	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+		head = p->cpu_timers;
+		cputime_expires = &p->cputime_expires;
+	} else {
+		head = p->signal->cpu_timers;
+		cputime_expires = &p->signal->cputime_expires;
+	}
 	head += CPUCLOCK_WHICH(timer->it_clock);
 
-	BUG_ON(!irqs_disabled());
-	spin_lock(&p->sighand->siglock);
-
 	listpos = head;
-	if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
-		list_for_each_entry(next, head, entry) {
-			if (next->expires.sched > nt->expires.sched)
-				break;
-			listpos = &next->entry;
-		}
-	} else {
-		list_for_each_entry(next, head, entry) {
-			if (cputime_gt(next->expires.cpu, nt->expires.cpu))
-				break;
-			listpos = &next->entry;
-		}
+	list_for_each_entry(next, head, entry) {
+		if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
+			break;
+		listpos = &next->entry;
 	}
 	list_add(&nt->entry, listpos);
 
 	if (listpos == head) {
+		union cpu_time_count *exp = &nt->expires;
+
 		/*
-		 * We are the new earliest-expiring timer.
-		 * If we are a thread timer, there can always
-		 * be a process timer telling us to stop earlier.
+		 * We are the new earliest-expiring POSIX 1.b timer, hence
+		 * need to update expiration cache. Take into account that
+		 * for process timers we share expiration cache with itimers
+		 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
 		 */
 
-		if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-			union cpu_time_count *exp = &nt->expires;
-
-			switch (CPUCLOCK_WHICH(timer->it_clock)) {
-			default:
-				BUG();
-			case CPUCLOCK_PROF:
-				if (expires_gt(p->cputime_expires.prof_exp,
-					       exp->cpu))
-					p->cputime_expires.prof_exp = exp->cpu;
-				break;
-			case CPUCLOCK_VIRT:
-				if (expires_gt(p->cputime_expires.virt_exp,
-					       exp->cpu))
-					p->cputime_expires.virt_exp = exp->cpu;
-				break;
-			case CPUCLOCK_SCHED:
-				if (p->cputime_expires.sched_exp == 0 ||
-				    p->cputime_expires.sched_exp > exp->sched)
-					p->cputime_expires.sched_exp =
-						exp->sched;
-				break;
-			}
-		} else {
-			struct signal_struct *const sig = p->signal;
-			union cpu_time_count *exp = &timer->it.cpu.expires;
-
-			/*
-			 * For a process timer, set the cached expiration time.
-			 */
-			switch (CPUCLOCK_WHICH(timer->it_clock)) {
-			default:
-				BUG();
-			case CPUCLOCK_VIRT:
-				if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
-					       exp->cpu))
-					break;
-				sig->cputime_expires.virt_exp = exp->cpu;
-				break;
-			case CPUCLOCK_PROF:
-				if (expires_le(sig->it[CPUCLOCK_PROF].expires,
-					       exp->cpu))
-					break;
-				i = sig->rlim[RLIMIT_CPU].rlim_cur;
-				if (i != RLIM_INFINITY &&
-				    i <= cputime_to_secs(exp->cpu))
-					break;
-				sig->cputime_expires.prof_exp = exp->cpu;
-				break;
-			case CPUCLOCK_SCHED:
-				sig->cputime_expires.sched_exp = exp->sched;
-				break;
-			}
-		}
+		switch (CPUCLOCK_WHICH(timer->it_clock)) {
+		case CPUCLOCK_PROF:
+			if (expires_gt(cputime_expires->prof_exp, exp->cpu))
+				cputime_expires->prof_exp = exp->cpu;
+			break;
+		case CPUCLOCK_VIRT:
+			if (expires_gt(cputime_expires->virt_exp, exp->cpu))
+				cputime_expires->virt_exp = exp->cpu;
+			break;
+		case CPUCLOCK_SCHED:
+			if (cputime_expires->sched_exp == 0 ||
+			    cputime_expires->sched_exp > exp->sched)
+				cputime_expires->sched_exp = exp->sched;
+			break;
+		}
 	}
-
-	spin_unlock(&p->sighand->siglock);
 }
 
 /*
@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
  */
 static void cpu_timer_fire(struct k_itimer *timer)
 {
-	if (unlikely(timer->sigq == NULL)) {
+	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
+		/*
+		 * User don't want any signal.
+		 */
+		timer->it.cpu.expires.sched = 0;
+	} else if (unlikely(timer->sigq == NULL)) {
 		/*
 		 * This a special case for clock_nanosleep,
 		 * not a normal timer from sys_timer_create.
@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
			struct itimerspec *new, struct itimerspec *old)
 {
 	struct task_struct *p = timer->it.cpu.task;
-	union cpu_time_count old_expires, new_expires, val;
+	union cpu_time_count old_expires, new_expires, old_incr, val;
 	int ret;
 
 	if (unlikely(p == NULL)) {
@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	BUG_ON(!irqs_disabled());
 
 	ret = 0;
+	old_incr = timer->it.cpu.incr;
 	spin_lock(&p->sighand->siglock);
 	old_expires = timer->it.cpu.expires;
 	if (unlikely(timer->it.cpu.firing)) {
@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 		ret = TIMER_RETRY;
 	} else
 		list_del_init(&timer->it.cpu.entry);
-	spin_unlock(&p->sighand->siglock);
 
 	/*
 	 * We need to sample the current value to convert the new
@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 		 * disable this firing since we are already reporting
 		 * it as an overrun (thanks to bump_cpu_timer above).
 		 */
+		spin_unlock(&p->sighand->siglock);
 		read_unlock(&tasklist_lock);
 		goto out;
 	}
@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	 */
 	timer->it.cpu.expires = new_expires;
 	if (new_expires.sched != 0 &&
-	    (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
 	    cpu_time_before(timer->it_clock, val, new_expires)) {
-		arm_timer(timer, val);
+		arm_timer(timer);
 	}
 
+	spin_unlock(&p->sighand->siglock);
 	read_unlock(&tasklist_lock);
 
 	/*
@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	timer->it_overrun = -1;
 
 	if (new_expires.sched != 0 &&
-	    (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
 	    !cpu_time_before(timer->it_clock, val, new_expires)) {
 		/*
 		 * The designated time already passed, so we notify
@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 out:
 	if (old) {
 		sample_to_timespec(timer->it_clock,
-				   timer->it.cpu.incr, &old->it_interval);
+				   old_incr, &old->it_interval);
 	}
 	return ret;
 }
@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 		read_unlock(&tasklist_lock);
 	}
 
-	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
-		if (timer->it.cpu.incr.sched == 0 &&
-		    cpu_time_before(timer->it_clock,
-				    timer->it.cpu.expires, now)) {
-			/*
-			 * Do-nothing timer expired and has no reload,
-			 * so it's as if it was never set.
-			 */
-			timer->it.cpu.expires.sched = 0;
-			itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
-			return;
-		}
-		/*
-		 * Account for any expirations and reloads that should
-		 * have happened.
-		 */
-		bump_cpu_timer(timer, now);
-	}
-
 	if (unlikely(clear_dead)) {
 		/*
 		 * We've noticed that the thread is dead, but
@@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig)
 	struct thread_group_cputimer *cputimer = &sig->cputimer;
 	unsigned long flags;
 
-	if (!cputimer->running)
-		return;
-
 	spin_lock_irqsave(&cputimer->lock, flags);
 	cputimer->running = 0;
 	spin_unlock_irqrestore(&cputimer->lock, flags);
-
-	sig->cputime_expires.prof_exp = cputime_zero;
-	sig->cputime_expires.virt_exp = cputime_zero;
-	sig->cputime_expires.sched_exp = 0;
 }
 
 static u32 onecputick;
@@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 	}
 }
 
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime: The struct to compare.
+ *
+ * Checks @cputime to see if all fields are zero.  Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+	if (cputime_eq(cputime->utime, cputime_zero) &&
+	    cputime_eq(cputime->stime, cputime_zero) &&
+	    cputime->sum_exec_runtime == 0)
+		return 1;
+	return 0;
+}
+
 /*
  * Check for any per-thread CPU timers that have fired and move them
  * off the tsk->*_timers list onto the firing list.  Per-thread timers
@@ -1129,19 +1075,6 @@ static void check_process_timers(struct task_struct *tsk,
 	unsigned long soft;
 
 	/*
-	 * Don't sample the current process CPU clocks if there are no timers.
-	 */
-	if (list_empty(&timers[CPUCLOCK_PROF]) &&
-	    cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
-	    sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
-	    list_empty(&timers[CPUCLOCK_VIRT]) &&
-	    cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
-	    list_empty(&timers[CPUCLOCK_SCHED])) {
-		stop_process_timers(sig);
-		return;
-	}
-
-	/*
 	 * Collect the current process totals.
 	 */
 	thread_group_cputimer(tsk, &cputime);
@@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk,
 		}
 	}
 
-	if (!cputime_eq(prof_expires, cputime_zero) &&
-	    (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
-	     cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
-		sig->cputime_expires.prof_exp = prof_expires;
-	if (!cputime_eq(virt_expires, cputime_zero) &&
-	    (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
-	     cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
-		sig->cputime_expires.virt_exp = virt_expires;
-	if (sched_expires != 0 &&
-	    (sig->cputime_expires.sched_exp == 0 ||
-	     sig->cputime_expires.sched_exp > sched_expires))
-		sig->cputime_expires.sched_exp = sched_expires;
+	sig->cputime_expires.prof_exp = prof_expires;
+	sig->cputime_expires.virt_exp = virt_expires;
+	sig->cputime_expires.sched_exp = sched_expires;
+	if (task_cputime_zero(&sig->cputime_expires))
+		stop_process_timers(sig);
 }
 
 /*
@@ -1270,6 +1196,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			goto out;
 		}
 		read_lock(&tasklist_lock); /* arm_timer needs it.  */
+		spin_lock(&p->sighand->siglock);
 	} else {
 		read_lock(&tasklist_lock);
 		if (unlikely(p->signal == NULL)) {
@@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 			clear_dead_task(timer, now);
 			goto out_unlock;
 		}
+		spin_lock(&p->sighand->siglock);
 		cpu_timer_sample_group(timer->it_clock, p, &now);
 		bump_cpu_timer(timer, now);
 		/* Leave the tasklist_lock locked for the call below.  */
@@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	/*
 	 * Now re-arm for the new expiry time.
 	 */
-	arm_timer(timer, now);
+	BUG_ON(!irqs_disabled());
+	arm_timer(timer);
+	spin_unlock(&p->sighand->siglock);
 
 out_unlock:
 	read_unlock(&tasklist_lock);
@@ -1310,23 +1240,6 @@ out:
 }
 
 /**
- * task_cputime_zero - Check a task_cputime struct for all zero fields.
- *
- * @cputime: The struct to compare.
- *
- * Checks @cputime to see if all fields are zero.  Returns true if all fields
- * are zero, false if any field is nonzero.
- */
-static inline int task_cputime_zero(const struct task_cputime *cputime)
-{
-	if (cputime_eq(cputime->utime, cputime_zero) &&
-	    cputime_eq(cputime->stime, cputime_zero) &&
-	    cputime->sum_exec_runtime == 0)
-		return 1;
-	return 0;
-}
-
-/**
  * task_cputime_expired - Compare two task_cputime entities.
  *
  * @sample: The task_cputime structure to be checked for expiration.
@@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 	}
 
 	sig = tsk->signal;
-	if (!task_cputime_zero(&sig->cputime_expires)) {
+	if (sig->cputimer.running) {
 		struct task_cputime group_sample;
 
 		thread_group_cputimer(tsk, &group_sample);
@@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 			return 1;
 	}
 
-	return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
+	return 0;
 }
 
 /*
@@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	 * put them on the firing list.
 	 */
 	check_thread_timers(tsk, &firing);
-	check_process_timers(tsk, &firing);
+	/*
+	 * If there are any active process wide timers (POSIX 1.b, itimers,
+	 * RLIMIT_CPU) cputimer must be running.
+	 */
+	if (tsk->signal->cputimer.running)
+		check_process_timers(tsk, &firing);
 
 	/*
 	 * We must release these locks before taking any timer's lock.
@@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 }
 
 /*
- * Set one of the process-wide special case CPU timers.
+ * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  * The tsk->sighand->siglock must be held by the caller.
- * The *newval argument is relative and we update it to be absolute, *oldval
- * is absolute and we update it to be relative.
 */
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
			   cputime_t *newval, cputime_t *oldval)
 {
 	union cpu_time_count now;
-	struct list_head *head;
 
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
 	cpu_timer_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {
+		/*
+		 * We are setting itimer. The *oldval is absolute and we update
+		 * it to be relative, *newval argument is relative and we update
+		 * it to be absolute.
+		 */
 		if (!cputime_eq(*oldval, cputime_zero)) {
 			if (cputime_le(*oldval, now.cpu)) {
 				/* Just about to fire. */
@@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		if (cputime_eq(*newval, cputime_zero))
 			return;
 		*newval = cputime_add(*newval, now.cpu);
-
-		/*
-		 * If the RLIMIT_CPU timer will expire before the
-		 * ITIMER_PROF timer, we have nothing else to do.
-		 */
-		if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
-		    < cputime_to_secs(*newval))
-			return;
 	}
 
 	/*
-	 * Check whether there are any process timers already set to fire
-	 * before this one.  If so, we don't have anything more to do.
+	 * Update expiration cache if we are the earliest timer, or eventually
+	 * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
	 */
-	head = &tsk->signal->cpu_timers[clock_idx];
-	if (list_empty(head) ||
-	    cputime_ge(list_first_entry(head,
-				struct cpu_timer_list, entry)->expires.cpu,
-		       *newval)) {
-		switch (clock_idx) {
-		case CPUCLOCK_PROF:
+	switch (clock_idx) {
+	case CPUCLOCK_PROF:
+		if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
 			tsk->signal->cputime_expires.prof_exp = *newval;
-			break;
-		case CPUCLOCK_VIRT:
+		break;
+	case CPUCLOCK_VIRT:
+		if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
 			tsk->signal->cputime_expires.virt_exp = *newval;
-			break;
-		}
+		break;
 	}
 }
 
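The posix-cpu-timers.c changes above are internal: siglock now protects the expiration cache, SIGEV_NONE timers are expired in cpu_timer_fire(), and posix_cpu_timer_set() reports the previous reload value from old_incr. The user-visible effect is that timer_settime() on a process CPU clock now hands back the correct previous interval. A hedged user-space sketch, not taken from this patch and with error handling omitted, that exercises that path:

#include <signal.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	timer_t tid;
	struct sigevent sev = { .sigev_notify = SIGEV_NONE };
	struct itimerspec new = {
		.it_value    = { .tv_sec = 2, .tv_nsec = 0 },
		.it_interval = { .tv_sec = 1, .tv_nsec = 0 },
	};
	struct itimerspec old;

	timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid);
	timer_settime(tid, 0, &new, NULL);
	/* Re-arming returns the previously programmed values; with this
	 * merge old.it_interval reflects the 1 s reload set above. */
	timer_settime(tid, 0, &new, &old);
	printf("previous interval: %ld s\n", (long)old.it_interval.tv_sec);
	timer_delete(tid);
	return 0;
}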
diff --git a/kernel/time.c b/kernel/time.c
index 656dccfe1cbb..50612faa9baf 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
 */
 static inline void warp_clock(void)
 {
-	write_seqlock_irq(&xtime_lock);
-	wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
-	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
-	update_xtime_cache(0);
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
+	struct timespec delta, adjust;
+	delta.tv_sec = sys_tz.tz_minuteswest * 60;
+	delta.tv_nsec = 0;
+	adjust = timespec_add_safe(current_kernel_time(), delta);
+	do_settimeofday(&adjust);
 }
 
 /*
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1f5dde637457..f08e99c1d561 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs)
 	list_add(&cs->list, entry);
 }
 
+
+/*
+ * Maximum time we expect to go between ticks. This includes idle
+ * tickless time. It provides the trade off between selecting a
+ * mult/shift pair that is very precise but can only handle a short
+ * period of time, vs. a mult/shift pair that can handle long periods
+ * of time but isn't as precise.
+ *
+ * This is a subsystem constant, and actual hardware limitations
+ * may override it (ie: clocksources that wrap every 3 seconds).
+ */
+#define MAX_UPDATE_LENGTH 5 /* Seconds */
+
+/**
+ * __clocksource_register_scale - Used to install new clocksources
+ * @t: clocksource to be registered
+ * @scale: Scale factor multiplied against freq to get clocksource hz
+ * @freq: clocksource frequency (cycles per second) divided by scale
+ *
+ * Returns -EBUSY if registration fails, zero otherwise.
+ *
+ * This *SHOULD NOT* be called directly! Please use the
+ * clocksource_register_hz() or clocksource_register_khz helper functions.
+ */
+int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
+{
+
+	/*
+	 * Ideally we want to use some of the limits used in
+	 * clocksource_max_deferment, to provide a more informed
+	 * MAX_UPDATE_LENGTH. But for now this just gets the
+	 * register interface working properly.
+	 */
+	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+			       NSEC_PER_SEC/scale,
+			       MAX_UPDATE_LENGTH*scale);
+	cs->max_idle_ns = clocksource_max_deferment(cs);
+
+	mutex_lock(&clocksource_mutex);
+	clocksource_enqueue(cs);
+	clocksource_select();
+	clocksource_enqueue_watchdog(cs);
+	mutex_unlock(&clocksource_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__clocksource_register_scale);
+
+
 /**
  * clocksource_register - Used to install new clocksources
  * @t: clocksource to be registered
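The new __clocksource_register_scale() above is meant to be reached through the clocksource_register_hz()/clocksource_register_khz() helpers named in its comment, so clocksource drivers no longer have to pre-compute mult/shift themselves. A hedged sketch, not part of this patch, of what a converted driver might look like; the driver name, read callback, and 14 MHz rate are all made up:

static cycle_t example_read(struct clocksource *cs)
{
	return 0;	/* hypothetical free-running counter read */
}

static struct clocksource example_clocksource = {
	.name	= "example",
	.rating	= 200,
	.read	= example_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init example_clocksource_init(void)
{
	/* mult/shift are derived internally via __clocksource_register_scale() */
	return clocksource_register_hz(&example_clocksource, 14000000);
}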
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7c0f180d6e9d..c63116863a80 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -69,7 +69,7 @@ static s64 time_freq;
 /* time at last adjustment (secs): */
 static long time_reftime;
 
-long time_adjust;
+static long time_adjust;
 
 /* constant (boot-param configurable) NTP tick adjustment (upscaled) */
 static s64 ntp_tick_adj;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 39f6177fafac..caf8d4d4f5c8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,13 +165,6 @@ struct timespec raw_time;
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-static struct timespec xtime_cache __attribute__ ((aligned (16)));
-void update_xtime_cache(u64 nsec)
-{
-	xtime_cache = xtime;
-	timespec_add_ns(&xtime_cache, nsec);
-}
-
 /* must hold xtime_lock */
 void timekeeping_leap_insert(int leapsecond)
 {
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
 
 	xtime = *tv;
 
-	update_xtime_cache(0);
-
 	timekeeper.ntp_error = 0;
 	ntp_clear();
 
@@ -559,7 +550,6 @@ void __init timekeeping_init(void)
 	}
 	set_normalized_timespec(&wall_to_monotonic,
				-boot.tv_sec, -boot.tv_nsec);
-	update_xtime_cache(0);
 	total_sleep_time.tv_sec = 0;
 	total_sleep_time.tv_nsec = 0;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
 		wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
 		total_sleep_time = timespec_add_safe(total_sleep_time, ts);
 	}
-	update_xtime_cache(0);
 	/* re-base the last cycle value */
 	timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
 	timekeeper.ntp_error = 0;
@@ -788,7 +777,6 @@ void update_wall_time(void)
 {
 	struct clocksource *clock;
 	cycle_t offset;
-	u64 nsecs;
 	int shift = 0, maxshift;
 
 	/* Make sure we're fully resumed: */
@@ -847,7 +835,9 @@ void update_wall_time(void)
 		timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
 	}
 
-	/* store full nanoseconds into xtime after rounding it up and
+
+	/*
+	 * Store full nanoseconds into xtime after rounding it up and
 	 * add the remainder to the error difference.
 	 */
 	xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
@@ -855,8 +845,15 @@ void update_wall_time(void)
 	timekeeper.ntp_error += timekeeper.xtime_nsec <<
				timekeeper.ntp_error_shift;
 
-	nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
-	update_xtime_cache(nsecs);
+	/*
+	 * Finally, make sure that after the rounding
+	 * xtime.tv_nsec isn't larger then NSEC_PER_SEC
+	 */
+	if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
+		xtime.tv_nsec -= NSEC_PER_SEC;
+		xtime.tv_sec++;
+		second_overflow();
+	}
 
 	/* check to see if there is a new clocksource to use */
 	update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
@@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
 
 unsigned long get_seconds(void)
 {
-	return xtime_cache.tv_sec;
+	return xtime.tv_sec;
 }
 EXPORT_SYMBOL(get_seconds);
 
 struct timespec __current_kernel_time(void)
 {
-	return xtime_cache;
+	return xtime;
 }
 
 struct timespec current_kernel_time(void)
@@ -913,7 +910,7 @@ struct timespec current_kernel_time(void)
 	do {
 		seq = read_seqbegin(&xtime_lock);
 
-		now = xtime_cache;
+		now = xtime;
 	} while (read_seqretry(&xtime_lock, seq));
 
 	return now;
@@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void)
 	do {
 		seq = read_seqbegin(&xtime_lock);
 
-		now = xtime_cache;
+		now = xtime;
 		mono = wall_to_monotonic;
 	} while (read_seqretry(&xtime_lock, seq));
 
diff --git a/kernel/timer.c b/kernel/timer.c
index aeb6a54f2771..9199f3c52215 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
 }
 EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
 
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between
+ * the time mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used
+ * instead.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+	timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
 
 static inline void set_running_timer(struct tvec_base *base,
				     struct timer_list *timer)
@@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer,
 {
 	timer->entry.next = NULL;
 	timer->base = __raw_get_cpu_var(tvec_bases);
+	timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
 	timer->start_site = NULL;
 	timer->start_pid = -1;
@@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * Algorithm:
+ *   1) calculate the maximum (absolute) time
+ *   2) calculate the highest bit where the expires and new max are different
+ *   3) use this bit to make a mask
+ *   4) use the bitmask to round down the maximum time, so that all last
+ *      bits are zeros
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+	unsigned long expires_limit, mask;
+	int bit;
+
+	expires_limit = expires + timer->slack;
+
+	if (timer->slack < 0) /* auto slack: use 0.4% */
+		expires_limit = expires + (expires - jiffies)/256;
+
+	mask = expires ^ expires_limit;
+
+	if (mask == 0)
+		return expires;
+
+	bit = find_last_bit(&mask, BITS_PER_LONG);
+
+	mask = (1 << bit) - 1;
+
+	expires_limit = expires_limit & ~(mask);
+
+	return expires_limit;
+}
+
 /**
  * mod_timer - modify a timer's timeout
  * @timer: the timer to be modified
@@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer_pending(timer) && timer->expires == expires)
 		return 1;
 
+	expires = apply_slack(timer, expires);
+
 	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
@@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 	return index;
 }
 
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+			  unsigned long data)
+{
+	int preempt_count = preempt_count();
+
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * It is permissible to free the timer from inside the
+	 * function that is called from it, this we need to take into
+	 * account for lockdep too. To avoid bogus "held lock freed"
+	 * warnings as well as problems when looking into
+	 * timer->lockdep_map, make a copy and use that here.
+	 */
+	struct lockdep_map lockdep_map = timer->lockdep_map;
+#endif
+	/*
+	 * Couple the lock chain with the lock chain at
+	 * del_timer_sync() by acquiring the lock_map around the fn()
+	 * call here and in del_timer_sync().
+	 */
+	lock_map_acquire(&lockdep_map);
+
+	trace_timer_expire_entry(timer);
+	fn(data);
+	trace_timer_expire_exit(timer);
+
+	lock_map_release(&lockdep_map);
+
+	if (preempt_count != preempt_count()) {
+		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+			  fn, preempt_count, preempt_count());
+		/*
+		 * Restore the preempt count. That gives us a decent
+		 * chance to survive and extract information. If the
+		 * callback kept a lock held, bad luck, but not worse
+		 * than the BUG() we had.
+		 */
+		preempt_count() = preempt_count;
+	}
+}
+
 #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
 
 /**
@@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base)
			detach_timer(timer, 1);
 
			spin_unlock_irq(&base->lock);
-			{
-				int preempt_count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
-				/*
-				 * It is permissible to free the timer from
-				 * inside the function that is called from
-				 * it, this we need to take into account for
-				 * lockdep too. To avoid bogus "held lock
-				 * freed" warnings as well as problems when
-				 * looking into timer->lockdep_map, make a
-				 * copy and use that here.
-				 */
-				struct lockdep_map lockdep_map =
-					timer->lockdep_map;
-#endif
-				/*
-				 * Couple the lock chain with the lock chain at
-				 * del_timer_sync() by acquiring the lock_map
-				 * around the fn() call here and in
-				 * del_timer_sync().
-				 */
-				lock_map_acquire(&lockdep_map);
-
-				trace_timer_expire_entry(timer);
-				fn(data);
-				trace_timer_expire_exit(timer);
-
-				lock_map_release(&lockdep_map);
-
-				if (preempt_count != preempt_count()) {
-					printk(KERN_ERR "huh, entered %p "
-					       "with preempt_count %08x, exited"
-					       " with %08x?\n",
-					       fn, preempt_count,
-					       preempt_count());
-					BUG();
-				}
-			}
+			call_timer_fn(timer, fn, data);
			spin_lock_irq(&base->lock);
		}
	}
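The timer.c changes add per-timer slack (set_timer_slack(), applied in mod_timer() via apply_slack()) and split the callback invocation into call_timer_fn(), which now warns about and repairs a preempt_count leak instead of calling BUG(). A hedged sketch, not from this patch, of a driver opting in to a full second of slack for a non-critical housekeeping timer; the timer and handler names are hypothetical:

static void example_housekeeping(unsigned long data)
{
	/* periodic, timing-insensitive work */
}

static DEFINE_TIMER(example_timer, example_housekeeping, 0, 0);

static void example_arm(void)
{
	/* allow the expiry to be rounded by up to HZ jiffies (one second) */
	set_timer_slack(&example_timer, HZ);
	mod_timer(&example_timer, jiffies + 10 * HZ);
}

With the default slack of -1 set in __init_timer(), apply_slack() instead rounds using 0.4% of the remaining delay, so legacy timers get batching without any driver changes.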