aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-10-23 13:53:02 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-23 13:53:02 -0400
commit1f6d6e8ebe73ba9d9d4c693f7f6f50f661dbd6e4 (patch)
treebe7a2d20b1728da5a0d844a6f4cd382b2c2569fb
parentdb563fc2e80534f98c7f9121a6f7dfe41f177a79 (diff)
parent268a3dcfea2077fca60d3715caa5c96f9b5e6ea7 (diff)
Merge branch 'v28-range-hrtimers-for-linus-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'v28-range-hrtimers-for-linus-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (37 commits) hrtimers: add missing docbook comments to struct hrtimer hrtimers: simplify hrtimer_peek_ahead_timers() hrtimers: fix docbook comments DECLARE_PER_CPU needs linux/percpu.h hrtimers: fix typo rangetimers: fix the bug reported by Ingo for real rangetimer: fix BUG_ON reported by Ingo rangetimer: fix x86 build failure for the !HRTIMERS case select: fix alpha OSF wrapper select: fix alpha OSF wrapper hrtimer: peek at the timer queue just before going idle hrtimer: make the futex() system call use the per process slack value hrtimer: make the nanosleep() syscall use the per process slack hrtimer: fix signed/unsigned bug in slack estimator hrtimer: show the timer ranges in /proc/timer_list hrtimer: incorporate feedback from Peter Zijlstra hrtimer: add a hrtimer_start_range() function hrtimer: another build fix hrtimer: fix build bug found by Ingo hrtimer: make select() and poll() use the hrtimer range feature ...
-rw-r--r--arch/alpha/kernel/osf_sys.c13
-rw-r--r--arch/ia64/kvm/kvm-ia64.c2
-rw-r--r--arch/powerpc/oprofile/cell/spu_profiler.c2
-rw-r--r--arch/x86/kvm/i8254.c8
-rw-r--r--arch/x86/kvm/lapic.c6
-rw-r--r--drivers/cpuidle/cpuidle.c7
-rw-r--r--drivers/s390/crypto/ap_bus.c6
-rw-r--r--fs/compat.c187
-rw-r--r--fs/select.c396
-rw-r--r--fs/timerfd.c8
-rw-r--r--include/linux/hrtimer.h105
-rw-r--r--include/linux/init_task.h1
-rw-r--r--include/linux/poll.h8
-rw-r--r--include/linux/prctl.h7
-rw-r--r--include/linux/sched.h6
-rw-r--r--include/linux/thread_info.h8
-rw-r--r--include/linux/time.h4
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/futex.c11
-rw-r--r--kernel/hrtimer.c206
-rw-r--r--kernel/posix-timers.c10
-rw-r--r--kernel/rtmutex.c3
-rw-r--r--kernel/sched.c7
-rw-r--r--kernel/sys.c10
-rw-r--r--kernel/time.c18
-rw-r--r--kernel/time/ntp.c3
-rw-r--r--kernel/time/tick-sched.c25
-rw-r--r--kernel/time/timer_list.c8
-rw-r--r--net/sched/sch_cbq.c7
-rw-r--r--sound/drivers/pcsp/pcsp_lib.c5
30 files changed, 710 insertions, 379 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index f25f6c490952..18a3ea1aac51 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -983,10 +983,12 @@ asmlinkage int
983osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, 983osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
984 struct timeval32 __user *tvp) 984 struct timeval32 __user *tvp)
985{ 985{
986 s64 timeout = MAX_SCHEDULE_TIMEOUT; 986 struct timespec end_time, *to = NULL;
987 if (tvp) { 987 if (tvp) {
988 time_t sec, usec; 988 time_t sec, usec;
989 989
990 to = &end_time;
991
990 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp)) 992 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
991 || __get_user(sec, &tvp->tv_sec) 993 || __get_user(sec, &tvp->tv_sec)
992 || __get_user(usec, &tvp->tv_usec)) { 994 || __get_user(usec, &tvp->tv_usec)) {
@@ -996,14 +998,13 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
996 if (sec < 0 || usec < 0) 998 if (sec < 0 || usec < 0)
997 return -EINVAL; 999 return -EINVAL;
998 1000
999 if ((unsigned long) sec < MAX_SELECT_SECONDS) { 1001 if (poll_select_set_timeout(to, sec, usec * NSEC_PER_USEC))
1000 timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); 1002 return -EINVAL;
1001 timeout += sec * (unsigned long) HZ; 1003
1002 }
1003 } 1004 }
1004 1005
1005 /* OSF does not copy back the remaining time. */ 1006 /* OSF does not copy back the remaining time. */
1006 return core_sys_select(n, inp, outp, exp, &timeout); 1007 return core_sys_select(n, inp, outp, exp, to);
1007} 1008}
1008 1009
1009struct rusage32 { 1010struct rusage32 {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index c0699f0e35a9..a312c9e9b9ef 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1114,7 +1114,7 @@ static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
1114 struct hrtimer *p_ht = &vcpu->arch.hlt_timer; 1114 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
1115 1115
1116 if (hrtimer_cancel(p_ht)) 1116 if (hrtimer_cancel(p_ht))
1117 hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS); 1117 hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
1118} 1118}
1119 1119
1120static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) 1120static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index 6edaebd5099a..dd499c3e9da7 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -195,7 +195,7 @@ int start_spu_profiling(unsigned int cycles_reset)
195 pr_debug("timer resolution: %lu\n", TICK_NSEC); 195 pr_debug("timer resolution: %lu\n", TICK_NSEC);
196 kt = ktime_set(0, profiling_interval); 196 kt = ktime_set(0, profiling_interval);
197 hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 197 hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
198 timer.expires = kt; 198 hrtimer_set_expires(&timer, kt);
199 timer.function = profile_spus; 199 timer.function = profile_spus;
200 200
201 /* Allocate arrays for collecting SPU PC samples */ 201 /* Allocate arrays for collecting SPU PC samples */
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 634132a9a512..11c6725fb798 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -204,10 +204,10 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
204 if (vcpu0 && waitqueue_active(&vcpu0->wq)) 204 if (vcpu0 && waitqueue_active(&vcpu0->wq))
205 wake_up_interruptible(&vcpu0->wq); 205 wake_up_interruptible(&vcpu0->wq);
206 206
207 pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); 207 hrtimer_add_expires_ns(&pt->timer, pt->period);
208 pt->scheduled = ktime_to_ns(pt->timer.expires); 208 pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
209 if (pt->period) 209 if (pt->period)
210 ps->channels[0].count_load_time = pt->timer.expires; 210 ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
211 211
212 return (pt->period == 0 ? 0 : 1); 212 return (pt->period == 0 ? 0 : 1);
213} 213}
@@ -257,7 +257,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
257 257
258 timer = &pit->pit_state.pit_timer.timer; 258 timer = &pit->pit_state.pit_timer.timer;
259 if (hrtimer_cancel(timer)) 259 if (hrtimer_cancel(timer))
260 hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); 260 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
261} 261}
262 262
263static void destroy_pit_timer(struct kvm_kpit_timer *pt) 263static void destroy_pit_timer(struct kvm_kpit_timer *pt)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6571926bfd33..0fc3cab48943 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -946,9 +946,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
946 946
947 if (apic_lvtt_period(apic)) { 947 if (apic_lvtt_period(apic)) {
948 result = 1; 948 result = 1;
949 apic->timer.dev.expires = ktime_add_ns( 949 hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period);
950 apic->timer.dev.expires,
951 apic->timer.period);
952 } 950 }
953 return result; 951 return result;
954} 952}
@@ -1117,7 +1115,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
1117 1115
1118 timer = &apic->timer.dev; 1116 timer = &apic->timer.dev;
1119 if (hrtimer_cancel(timer)) 1117 if (hrtimer_cancel(timer))
1120 hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); 1118 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
1121} 1119}
1122 1120
1123void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) 1121void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index bb6e3b338043..5bed73329ef8 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -16,6 +16,7 @@
16#include <linux/cpu.h> 16#include <linux/cpu.h>
17#include <linux/cpuidle.h> 17#include <linux/cpuidle.h>
18#include <linux/ktime.h> 18#include <linux/ktime.h>
19#include <linux/hrtimer.h>
19 20
20#include "cpuidle.h" 21#include "cpuidle.h"
21 22
@@ -64,6 +65,12 @@ static void cpuidle_idle_call(void)
64 return; 65 return;
65 } 66 }
66 67
68 /*
69 * run any timers that can be run now, at this point
70 * before calculating the idle duration etc.
71 */
72 hrtimer_peek_ahead_timers();
73
67 /* ask the governor for the next state */ 74 /* ask the governor for the next state */
68 next_state = cpuidle_curr_governor->select(dev); 75 next_state = cpuidle_curr_governor->select(dev);
69 if (need_resched()) 76 if (need_resched())
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 326db1e827c4..e3fe6838293a 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -659,9 +659,9 @@ static ssize_t poll_timeout_store(struct bus_type *bus, const char *buf,
659 hr_time = ktime_set(0, poll_timeout); 659 hr_time = ktime_set(0, poll_timeout);
660 660
661 if (!hrtimer_is_queued(&ap_poll_timer) || 661 if (!hrtimer_is_queued(&ap_poll_timer) ||
662 !hrtimer_forward(&ap_poll_timer, ap_poll_timer.expires, hr_time)) { 662 !hrtimer_forward(&ap_poll_timer, hrtimer_get_expires(&ap_poll_timer), hr_time)) {
663 ap_poll_timer.expires = hr_time; 663 hrtimer_set_expires(&ap_poll_timer, hr_time);
664 hrtimer_start(&ap_poll_timer, hr_time, HRTIMER_MODE_ABS); 664 hrtimer_start_expires(&ap_poll_timer, HRTIMER_MODE_ABS);
665 } 665 }
666 return count; 666 return count;
667} 667}
diff --git a/fs/compat.c b/fs/compat.c
index cb36245f9fe0..fe3c9bf87608 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1469,6 +1469,57 @@ out_ret:
1469 1469
1470#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1470#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1471 1471
1472static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
1473 int timeval, int ret)
1474{
1475 struct timespec ts;
1476
1477 if (!p)
1478 return ret;
1479
1480 if (current->personality & STICKY_TIMEOUTS)
1481 goto sticky;
1482
1483 /* No update for zero timeout */
1484 if (!end_time->tv_sec && !end_time->tv_nsec)
1485 return ret;
1486
1487 ktime_get_ts(&ts);
1488 ts = timespec_sub(*end_time, ts);
1489 if (ts.tv_sec < 0)
1490 ts.tv_sec = ts.tv_nsec = 0;
1491
1492 if (timeval) {
1493 struct compat_timeval rtv;
1494
1495 rtv.tv_sec = ts.tv_sec;
1496 rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
1497
1498 if (!copy_to_user(p, &rtv, sizeof(rtv)))
1499 return ret;
1500 } else {
1501 struct compat_timespec rts;
1502
1503 rts.tv_sec = ts.tv_sec;
1504 rts.tv_nsec = ts.tv_nsec;
1505
1506 if (!copy_to_user(p, &rts, sizeof(rts)))
1507 return ret;
1508 }
1509 /*
1510 * If an application puts its timeval in read-only memory, we
1511 * don't want the Linux-specific update to the timeval to
1512 * cause a fault after the select has completed
1513 * successfully. However, because we're not updating the
1514 * timeval, we can't restart the system call.
1515 */
1516
1517sticky:
1518 if (ret == -ERESTARTNOHAND)
1519 ret = -EINTR;
1520 return ret;
1521}
1522
1472/* 1523/*
1473 * Ooo, nasty. We need here to frob 32-bit unsigned longs to 1524 * Ooo, nasty. We need here to frob 32-bit unsigned longs to
1474 * 64-bit unsigned longs. 1525 * 64-bit unsigned longs.
@@ -1550,7 +1601,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1550 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 1601 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1551 1602
1552int compat_core_sys_select(int n, compat_ulong_t __user *inp, 1603int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1553 compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) 1604 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1605 struct timespec *end_time)
1554{ 1606{
1555 fd_set_bits fds; 1607 fd_set_bits fds;
1556 void *bits; 1608 void *bits;
@@ -1597,7 +1649,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1597 zero_fd_set(n, fds.res_out); 1649 zero_fd_set(n, fds.res_out);
1598 zero_fd_set(n, fds.res_ex); 1650 zero_fd_set(n, fds.res_ex);
1599 1651
1600 ret = do_select(n, &fds, timeout); 1652 ret = do_select(n, &fds, end_time);
1601 1653
1602 if (ret < 0) 1654 if (ret < 0)
1603 goto out; 1655 goto out;
@@ -1623,7 +1675,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1623 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1675 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1624 struct compat_timeval __user *tvp) 1676 struct compat_timeval __user *tvp)
1625{ 1677{
1626 s64 timeout = -1; 1678 struct timespec end_time, *to = NULL;
1627 struct compat_timeval tv; 1679 struct compat_timeval tv;
1628 int ret; 1680 int ret;
1629 1681
@@ -1631,43 +1683,14 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1631 if (copy_from_user(&tv, tvp, sizeof(tv))) 1683 if (copy_from_user(&tv, tvp, sizeof(tv)))
1632 return -EFAULT; 1684 return -EFAULT;
1633 1685
1634 if (tv.tv_sec < 0 || tv.tv_usec < 0) 1686 to = &end_time;
1687 if (poll_select_set_timeout(to, tv.tv_sec,
1688 tv.tv_usec * NSEC_PER_USEC))
1635 return -EINVAL; 1689 return -EINVAL;
1636
1637 /* Cast to u64 to make GCC stop complaining */
1638 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
1639 timeout = -1; /* infinite */
1640 else {
1641 timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ);
1642 timeout += tv.tv_sec * HZ;
1643 }
1644 } 1690 }
1645 1691
1646 ret = compat_core_sys_select(n, inp, outp, exp, &timeout); 1692 ret = compat_core_sys_select(n, inp, outp, exp, to);
1647 1693 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
1648 if (tvp) {
1649 struct compat_timeval rtv;
1650
1651 if (current->personality & STICKY_TIMEOUTS)
1652 goto sticky;
1653 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1654 rtv.tv_sec = timeout;
1655 if (compat_timeval_compare(&rtv, &tv) >= 0)
1656 rtv = tv;
1657 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
1658sticky:
1659 /*
1660 * If an application puts its timeval in read-only
1661 * memory, we don't want the Linux-specific update to
1662 * the timeval to cause a fault after the select has
1663 * completed successfully. However, because we're not
1664 * updating the timeval, we can't restart the system
1665 * call.
1666 */
1667 if (ret == -ERESTARTNOHAND)
1668 ret = -EINTR;
1669 }
1670 }
1671 1694
1672 return ret; 1695 return ret;
1673} 1696}
@@ -1680,15 +1703,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1680{ 1703{
1681 compat_sigset_t ss32; 1704 compat_sigset_t ss32;
1682 sigset_t ksigmask, sigsaved; 1705 sigset_t ksigmask, sigsaved;
1683 s64 timeout = MAX_SCHEDULE_TIMEOUT;
1684 struct compat_timespec ts; 1706 struct compat_timespec ts;
1707 struct timespec end_time, *to = NULL;
1685 int ret; 1708 int ret;
1686 1709
1687 if (tsp) { 1710 if (tsp) {
1688 if (copy_from_user(&ts, tsp, sizeof(ts))) 1711 if (copy_from_user(&ts, tsp, sizeof(ts)))
1689 return -EFAULT; 1712 return -EFAULT;
1690 1713
1691 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 1714 to = &end_time;
1715 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1692 return -EINVAL; 1716 return -EINVAL;
1693 } 1717 }
1694 1718
@@ -1703,51 +1727,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1703 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1727 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1704 } 1728 }
1705 1729
1706 do { 1730 ret = compat_core_sys_select(n, inp, outp, exp, to);
1707 if (tsp) { 1731 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1708 if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
1709 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1710 timeout += ts.tv_sec * (unsigned long)HZ;
1711 ts.tv_sec = 0;
1712 ts.tv_nsec = 0;
1713 } else {
1714 ts.tv_sec -= MAX_SELECT_SECONDS;
1715 timeout = MAX_SELECT_SECONDS * HZ;
1716 }
1717 }
1718
1719 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1720
1721 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1722
1723 if (tsp) {
1724 struct compat_timespec rts;
1725
1726 if (current->personality & STICKY_TIMEOUTS)
1727 goto sticky;
1728
1729 rts.tv_sec = timeout / HZ;
1730 rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
1731 if (rts.tv_nsec >= NSEC_PER_SEC) {
1732 rts.tv_sec++;
1733 rts.tv_nsec -= NSEC_PER_SEC;
1734 }
1735 if (compat_timespec_compare(&rts, &ts) >= 0)
1736 rts = ts;
1737 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1738sticky:
1739 /*
1740 * If an application puts its timeval in read-only
1741 * memory, we don't want the Linux-specific update to
1742 * the timeval to cause a fault after the select has
1743 * completed successfully. However, because we're not
1744 * updating the timeval, we can't restart the system
1745 * call.
1746 */
1747 if (ret == -ERESTARTNOHAND)
1748 ret = -EINTR;
1749 }
1750 }
1751 1732
1752 if (ret == -ERESTARTNOHAND) { 1733 if (ret == -ERESTARTNOHAND) {
1753 /* 1734 /*
@@ -1792,18 +1773,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1792 compat_sigset_t ss32; 1773 compat_sigset_t ss32;
1793 sigset_t ksigmask, sigsaved; 1774 sigset_t ksigmask, sigsaved;
1794 struct compat_timespec ts; 1775 struct compat_timespec ts;
1795 s64 timeout = -1; 1776 struct timespec end_time, *to = NULL;
1796 int ret; 1777 int ret;
1797 1778
1798 if (tsp) { 1779 if (tsp) {
1799 if (copy_from_user(&ts, tsp, sizeof(ts))) 1780 if (copy_from_user(&ts, tsp, sizeof(ts)))
1800 return -EFAULT; 1781 return -EFAULT;
1801 1782
1802 /* We assume that ts.tv_sec is always lower than 1783 to = &end_time;
1803 the number of seconds that can be expressed in 1784 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1804 an s64. Otherwise the compiler bitches at us */ 1785 return -EINVAL;
1805 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1806 timeout += ts.tv_sec * HZ;
1807 } 1786 }
1808 1787
1809 if (sigmask) { 1788 if (sigmask) {
@@ -1817,7 +1796,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1817 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1796 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1818 } 1797 }
1819 1798
1820 ret = do_sys_poll(ufds, nfds, &timeout); 1799 ret = do_sys_poll(ufds, nfds, to);
1821 1800
1822 /* We can restart this syscall, usually */ 1801 /* We can restart this syscall, usually */
1823 if (ret == -EINTR) { 1802 if (ret == -EINTR) {
@@ -1835,31 +1814,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1835 } else if (sigmask) 1814 } else if (sigmask)
1836 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1815 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1837 1816
1838 if (tsp && timeout >= 0) { 1817 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1839 struct compat_timespec rts;
1840
1841 if (current->personality & STICKY_TIMEOUTS)
1842 goto sticky;
1843 /* Yes, we know it's actually an s64, but it's also positive. */
1844 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
1845 1000;
1846 rts.tv_sec = timeout;
1847 if (compat_timespec_compare(&rts, &ts) >= 0)
1848 rts = ts;
1849 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1850sticky:
1851 /*
1852 * If an application puts its timeval in read-only
1853 * memory, we don't want the Linux-specific update to
1854 * the timeval to cause a fault after the select has
1855 * completed successfully. However, because we're not
1856 * updating the timeval, we can't restart the system
1857 * call.
1858 */
1859 if (ret == -ERESTARTNOHAND && timeout >= 0)
1860 ret = -EINTR;
1861 }
1862 }
1863 1818
1864 return ret; 1819 return ret;
1865} 1820}
diff --git a/fs/select.c b/fs/select.c
index da0e88201c3a..448e44001286 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -24,9 +24,64 @@
24#include <linux/fdtable.h> 24#include <linux/fdtable.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
27#include <linux/hrtimer.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
31
32/*
33 * Estimate expected accuracy in ns from a timeval.
34 *
35 * After quite a bit of churning around, we've settled on
36 * a simple thing of taking 0.1% of the timeout as the
37 * slack, with a cap of 100 msec.
38 * "nice" tasks get a 0.5% slack instead.
39 *
40 * Consider this comment an open invitation to come up with even
41 * better solutions..
42 */
43
44static long __estimate_accuracy(struct timespec *tv)
45{
46 long slack;
47 int divfactor = 1000;
48
49 if (task_nice(current) > 0)
50 divfactor = divfactor / 5;
51
52 slack = tv->tv_nsec / divfactor;
53 slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
54
55 if (slack > 100 * NSEC_PER_MSEC)
56 slack = 100 * NSEC_PER_MSEC;
57
58 if (slack < 0)
59 slack = 0;
60 return slack;
61}
62
63static long estimate_accuracy(struct timespec *tv)
64{
65 unsigned long ret;
66 struct timespec now;
67
68 /*
69 * Realtime tasks get a slack of 0 for obvious reasons.
70 */
71
72 if (rt_task(current))
73 return 0;
74
75 ktime_get_ts(&now);
76 now = timespec_sub(*tv, now);
77 ret = __estimate_accuracy(&now);
78 if (ret < current->timer_slack_ns)
79 return current->timer_slack_ns;
80 return ret;
81}
82
83
84
30struct poll_table_page { 85struct poll_table_page {
31 struct poll_table_page * next; 86 struct poll_table_page * next;
32 struct poll_table_entry * entry; 87 struct poll_table_entry * entry;
@@ -130,6 +185,79 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
130 add_wait_queue(wait_address, &entry->wait); 185 add_wait_queue(wait_address, &entry->wait);
131} 186}
132 187
188/**
189 * poll_select_set_timeout - helper function to setup the timeout value
190 * @to: pointer to timespec variable for the final timeout
191 * @sec: seconds (from user space)
192 * @nsec: nanoseconds (from user space)
193 *
194 * Note, we do not use a timespec for the user space value here, That
195 * way we can use the function for timeval and compat interfaces as well.
196 *
197 * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
198 */
199int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
200{
201 struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
202
203 if (!timespec_valid(&ts))
204 return -EINVAL;
205
206 /* Optimize for the zero timeout value here */
207 if (!sec && !nsec) {
208 to->tv_sec = to->tv_nsec = 0;
209 } else {
210 ktime_get_ts(to);
211 *to = timespec_add_safe(*to, ts);
212 }
213 return 0;
214}
215
216static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
217 int timeval, int ret)
218{
219 struct timespec rts;
220 struct timeval rtv;
221
222 if (!p)
223 return ret;
224
225 if (current->personality & STICKY_TIMEOUTS)
226 goto sticky;
227
228 /* No update for zero timeout */
229 if (!end_time->tv_sec && !end_time->tv_nsec)
230 return ret;
231
232 ktime_get_ts(&rts);
233 rts = timespec_sub(*end_time, rts);
234 if (rts.tv_sec < 0)
235 rts.tv_sec = rts.tv_nsec = 0;
236
237 if (timeval) {
238 rtv.tv_sec = rts.tv_sec;
239 rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
240
241 if (!copy_to_user(p, &rtv, sizeof(rtv)))
242 return ret;
243
244 } else if (!copy_to_user(p, &rts, sizeof(rts)))
245 return ret;
246
247 /*
248 * If an application puts its timeval in read-only memory, we
249 * don't want the Linux-specific update to the timeval to
250 * cause a fault after the select has completed
251 * successfully. However, because we're not updating the
252 * timeval, we can't restart the system call.
253 */
254
255sticky:
256 if (ret == -ERESTARTNOHAND)
257 ret = -EINTR;
258 return ret;
259}
260
133#define FDS_IN(fds, n) (fds->in + n) 261#define FDS_IN(fds, n) (fds->in + n)
134#define FDS_OUT(fds, n) (fds->out + n) 262#define FDS_OUT(fds, n) (fds->out + n)
135#define FDS_EX(fds, n) (fds->ex + n) 263#define FDS_EX(fds, n) (fds->ex + n)
@@ -182,11 +310,13 @@ get_max:
182#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 310#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
183#define POLLEX_SET (POLLPRI) 311#define POLLEX_SET (POLLPRI)
184 312
185int do_select(int n, fd_set_bits *fds, s64 *timeout) 313int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
186{ 314{
315 ktime_t expire, *to = NULL;
187 struct poll_wqueues table; 316 struct poll_wqueues table;
188 poll_table *wait; 317 poll_table *wait;
189 int retval, i; 318 int retval, i, timed_out = 0;
319 unsigned long slack = 0;
190 320
191 rcu_read_lock(); 321 rcu_read_lock();
192 retval = max_select_fd(n, fds); 322 retval = max_select_fd(n, fds);
@@ -198,12 +328,17 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
198 328
199 poll_initwait(&table); 329 poll_initwait(&table);
200 wait = &table.pt; 330 wait = &table.pt;
201 if (!*timeout) 331 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
202 wait = NULL; 332 wait = NULL;
333 timed_out = 1;
334 }
335
336 if (end_time && !timed_out)
337 slack = estimate_accuracy(end_time);
338
203 retval = 0; 339 retval = 0;
204 for (;;) { 340 for (;;) {
205 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 341 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
206 long __timeout;
207 342
208 set_current_state(TASK_INTERRUPTIBLE); 343 set_current_state(TASK_INTERRUPTIBLE);
209 344
@@ -259,27 +394,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
259 cond_resched(); 394 cond_resched();
260 } 395 }
261 wait = NULL; 396 wait = NULL;
262 if (retval || !*timeout || signal_pending(current)) 397 if (retval || timed_out || signal_pending(current))
263 break; 398 break;
264 if (table.error) { 399 if (table.error) {
265 retval = table.error; 400 retval = table.error;
266 break; 401 break;
267 } 402 }
268 403
269 if (*timeout < 0) { 404 /*
270 /* Wait indefinitely */ 405 * If this is the first loop and we have a timeout
271 __timeout = MAX_SCHEDULE_TIMEOUT; 406 * given, then we convert to ktime_t and set the to
272 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { 407 * pointer to the expiry value.
273 /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ 408 */
274 __timeout = MAX_SCHEDULE_TIMEOUT - 1; 409 if (end_time && !to) {
275 *timeout -= __timeout; 410 expire = timespec_to_ktime(*end_time);
276 } else { 411 to = &expire;
277 __timeout = *timeout;
278 *timeout = 0;
279 } 412 }
280 __timeout = schedule_timeout(__timeout); 413
281 if (*timeout >= 0) 414 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
282 *timeout += __timeout; 415 timed_out = 1;
283 } 416 }
284 __set_current_state(TASK_RUNNING); 417 __set_current_state(TASK_RUNNING);
285 418
@@ -300,7 +433,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
300 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 433 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
301 434
302int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 435int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
303 fd_set __user *exp, s64 *timeout) 436 fd_set __user *exp, struct timespec *end_time)
304{ 437{
305 fd_set_bits fds; 438 fd_set_bits fds;
306 void *bits; 439 void *bits;
@@ -351,7 +484,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
351 zero_fd_set(n, fds.res_out); 484 zero_fd_set(n, fds.res_out);
352 zero_fd_set(n, fds.res_ex); 485 zero_fd_set(n, fds.res_ex);
353 486
354 ret = do_select(n, &fds, timeout); 487 ret = do_select(n, &fds, end_time);
355 488
356 if (ret < 0) 489 if (ret < 0)
357 goto out; 490 goto out;
@@ -377,7 +510,7 @@ out_nofds:
377asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, 510asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
378 fd_set __user *exp, struct timeval __user *tvp) 511 fd_set __user *exp, struct timeval __user *tvp)
379{ 512{
380 s64 timeout = -1; 513 struct timespec end_time, *to = NULL;
381 struct timeval tv; 514 struct timeval tv;
382 int ret; 515 int ret;
383 516
@@ -385,43 +518,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
385 if (copy_from_user(&tv, tvp, sizeof(tv))) 518 if (copy_from_user(&tv, tvp, sizeof(tv)))
386 return -EFAULT; 519 return -EFAULT;
387 520
388 if (tv.tv_sec < 0 || tv.tv_usec < 0) 521 to = &end_time;
522 if (poll_select_set_timeout(to, tv.tv_sec,
523 tv.tv_usec * NSEC_PER_USEC))
389 return -EINVAL; 524 return -EINVAL;
390
391 /* Cast to u64 to make GCC stop complaining */
392 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
393 timeout = -1; /* infinite */
394 else {
395 timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
396 timeout += tv.tv_sec * HZ;
397 }
398 } 525 }
399 526
400 ret = core_sys_select(n, inp, outp, exp, &timeout); 527 ret = core_sys_select(n, inp, outp, exp, to);
401 528 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
402 if (tvp) {
403 struct timeval rtv;
404
405 if (current->personality & STICKY_TIMEOUTS)
406 goto sticky;
407 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
408 rtv.tv_sec = timeout;
409 if (timeval_compare(&rtv, &tv) >= 0)
410 rtv = tv;
411 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
412sticky:
413 /*
414 * If an application puts its timeval in read-only
415 * memory, we don't want the Linux-specific update to
416 * the timeval to cause a fault after the select has
417 * completed successfully. However, because we're not
418 * updating the timeval, we can't restart the system
419 * call.
420 */
421 if (ret == -ERESTARTNOHAND)
422 ret = -EINTR;
423 }
424 }
425 529
426 return ret; 530 return ret;
427} 531}
@@ -431,25 +535,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
431 fd_set __user *exp, struct timespec __user *tsp, 535 fd_set __user *exp, struct timespec __user *tsp,
432 const sigset_t __user *sigmask, size_t sigsetsize) 536 const sigset_t __user *sigmask, size_t sigsetsize)
433{ 537{
434 s64 timeout = MAX_SCHEDULE_TIMEOUT;
435 sigset_t ksigmask, sigsaved; 538 sigset_t ksigmask, sigsaved;
436 struct timespec ts; 539 struct timespec ts, end_time, *to = NULL;
437 int ret; 540 int ret;
438 541
439 if (tsp) { 542 if (tsp) {
440 if (copy_from_user(&ts, tsp, sizeof(ts))) 543 if (copy_from_user(&ts, tsp, sizeof(ts)))
441 return -EFAULT; 544 return -EFAULT;
442 545
443 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 546 to = &end_time;
547 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
444 return -EINVAL; 548 return -EINVAL;
445
446 /* Cast to u64 to make GCC stop complaining */
447 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
448 timeout = -1; /* infinite */
449 else {
450 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
451 timeout += ts.tv_sec * HZ;
452 }
453 } 549 }
454 550
455 if (sigmask) { 551 if (sigmask) {
@@ -463,32 +559,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
463 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 559 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
464 } 560 }
465 561
466 ret = core_sys_select(n, inp, outp, exp, &timeout); 562 ret = core_sys_select(n, inp, outp, exp, &end_time);
467 563 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
468 if (tsp) {
469 struct timespec rts;
470
471 if (current->personality & STICKY_TIMEOUTS)
472 goto sticky;
473 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
474 1000;
475 rts.tv_sec = timeout;
476 if (timespec_compare(&rts, &ts) >= 0)
477 rts = ts;
478 if (copy_to_user(tsp, &rts, sizeof(rts))) {
479sticky:
480 /*
481 * If an application puts its timeval in read-only
482 * memory, we don't want the Linux-specific update to
483 * the timeval to cause a fault after the select has
484 * completed successfully. However, because we're not
485 * updating the timeval, we can't restart the system
486 * call.
487 */
488 if (ret == -ERESTARTNOHAND)
489 ret = -EINTR;
490 }
491 }
492 564
493 if (ret == -ERESTARTNOHAND) { 565 if (ret == -ERESTARTNOHAND) {
494 /* 566 /*
@@ -574,18 +646,24 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
574} 646}
575 647
576static int do_poll(unsigned int nfds, struct poll_list *list, 648static int do_poll(unsigned int nfds, struct poll_list *list,
577 struct poll_wqueues *wait, s64 *timeout) 649 struct poll_wqueues *wait, struct timespec *end_time)
578{ 650{
579 int count = 0;
580 poll_table* pt = &wait->pt; 651 poll_table* pt = &wait->pt;
652 ktime_t expire, *to = NULL;
653 int timed_out = 0, count = 0;
654 unsigned long slack = 0;
581 655
582 /* Optimise the no-wait case */ 656 /* Optimise the no-wait case */
583 if (!(*timeout)) 657 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
584 pt = NULL; 658 pt = NULL;
659 timed_out = 1;
660 }
661
662 if (end_time && !timed_out)
663 slack = estimate_accuracy(end_time);
585 664
586 for (;;) { 665 for (;;) {
587 struct poll_list *walk; 666 struct poll_list *walk;
588 long __timeout;
589 667
590 set_current_state(TASK_INTERRUPTIBLE); 668 set_current_state(TASK_INTERRUPTIBLE);
591 for (walk = list; walk != NULL; walk = walk->next) { 669 for (walk = list; walk != NULL; walk = walk->next) {
@@ -617,27 +695,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
617 if (signal_pending(current)) 695 if (signal_pending(current))
618 count = -EINTR; 696 count = -EINTR;
619 } 697 }
620 if (count || !*timeout) 698 if (count || timed_out)
621 break; 699 break;
622 700
623 if (*timeout < 0) { 701 /*
624 /* Wait indefinitely */ 702 * If this is the first loop and we have a timeout
625 __timeout = MAX_SCHEDULE_TIMEOUT; 703 * given, then we convert to ktime_t and set the to
626 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { 704 * pointer to the expiry value.
627 /* 705 */
628 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in 706 if (end_time && !to) {
629 * a loop 707 expire = timespec_to_ktime(*end_time);
630 */ 708 to = &expire;
631 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
632 *timeout -= __timeout;
633 } else {
634 __timeout = *timeout;
635 *timeout = 0;
636 } 709 }
637 710
638 __timeout = schedule_timeout(__timeout); 711 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
639 if (*timeout >= 0) 712 timed_out = 1;
640 *timeout += __timeout;
641 } 713 }
642 __set_current_state(TASK_RUNNING); 714 __set_current_state(TASK_RUNNING);
643 return count; 715 return count;
@@ -646,7 +718,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
646#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ 718#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
647 sizeof(struct pollfd)) 719 sizeof(struct pollfd))
648 720
649int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) 721int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
722 struct timespec *end_time)
650{ 723{
651 struct poll_wqueues table; 724 struct poll_wqueues table;
652 int err = -EFAULT, fdcount, len, size; 725 int err = -EFAULT, fdcount, len, size;
@@ -686,7 +759,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
686 } 759 }
687 760
688 poll_initwait(&table); 761 poll_initwait(&table);
689 fdcount = do_poll(nfds, head, &table, timeout); 762 fdcount = do_poll(nfds, head, &table, end_time);
690 poll_freewait(&table); 763 poll_freewait(&table);
691 764
692 for (walk = head; walk; walk = walk->next) { 765 for (walk = head; walk; walk = walk->next) {
@@ -712,16 +785,21 @@ out_fds:
712 785
713static long do_restart_poll(struct restart_block *restart_block) 786static long do_restart_poll(struct restart_block *restart_block)
714{ 787{
715 struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; 788 struct pollfd __user *ufds = restart_block->poll.ufds;
716 int nfds = restart_block->arg1; 789 int nfds = restart_block->poll.nfds;
717 s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; 790 struct timespec *to = NULL, end_time;
718 int ret; 791 int ret;
719 792
720 ret = do_sys_poll(ufds, nfds, &timeout); 793 if (restart_block->poll.has_timeout) {
794 end_time.tv_sec = restart_block->poll.tv_sec;
795 end_time.tv_nsec = restart_block->poll.tv_nsec;
796 to = &end_time;
797 }
798
799 ret = do_sys_poll(ufds, nfds, to);
800
721 if (ret == -EINTR) { 801 if (ret == -EINTR) {
722 restart_block->fn = do_restart_poll; 802 restart_block->fn = do_restart_poll;
723 restart_block->arg2 = timeout & 0xFFFFFFFF;
724 restart_block->arg3 = (u64)timeout >> 32;
725 ret = -ERESTART_RESTARTBLOCK; 803 ret = -ERESTART_RESTARTBLOCK;
726 } 804 }
727 return ret; 805 return ret;
@@ -730,31 +808,32 @@ static long do_restart_poll(struct restart_block *restart_block)
730asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, 808asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
731 long timeout_msecs) 809 long timeout_msecs)
732{ 810{
733 s64 timeout_jiffies; 811 struct timespec end_time, *to = NULL;
734 int ret; 812 int ret;
735 813
736 if (timeout_msecs > 0) { 814 if (timeout_msecs >= 0) {
737#if HZ > 1000 815 to = &end_time;
738 /* We can only overflow if HZ > 1000 */ 816 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
739 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) 817 NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
740 timeout_jiffies = -1;
741 else
742#endif
743 timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1;
744 } else {
745 /* Infinite (< 0) or no (0) timeout */
746 timeout_jiffies = timeout_msecs;
747 } 818 }
748 819
749 ret = do_sys_poll(ufds, nfds, &timeout_jiffies); 820 ret = do_sys_poll(ufds, nfds, to);
821
750 if (ret == -EINTR) { 822 if (ret == -EINTR) {
751 struct restart_block *restart_block; 823 struct restart_block *restart_block;
824
752 restart_block = &current_thread_info()->restart_block; 825 restart_block = &current_thread_info()->restart_block;
753 restart_block->fn = do_restart_poll; 826 restart_block->fn = do_restart_poll;
754 restart_block->arg0 = (unsigned long)ufds; 827 restart_block->poll.ufds = ufds;
755 restart_block->arg1 = nfds; 828 restart_block->poll.nfds = nfds;
756 restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; 829
757 restart_block->arg3 = (u64)timeout_jiffies >> 32; 830 if (timeout_msecs >= 0) {
831 restart_block->poll.tv_sec = end_time.tv_sec;
832 restart_block->poll.tv_nsec = end_time.tv_nsec;
833 restart_block->poll.has_timeout = 1;
834 } else
835 restart_block->poll.has_timeout = 0;
836
758 ret = -ERESTART_RESTARTBLOCK; 837 ret = -ERESTART_RESTARTBLOCK;
759 } 838 }
760 return ret; 839 return ret;
@@ -766,21 +845,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
766 size_t sigsetsize) 845 size_t sigsetsize)
767{ 846{
768 sigset_t ksigmask, sigsaved; 847 sigset_t ksigmask, sigsaved;
769 struct timespec ts; 848 struct timespec ts, end_time, *to = NULL;
770 s64 timeout = -1;
771 int ret; 849 int ret;
772 850
773 if (tsp) { 851 if (tsp) {
774 if (copy_from_user(&ts, tsp, sizeof(ts))) 852 if (copy_from_user(&ts, tsp, sizeof(ts)))
775 return -EFAULT; 853 return -EFAULT;
776 854
777 /* Cast to u64 to make GCC stop complaining */ 855 to = &end_time;
778 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) 856 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
779 timeout = -1; /* infinite */ 857 return -EINVAL;
780 else {
781 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
782 timeout += ts.tv_sec * HZ;
783 }
784 } 858 }
785 859
786 if (sigmask) { 860 if (sigmask) {
@@ -794,7 +868,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
794 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 868 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
795 } 869 }
796 870
797 ret = do_sys_poll(ufds, nfds, &timeout); 871 ret = do_sys_poll(ufds, nfds, to);
798 872
799 /* We can restart this syscall, usually */ 873 /* We can restart this syscall, usually */
800 if (ret == -EINTR) { 874 if (ret == -EINTR) {
@@ -812,31 +886,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
812 } else if (sigmask) 886 } else if (sigmask)
813 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 887 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
814 888
815 if (tsp && timeout >= 0) { 889 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
816 struct timespec rts;
817
818 if (current->personality & STICKY_TIMEOUTS)
819 goto sticky;
820 /* Yes, we know it's actually an s64, but it's also positive. */
821 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
822 1000;
823 rts.tv_sec = timeout;
824 if (timespec_compare(&rts, &ts) >= 0)
825 rts = ts;
826 if (copy_to_user(tsp, &rts, sizeof(rts))) {
827 sticky:
828 /*
829 * If an application puts its timeval in read-only
830 * memory, we don't want the Linux-specific update to
831 * the timeval to cause a fault after the select has
832 * completed successfully. However, because we're not
833 * updating the timeval, we can't restart the system
834 * call.
835 */
836 if (ret == -ERESTARTNOHAND && timeout >= 0)
837 ret = -EINTR;
838 }
839 }
840 890
841 return ret; 891 return ret;
842} 892}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index c502c60e4f54..0862f0e49d0c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -52,11 +52,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
52 52
53static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 53static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
54{ 54{
55 ktime_t now, remaining; 55 ktime_t remaining;
56
57 now = ctx->tmr.base->get_time();
58 remaining = ktime_sub(ctx->tmr.expires, now);
59 56
57 remaining = hrtimer_expires_remaining(&ctx->tmr);
60 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 58 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
61} 59}
62 60
@@ -74,7 +72,7 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
74 ctx->ticks = 0; 72 ctx->ticks = 0;
75 ctx->tintv = timespec_to_ktime(ktmr->it_interval); 73 ctx->tintv = timespec_to_ktime(ktmr->it_interval);
76 hrtimer_init(&ctx->tmr, ctx->clockid, htmode); 74 hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
77 ctx->tmr.expires = texp; 75 hrtimer_set_expires(&ctx->tmr, texp);
78 ctx->tmr.function = timerfd_tmrproc; 76 ctx->tmr.function = timerfd_tmrproc;
79 if (texp.tv64 != 0) 77 if (texp.tv64 != 0)
80 hrtimer_start(&ctx->tmr, texp, htmode); 78 hrtimer_start(&ctx->tmr, texp, htmode);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 9a4e35cd5f79..2b3645b1acf4 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -20,6 +20,8 @@
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/wait.h> 22#include <linux/wait.h>
23#include <linux/percpu.h>
24
23 25
24struct hrtimer_clock_base; 26struct hrtimer_clock_base;
25struct hrtimer_cpu_base; 27struct hrtimer_cpu_base;
@@ -101,9 +103,14 @@ enum hrtimer_cb_mode {
101/** 103/**
102 * struct hrtimer - the basic hrtimer structure 104 * struct hrtimer - the basic hrtimer structure
103 * @node: red black tree node for time ordered insertion 105 * @node: red black tree node for time ordered insertion
104 * @expires: the absolute expiry time in the hrtimers internal 106 * @_expires: the absolute expiry time in the hrtimers internal
105 * representation. The time is related to the clock on 107 * representation. The time is related to the clock on
106 * which the timer is based. 108 * which the timer is based. Is setup by adding
109 * slack to the _softexpires value. For non range timers
110 * identical to _softexpires.
111 * @_softexpires: the absolute earliest expiry time of the hrtimer.
112 * The time which was given as expiry time when the timer
113 * was armed.
107 * @function: timer expiry callback function 114 * @function: timer expiry callback function
108 * @base: pointer to the timer base (per cpu and per clock) 115 * @base: pointer to the timer base (per cpu and per clock)
109 * @state: state information (See bit values above) 116 * @state: state information (See bit values above)
@@ -121,7 +128,8 @@ enum hrtimer_cb_mode {
121 */ 128 */
122struct hrtimer { 129struct hrtimer {
123 struct rb_node node; 130 struct rb_node node;
124 ktime_t expires; 131 ktime_t _expires;
132 ktime_t _softexpires;
125 enum hrtimer_restart (*function)(struct hrtimer *); 133 enum hrtimer_restart (*function)(struct hrtimer *);
126 struct hrtimer_clock_base *base; 134 struct hrtimer_clock_base *base;
127 unsigned long state; 135 unsigned long state;
@@ -201,6 +209,71 @@ struct hrtimer_cpu_base {
201#endif 209#endif
202}; 210};
203 211
212static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
213{
214 timer->_expires = time;
215 timer->_softexpires = time;
216}
217
218static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta)
219{
220 timer->_softexpires = time;
221 timer->_expires = ktime_add_safe(time, delta);
222}
223
224static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta)
225{
226 timer->_softexpires = time;
227 timer->_expires = ktime_add_safe(time, ns_to_ktime(delta));
228}
229
230static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64)
231{
232 timer->_expires.tv64 = tv64;
233 timer->_softexpires.tv64 = tv64;
234}
235
236static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
237{
238 timer->_expires = ktime_add_safe(timer->_expires, time);
239 timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
240}
241
242static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns)
243{
244 timer->_expires = ktime_add_ns(timer->_expires, ns);
245 timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
246}
247
248static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
249{
250 return timer->_expires;
251}
252
253static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
254{
255 return timer->_softexpires;
256}
257
258static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer)
259{
260 return timer->_expires.tv64;
261}
262static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
263{
264 return timer->_softexpires.tv64;
265}
266
267static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
268{
269 return ktime_to_ns(timer->_expires);
270}
271
272static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
273{
274 return ktime_sub(timer->_expires, timer->base->get_time());
275}
276
204#ifdef CONFIG_HIGH_RES_TIMERS 277#ifdef CONFIG_HIGH_RES_TIMERS
205struct clock_event_device; 278struct clock_event_device;
206 279
@@ -221,6 +294,8 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
221 return timer->base->cpu_base->hres_active; 294 return timer->base->cpu_base->hres_active;
222} 295}
223 296
297extern void hrtimer_peek_ahead_timers(void);
298
224/* 299/*
225 * The resolution of the clocks. The resolution value is returned in 300 * The resolution of the clocks. The resolution value is returned in
226 * the clock_getres() system call to give application programmers an 301 * the clock_getres() system call to give application programmers an
@@ -243,6 +318,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
243 * is expired in the next softirq when the clock was advanced. 318 * is expired in the next softirq when the clock was advanced.
244 */ 319 */
245static inline void clock_was_set(void) { } 320static inline void clock_was_set(void) { }
321static inline void hrtimer_peek_ahead_timers(void) { }
246 322
247static inline void hres_timers_resume(void) { } 323static inline void hres_timers_resume(void) { }
248 324
@@ -264,6 +340,10 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
264extern ktime_t ktime_get(void); 340extern ktime_t ktime_get(void);
265extern ktime_t ktime_get_real(void); 341extern ktime_t ktime_get_real(void);
266 342
343
344DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
345
346
267/* Exported timer functions: */ 347/* Exported timer functions: */
268 348
269/* Initialize timers: */ 349/* Initialize timers: */
@@ -288,12 +368,25 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
288/* Basic timer operations: */ 368/* Basic timer operations: */
289extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, 369extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
290 const enum hrtimer_mode mode); 370 const enum hrtimer_mode mode);
371extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
372 unsigned long range_ns, const enum hrtimer_mode mode);
291extern int hrtimer_cancel(struct hrtimer *timer); 373extern int hrtimer_cancel(struct hrtimer *timer);
292extern int hrtimer_try_to_cancel(struct hrtimer *timer); 374extern int hrtimer_try_to_cancel(struct hrtimer *timer);
293 375
376static inline int hrtimer_start_expires(struct hrtimer *timer,
377 enum hrtimer_mode mode)
378{
379 unsigned long delta;
380 ktime_t soft, hard;
381 soft = hrtimer_get_softexpires(timer);
382 hard = hrtimer_get_expires(timer);
383 delta = ktime_to_ns(ktime_sub(hard, soft));
384 return hrtimer_start_range_ns(timer, soft, delta, mode);
385}
386
294static inline int hrtimer_restart(struct hrtimer *timer) 387static inline int hrtimer_restart(struct hrtimer *timer)
295{ 388{
296 return hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); 389 return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
297} 390}
298 391
299/* Query timers: */ 392/* Query timers: */
@@ -350,6 +443,10 @@ extern long hrtimer_nanosleep_restart(struct restart_block *restart_block);
350extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, 443extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
351 struct task_struct *tsk); 444 struct task_struct *tsk);
352 445
446extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
447 const enum hrtimer_mode mode);
448extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
449
353/* Soft interrupt function to run the hrtimer queues: */ 450/* Soft interrupt function to run the hrtimer queues: */
354extern void hrtimer_run_queues(void); 451extern void hrtimer_run_queues(void);
355extern void hrtimer_run_pending(void); 452extern void hrtimer_run_pending(void);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 021d8e720c79..23fd8909b9e5 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -170,6 +170,7 @@ extern struct group_info init_groups;
170 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ 170 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
171 .fs_excl = ATOMIC_INIT(0), \ 171 .fs_excl = ATOMIC_INIT(0), \
172 .pi_lock = __SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ 172 .pi_lock = __SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
173 .timer_slack_ns = 50000, /* 50 usec default slack */ \
173 .pids = { \ 174 .pids = { \
174 [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ 175 [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
175 [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ 176 [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
diff --git a/include/linux/poll.h b/include/linux/poll.h
index ef453828877a..badd98ab06f6 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -114,11 +114,13 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset)
114 114
115#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1) 115#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
116 116
117extern int do_select(int n, fd_set_bits *fds, s64 *timeout); 117extern int do_select(int n, fd_set_bits *fds, struct timespec *end_time);
118extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, 118extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds,
119 s64 *timeout); 119 struct timespec *end_time);
120extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 120extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
121 fd_set __user *exp, s64 *timeout); 121 fd_set __user *exp, struct timespec *end_time);
122
123extern int poll_select_set_timeout(struct timespec *to, long sec, long nsec);
122 124
123#endif /* KERNEL */ 125#endif /* KERNEL */
124 126
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 5ad79198d6f9..48d887e3c6e7 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -78,4 +78,11 @@
78#define PR_GET_SECUREBITS 27 78#define PR_GET_SECUREBITS 27
79#define PR_SET_SECUREBITS 28 79#define PR_SET_SECUREBITS 28
80 80
81/*
82 * Get/set the timerslack as used by poll/select/nanosleep
83 * A value of 0 means "use default"
84 */
85#define PR_SET_TIMERSLACK 29
86#define PR_GET_TIMERSLACK 30
87
81#endif /* _LINUX_PRCTL_H */ 88#endif /* _LINUX_PRCTL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 10bff55b0824..5ca620573d47 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1345,6 +1345,12 @@ struct task_struct {
1345 int latency_record_count; 1345 int latency_record_count;
1346 struct latency_record latency_record[LT_SAVECOUNT]; 1346 struct latency_record latency_record[LT_SAVECOUNT];
1347#endif 1347#endif
1348 /*
1349 * time slack values; these are used to round up poll() and
1350 * select() etc timeout values. These are in nanoseconds.
1351 */
1352 unsigned long timer_slack_ns;
1353 unsigned long default_timer_slack_ns;
1348}; 1354};
1349 1355
1350/* 1356/*
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 38a56477f27a..e6b820f8b56b 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -38,6 +38,14 @@ struct restart_block {
38#endif 38#endif
39 u64 expires; 39 u64 expires;
40 } nanosleep; 40 } nanosleep;
41 /* For poll */
42 struct {
43 struct pollfd __user *ufds;
44 int nfds;
45 int has_timeout;
46 unsigned long tv_sec;
47 unsigned long tv_nsec;
48 } poll;
41 }; 49 };
42}; 50};
43 51
diff --git a/include/linux/time.h b/include/linux/time.h
index 4f1c9db57707..ce321ac5c8f8 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -40,6 +40,8 @@ extern struct timezone sys_tz;
40#define NSEC_PER_SEC 1000000000L 40#define NSEC_PER_SEC 1000000000L
41#define FSEC_PER_SEC 1000000000000000L 41#define FSEC_PER_SEC 1000000000000000L
42 42
43#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
44
43static inline int timespec_equal(const struct timespec *a, 45static inline int timespec_equal(const struct timespec *a,
44 const struct timespec *b) 46 const struct timespec *b)
45{ 47{
@@ -74,6 +76,8 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon,
74 const unsigned int min, const unsigned int sec); 76 const unsigned int min, const unsigned int sec);
75 77
76extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); 78extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
79extern struct timespec timespec_add_safe(const struct timespec lhs,
80 const struct timespec rhs);
77 81
78/* 82/*
79 * sub = lhs - rhs, in normalized form 83 * sub = lhs - rhs, in normalized form
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d093552dd6e..f6083561dfe0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1018,6 +1018,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1018 p->prev_utime = cputime_zero; 1018 p->prev_utime = cputime_zero;
1019 p->prev_stime = cputime_zero; 1019 p->prev_stime = cputime_zero;
1020 1020
1021 p->default_timer_slack_ns = current->timer_slack_ns;
1022
1021#ifdef CONFIG_DETECT_SOFTLOCKUP 1023#ifdef CONFIG_DETECT_SOFTLOCKUP
1022 p->last_switch_count = 0; 1024 p->last_switch_count = 0;
1023 p->last_switch_timestamp = 0; 1025 p->last_switch_timestamp = 0;
diff --git a/kernel/futex.c b/kernel/futex.c
index 7d1136e97c14..8af10027514b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1296,13 +1296,16 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1296 if (!abs_time) 1296 if (!abs_time)
1297 schedule(); 1297 schedule();
1298 else { 1298 else {
1299 unsigned long slack;
1300 slack = current->timer_slack_ns;
1301 if (rt_task(current))
1302 slack = 0;
1299 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, 1303 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
1300 HRTIMER_MODE_ABS); 1304 HRTIMER_MODE_ABS);
1301 hrtimer_init_sleeper(&t, current); 1305 hrtimer_init_sleeper(&t, current);
1302 t.timer.expires = *abs_time; 1306 hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
1303 1307
1304 hrtimer_start(&t.timer, t.timer.expires, 1308 hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
1305 HRTIMER_MODE_ABS);
1306 if (!hrtimer_active(&t.timer)) 1309 if (!hrtimer_active(&t.timer))
1307 t.task = NULL; 1310 t.task = NULL;
1308 1311
@@ -1404,7 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1404 hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, 1407 hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
1405 HRTIMER_MODE_ABS); 1408 HRTIMER_MODE_ABS);
1406 hrtimer_init_sleeper(to, current); 1409 hrtimer_init_sleeper(to, current);
1407 to->timer.expires = *time; 1410 hrtimer_set_expires(&to->timer, *time);
1408 } 1411 }
1409 1412
1410 q.pi_state = NULL; 1413 q.pi_state = NULL;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 95978f48e039..2b465dfde426 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -517,7 +517,7 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
517 if (!base->first) 517 if (!base->first)
518 continue; 518 continue;
519 timer = rb_entry(base->first, struct hrtimer, node); 519 timer = rb_entry(base->first, struct hrtimer, node);
520 expires = ktime_sub(timer->expires, base->offset); 520 expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
521 if (expires.tv64 < cpu_base->expires_next.tv64) 521 if (expires.tv64 < cpu_base->expires_next.tv64)
522 cpu_base->expires_next = expires; 522 cpu_base->expires_next = expires;
523 } 523 }
@@ -539,10 +539,10 @@ static int hrtimer_reprogram(struct hrtimer *timer,
539 struct hrtimer_clock_base *base) 539 struct hrtimer_clock_base *base)
540{ 540{
541 ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; 541 ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
542 ktime_t expires = ktime_sub(timer->expires, base->offset); 542 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
543 int res; 543 int res;
544 544
545 WARN_ON_ONCE(timer->expires.tv64 < 0); 545 WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
546 546
547 /* 547 /*
548 * When the callback is running, we do not reprogram the clock event 548 * When the callback is running, we do not reprogram the clock event
@@ -795,7 +795,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
795 u64 orun = 1; 795 u64 orun = 1;
796 ktime_t delta; 796 ktime_t delta;
797 797
798 delta = ktime_sub(now, timer->expires); 798 delta = ktime_sub(now, hrtimer_get_expires(timer));
799 799
800 if (delta.tv64 < 0) 800 if (delta.tv64 < 0)
801 return 0; 801 return 0;
@@ -807,8 +807,8 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
807 s64 incr = ktime_to_ns(interval); 807 s64 incr = ktime_to_ns(interval);
808 808
809 orun = ktime_divns(delta, incr); 809 orun = ktime_divns(delta, incr);
810 timer->expires = ktime_add_ns(timer->expires, incr * orun); 810 hrtimer_add_expires_ns(timer, incr * orun);
811 if (timer->expires.tv64 > now.tv64) 811 if (hrtimer_get_expires_tv64(timer) > now.tv64)
812 return orun; 812 return orun;
813 /* 813 /*
814 * This (and the ktime_add() below) is the 814 * This (and the ktime_add() below) is the
@@ -816,7 +816,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
816 */ 816 */
817 orun++; 817 orun++;
818 } 818 }
819 timer->expires = ktime_add_safe(timer->expires, interval); 819 hrtimer_add_expires(timer, interval);
820 820
821 return orun; 821 return orun;
822} 822}
@@ -848,7 +848,8 @@ static void enqueue_hrtimer(struct hrtimer *timer,
848 * We dont care about collisions. Nodes with 848 * We dont care about collisions. Nodes with
849 * the same expiry time stay together. 849 * the same expiry time stay together.
850 */ 850 */
851 if (timer->expires.tv64 < entry->expires.tv64) { 851 if (hrtimer_get_expires_tv64(timer) <
852 hrtimer_get_expires_tv64(entry)) {
852 link = &(*link)->rb_left; 853 link = &(*link)->rb_left;
853 } else { 854 } else {
854 link = &(*link)->rb_right; 855 link = &(*link)->rb_right;
@@ -945,9 +946,10 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
945} 946}
946 947
947/** 948/**
948 * hrtimer_start - (re)start an relative timer on the current CPU 949 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
949 * @timer: the timer to be added 950 * @timer: the timer to be added
950 * @tim: expiry time 951 * @tim: expiry time
952 * @delta_ns: "slack" range for the timer
951 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) 953 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
952 * 954 *
953 * Returns: 955 * Returns:
@@ -955,7 +957,8 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
955 * 1 when the timer was active 957 * 1 when the timer was active
956 */ 958 */
957int 959int
958hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) 960hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
961 const enum hrtimer_mode mode)
959{ 962{
960 struct hrtimer_clock_base *base, *new_base; 963 struct hrtimer_clock_base *base, *new_base;
961 unsigned long flags; 964 unsigned long flags;
@@ -983,7 +986,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
983#endif 986#endif
984 } 987 }
985 988
986 timer->expires = tim; 989 hrtimer_set_expires_range_ns(timer, tim, delta_ns);
987 990
988 timer_stats_hrtimer_set_start_info(timer); 991 timer_stats_hrtimer_set_start_info(timer);
989 992
@@ -1016,8 +1019,26 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1016 1019
1017 return ret; 1020 return ret;
1018} 1021}
1022EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
1023
1024/**
1025 * hrtimer_start - (re)start an hrtimer on the current CPU
1026 * @timer: the timer to be added
1027 * @tim: expiry time
1028 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
1029 *
1030 * Returns:
1031 * 0 on success
1032 * 1 when the timer was active
1033 */
1034int
1035hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1036{
1037 return hrtimer_start_range_ns(timer, tim, 0, mode);
1038}
1019EXPORT_SYMBOL_GPL(hrtimer_start); 1039EXPORT_SYMBOL_GPL(hrtimer_start);
1020 1040
1041
1021/** 1042/**
1022 * hrtimer_try_to_cancel - try to deactivate a timer 1043 * hrtimer_try_to_cancel - try to deactivate a timer
1023 * @timer: hrtimer to stop 1044 * @timer: hrtimer to stop
@@ -1077,7 +1098,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
1077 ktime_t rem; 1098 ktime_t rem;
1078 1099
1079 base = lock_hrtimer_base(timer, &flags); 1100 base = lock_hrtimer_base(timer, &flags);
1080 rem = ktime_sub(timer->expires, base->get_time()); 1101 rem = hrtimer_expires_remaining(timer);
1081 unlock_hrtimer_base(timer, &flags); 1102 unlock_hrtimer_base(timer, &flags);
1082 1103
1083 return rem; 1104 return rem;
@@ -1109,7 +1130,7 @@ ktime_t hrtimer_get_next_event(void)
1109 continue; 1130 continue;
1110 1131
1111 timer = rb_entry(base->first, struct hrtimer, node); 1132 timer = rb_entry(base->first, struct hrtimer, node);
1112 delta.tv64 = timer->expires.tv64; 1133 delta.tv64 = hrtimer_get_expires_tv64(timer);
1113 delta = ktime_sub(delta, base->get_time()); 1134 delta = ktime_sub(delta, base->get_time());
1114 if (delta.tv64 < mindelta.tv64) 1135 if (delta.tv64 < mindelta.tv64)
1115 mindelta.tv64 = delta.tv64; 1136 mindelta.tv64 = delta.tv64;
@@ -1310,10 +1331,23 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1310 1331
1311 timer = rb_entry(node, struct hrtimer, node); 1332 timer = rb_entry(node, struct hrtimer, node);
1312 1333
1313 if (basenow.tv64 < timer->expires.tv64) { 1334 /*
1335 * The immediate goal for using the softexpires is
1336 * minimizing wakeups, not running timers at the
1337 * earliest interrupt after their soft expiration.
1338 * This allows us to avoid using a Priority Search
1339 * Tree, which can answer a stabbing querry for
1340 * overlapping intervals and instead use the simple
1341 * BST we already have.
1342 * We don't add extra wakeups by delaying timers that
1343 * are right-of a not yet expired timer, because that
1344 * timer will have to trigger a wakeup anyway.
1345 */
1346
1347 if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
1314 ktime_t expires; 1348 ktime_t expires;
1315 1349
1316 expires = ktime_sub(timer->expires, 1350 expires = ktime_sub(hrtimer_get_expires(timer),
1317 base->offset); 1351 base->offset);
1318 if (expires.tv64 < expires_next.tv64) 1352 if (expires.tv64 < expires_next.tv64)
1319 expires_next = expires; 1353 expires_next = expires;
@@ -1349,6 +1383,30 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1349 raise_softirq(HRTIMER_SOFTIRQ); 1383 raise_softirq(HRTIMER_SOFTIRQ);
1350} 1384}
1351 1385
1386/**
1387 * hrtimer_peek_ahead_timers -- run soft-expired timers now
1388 *
1389 * hrtimer_peek_ahead_timers will peek at the timer queue of
1390 * the current cpu and check if there are any timers for which
1391 * the soft expires time has passed. If any such timers exist,
1392 * they are run immediately and then removed from the timer queue.
1393 *
1394 */
1395void hrtimer_peek_ahead_timers(void)
1396{
1397 struct tick_device *td;
1398 unsigned long flags;
1399
1400 if (!hrtimer_hres_active())
1401 return;
1402
1403 local_irq_save(flags);
1404 td = &__get_cpu_var(tick_cpu_device);
1405 if (td && td->evtdev)
1406 hrtimer_interrupt(td->evtdev);
1407 local_irq_restore(flags);
1408}
1409
1352static void run_hrtimer_softirq(struct softirq_action *h) 1410static void run_hrtimer_softirq(struct softirq_action *h)
1353{ 1411{
1354 run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); 1412 run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
@@ -1414,7 +1472,8 @@ void hrtimer_run_queues(void)
1414 struct hrtimer *timer; 1472 struct hrtimer *timer;
1415 1473
1416 timer = rb_entry(node, struct hrtimer, node); 1474 timer = rb_entry(node, struct hrtimer, node);
1417 if (base->softirq_time.tv64 <= timer->expires.tv64) 1475 if (base->softirq_time.tv64 <=
1476 hrtimer_get_expires_tv64(timer))
1418 break; 1477 break;
1419 1478
1420 if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { 1479 if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
@@ -1462,7 +1521,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
1462 1521
1463 do { 1522 do {
1464 set_current_state(TASK_INTERRUPTIBLE); 1523 set_current_state(TASK_INTERRUPTIBLE);
1465 hrtimer_start(&t->timer, t->timer.expires, mode); 1524 hrtimer_start_expires(&t->timer, mode);
1466 if (!hrtimer_active(&t->timer)) 1525 if (!hrtimer_active(&t->timer))
1467 t->task = NULL; 1526 t->task = NULL;
1468 1527
@@ -1484,7 +1543,7 @@ static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
1484 struct timespec rmt; 1543 struct timespec rmt;
1485 ktime_t rem; 1544 ktime_t rem;
1486 1545
1487 rem = ktime_sub(timer->expires, timer->base->get_time()); 1546 rem = hrtimer_expires_remaining(timer);
1488 if (rem.tv64 <= 0) 1547 if (rem.tv64 <= 0)
1489 return 0; 1548 return 0;
1490 rmt = ktime_to_timespec(rem); 1549 rmt = ktime_to_timespec(rem);
@@ -1503,7 +1562,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1503 1562
1504 hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, 1563 hrtimer_init_on_stack(&t.timer, restart->nanosleep.index,
1505 HRTIMER_MODE_ABS); 1564 HRTIMER_MODE_ABS);
1506 t.timer.expires.tv64 = restart->nanosleep.expires; 1565 hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
1507 1566
1508 if (do_nanosleep(&t, HRTIMER_MODE_ABS)) 1567 if (do_nanosleep(&t, HRTIMER_MODE_ABS))
1509 goto out; 1568 goto out;
@@ -1528,9 +1587,14 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1528 struct restart_block *restart; 1587 struct restart_block *restart;
1529 struct hrtimer_sleeper t; 1588 struct hrtimer_sleeper t;
1530 int ret = 0; 1589 int ret = 0;
1590 unsigned long slack;
1591
1592 slack = current->timer_slack_ns;
1593 if (rt_task(current))
1594 slack = 0;
1531 1595
1532 hrtimer_init_on_stack(&t.timer, clockid, mode); 1596 hrtimer_init_on_stack(&t.timer, clockid, mode);
1533 t.timer.expires = timespec_to_ktime(*rqtp); 1597 hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
1534 if (do_nanosleep(&t, mode)) 1598 if (do_nanosleep(&t, mode))
1535 goto out; 1599 goto out;
1536 1600
@@ -1550,7 +1614,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1550 restart->fn = hrtimer_nanosleep_restart; 1614 restart->fn = hrtimer_nanosleep_restart;
1551 restart->nanosleep.index = t.timer.base->index; 1615 restart->nanosleep.index = t.timer.base->index;
1552 restart->nanosleep.rmtp = rmtp; 1616 restart->nanosleep.rmtp = rmtp;
1553 restart->nanosleep.expires = t.timer.expires.tv64; 1617 restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
1554 1618
1555 ret = -ERESTART_RESTARTBLOCK; 1619 ret = -ERESTART_RESTARTBLOCK;
1556out: 1620out:
@@ -1752,3 +1816,103 @@ void __init hrtimers_init(void)
1752#endif 1816#endif
1753} 1817}
1754 1818
1819/**
1820 * schedule_hrtimeout_range - sleep until timeout
1821 * @expires: timeout value (ktime_t)
1822 * @delta: slack in expires timeout (ktime_t)
1823 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1824 *
1825 * Make the current task sleep until the given expiry time has
1826 * elapsed. The routine will return immediately unless
1827 * the current task state has been set (see set_current_state()).
1828 *
1829 * The @delta argument gives the kernel the freedom to schedule the
1830 * actual wakeup to a time that is both power and performance friendly.
1831 * The kernel give the normal best effort behavior for "@expires+@delta",
1832 * but may decide to fire the timer earlier, but no earlier than @expires.
1833 *
1834 * You can set the task state as follows -
1835 *
1836 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1837 * pass before the routine returns.
1838 *
1839 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1840 * delivered to the current task.
1841 *
1842 * The current task state is guaranteed to be TASK_RUNNING when this
1843 * routine returns.
1844 *
1845 * Returns 0 when the timer has expired otherwise -EINTR
1846 */
1847int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1848 const enum hrtimer_mode mode)
1849{
1850 struct hrtimer_sleeper t;
1851
1852 /*
1853 * Optimize when a zero timeout value is given. It does not
1854 * matter whether this is an absolute or a relative time.
1855 */
1856 if (expires && !expires->tv64) {
1857 __set_current_state(TASK_RUNNING);
1858 return 0;
1859 }
1860
1861 /*
1862 * A NULL parameter means "inifinte"
1863 */
1864 if (!expires) {
1865 schedule();
1866 __set_current_state(TASK_RUNNING);
1867 return -EINTR;
1868 }
1869
1870 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
1871 hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
1872
1873 hrtimer_init_sleeper(&t, current);
1874
1875 hrtimer_start_expires(&t.timer, mode);
1876 if (!hrtimer_active(&t.timer))
1877 t.task = NULL;
1878
1879 if (likely(t.task))
1880 schedule();
1881
1882 hrtimer_cancel(&t.timer);
1883 destroy_hrtimer_on_stack(&t.timer);
1884
1885 __set_current_state(TASK_RUNNING);
1886
1887 return !t.task ? 0 : -EINTR;
1888}
1889EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
1890
1891/**
1892 * schedule_hrtimeout - sleep until timeout
1893 * @expires: timeout value (ktime_t)
1894 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1895 *
1896 * Make the current task sleep until the given expiry time has
1897 * elapsed. The routine will return immediately unless
1898 * the current task state has been set (see set_current_state()).
1899 *
1900 * You can set the task state as follows -
1901 *
1902 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1903 * pass before the routine returns.
1904 *
1905 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1906 * delivered to the current task.
1907 *
1908 * The current task state is guaranteed to be TASK_RUNNING when this
1909 * routine returns.
1910 *
1911 * Returns 0 when the timer has expired otherwise -EINTR
1912 */
1913int __sched schedule_hrtimeout(ktime_t *expires,
1914 const enum hrtimer_mode mode)
1915{
1916 return schedule_hrtimeout_range(expires, 0, mode);
1917}
1918EXPORT_SYMBOL_GPL(schedule_hrtimeout);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index b931d7cedbfa..5e79c662294b 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -639,7 +639,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
639 (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) 639 (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
640 timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); 640 timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
641 641
642 remaining = ktime_sub(timer->expires, now); 642 remaining = ktime_sub(hrtimer_get_expires(timer), now);
643 /* Return 0 only, when the timer is expired and not pending */ 643 /* Return 0 only, when the timer is expired and not pending */
644 if (remaining.tv64 <= 0) { 644 if (remaining.tv64 <= 0) {
645 /* 645 /*
@@ -733,7 +733,7 @@ common_timer_set(struct k_itimer *timr, int flags,
733 hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); 733 hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
734 timr->it.real.timer.function = posix_timer_fn; 734 timr->it.real.timer.function = posix_timer_fn;
735 735
736 timer->expires = timespec_to_ktime(new_setting->it_value); 736 hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value));
737 737
738 /* Convert interval */ 738 /* Convert interval */
739 timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); 739 timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
@@ -742,14 +742,12 @@ common_timer_set(struct k_itimer *timr, int flags,
742 if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { 742 if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
743 /* Setup correct expiry time for relative timers */ 743 /* Setup correct expiry time for relative timers */
744 if (mode == HRTIMER_MODE_REL) { 744 if (mode == HRTIMER_MODE_REL) {
745 timer->expires = 745 hrtimer_add_expires(timer, timer->base->get_time());
746 ktime_add_safe(timer->expires,
747 timer->base->get_time());
748 } 746 }
749 return 0; 747 return 0;
750 } 748 }
751 749
752 hrtimer_start(timer, timer->expires, mode); 750 hrtimer_start_expires(timer, mode);
753 return 0; 751 return 0;
754} 752}
755 753
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 6522ae5b14a2..69d9cb921ffa 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -631,8 +631,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
631 631
632 /* Setup the timer, when timeout != NULL */ 632 /* Setup the timer, when timeout != NULL */
633 if (unlikely(timeout)) { 633 if (unlikely(timeout)) {
634 hrtimer_start(&timeout->timer, timeout->timer.expires, 634 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
635 HRTIMER_MODE_ABS);
636 if (!hrtimer_active(&timeout->timer)) 635 if (!hrtimer_active(&timeout->timer))
637 timeout->task = NULL; 636 timeout->task = NULL;
638 } 637 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 945a97b9600d..1645c7211944 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -227,9 +227,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
227 227
228 now = hrtimer_cb_get_time(&rt_b->rt_period_timer); 228 now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
229 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); 229 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
230 hrtimer_start(&rt_b->rt_period_timer, 230 hrtimer_start_expires(&rt_b->rt_period_timer,
231 rt_b->rt_period_timer.expires, 231 HRTIMER_MODE_ABS);
232 HRTIMER_MODE_ABS);
233 } 232 }
234 spin_unlock(&rt_b->rt_runtime_lock); 233 spin_unlock(&rt_b->rt_runtime_lock);
235} 234}
@@ -1071,7 +1070,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
1071 struct hrtimer *timer = &rq->hrtick_timer; 1070 struct hrtimer *timer = &rq->hrtick_timer;
1072 ktime_t time = ktime_add_ns(timer->base->get_time(), delay); 1071 ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
1073 1072
1074 timer->expires = time; 1073 hrtimer_set_expires(timer, time);
1075 1074
1076 if (rq == this_rq()) { 1075 if (rq == this_rq()) {
1077 hrtimer_restart(timer); 1076 hrtimer_restart(timer);
diff --git a/kernel/sys.c b/kernel/sys.c
index 53879cdae483..31deba8f7d16 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1716,6 +1716,16 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1716 case PR_SET_TSC: 1716 case PR_SET_TSC:
1717 error = SET_TSC_CTL(arg2); 1717 error = SET_TSC_CTL(arg2);
1718 break; 1718 break;
1719 case PR_GET_TIMERSLACK:
1720 error = current->timer_slack_ns;
1721 break;
1722 case PR_SET_TIMERSLACK:
1723 if (arg2 <= 0)
1724 current->timer_slack_ns =
1725 current->default_timer_slack_ns;
1726 else
1727 current->timer_slack_ns = arg2;
1728 break;
1719 default: 1729 default:
1720 error = -EINVAL; 1730 error = -EINVAL;
1721 break; 1731 break;
diff --git a/kernel/time.c b/kernel/time.c
index 6a08660b4fac..d63a4336fad6 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -669,3 +669,21 @@ EXPORT_SYMBOL(get_jiffies_64);
669#endif 669#endif
670 670
671EXPORT_SYMBOL(jiffies); 671EXPORT_SYMBOL(jiffies);
672
673/*
674 * Add two timespec values and do a safety check for overflow.
675 * It's assumed that both values are valid (>= 0)
676 */
677struct timespec timespec_add_safe(const struct timespec lhs,
678 const struct timespec rhs)
679{
680 struct timespec res;
681
682 set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec,
683 lhs.tv_nsec + rhs.tv_nsec);
684
685 if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
686 res.tv_sec = TIME_T_MAX;
687
688 return res;
689}
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 1a20715bfd6e..8ff15e5d486b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -142,8 +142,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
142 time_state = TIME_OOP; 142 time_state = TIME_OOP;
143 printk(KERN_NOTICE "Clock: " 143 printk(KERN_NOTICE "Clock: "
144 "inserting leap second 23:59:60 UTC\n"); 144 "inserting leap second 23:59:60 UTC\n");
145 leap_timer.expires = ktime_add_ns(leap_timer.expires, 145 hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
146 NSEC_PER_SEC);
147 res = HRTIMER_RESTART; 146 res = HRTIMER_RESTART;
148 break; 147 break;
149 case TIME_DEL: 148 case TIME_DEL:
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 727c1ae0517a..5bbb1044f847 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -300,7 +300,7 @@ void tick_nohz_stop_sched_tick(int inidle)
300 goto out; 300 goto out;
301 } 301 }
302 302
303 ts->idle_tick = ts->sched_timer.expires; 303 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
304 ts->tick_stopped = 1; 304 ts->tick_stopped = 1;
305 ts->idle_jiffies = last_jiffies; 305 ts->idle_jiffies = last_jiffies;
306 rcu_enter_nohz(); 306 rcu_enter_nohz();
@@ -380,21 +380,21 @@ ktime_t tick_nohz_get_sleep_length(void)
380static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) 380static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
381{ 381{
382 hrtimer_cancel(&ts->sched_timer); 382 hrtimer_cancel(&ts->sched_timer);
383 ts->sched_timer.expires = ts->idle_tick; 383 hrtimer_set_expires(&ts->sched_timer, ts->idle_tick);
384 384
385 while (1) { 385 while (1) {
386 /* Forward the time to expire in the future */ 386 /* Forward the time to expire in the future */
387 hrtimer_forward(&ts->sched_timer, now, tick_period); 387 hrtimer_forward(&ts->sched_timer, now, tick_period);
388 388
389 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 389 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
390 hrtimer_start(&ts->sched_timer, 390 hrtimer_start_expires(&ts->sched_timer,
391 ts->sched_timer.expires,
392 HRTIMER_MODE_ABS); 391 HRTIMER_MODE_ABS);
393 /* Check, if the timer was already in the past */ 392 /* Check, if the timer was already in the past */
394 if (hrtimer_active(&ts->sched_timer)) 393 if (hrtimer_active(&ts->sched_timer))
395 break; 394 break;
396 } else { 395 } else {
397 if (!tick_program_event(ts->sched_timer.expires, 0)) 396 if (!tick_program_event(
397 hrtimer_get_expires(&ts->sched_timer), 0))
398 break; 398 break;
399 } 399 }
400 /* Update jiffies and reread time */ 400 /* Update jiffies and reread time */
@@ -456,14 +456,16 @@ void tick_nohz_restart_sched_tick(void)
456 */ 456 */
457 ts->tick_stopped = 0; 457 ts->tick_stopped = 0;
458 ts->idle_exittime = now; 458 ts->idle_exittime = now;
459
459 tick_nohz_restart(ts, now); 460 tick_nohz_restart(ts, now);
461
460 local_irq_enable(); 462 local_irq_enable();
461} 463}
462 464
463static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) 465static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
464{ 466{
465 hrtimer_forward(&ts->sched_timer, now, tick_period); 467 hrtimer_forward(&ts->sched_timer, now, tick_period);
466 return tick_program_event(ts->sched_timer.expires, 0); 468 return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0);
467} 469}
468 470
469/* 471/*
@@ -542,7 +544,7 @@ static void tick_nohz_switch_to_nohz(void)
542 next = tick_init_jiffy_update(); 544 next = tick_init_jiffy_update();
543 545
544 for (;;) { 546 for (;;) {
545 ts->sched_timer.expires = next; 547 hrtimer_set_expires(&ts->sched_timer, next);
546 if (!tick_program_event(next, 0)) 548 if (!tick_program_event(next, 0))
547 break; 549 break;
548 next = ktime_add(next, tick_period); 550 next = ktime_add(next, tick_period);
@@ -577,7 +579,7 @@ static void tick_nohz_kick_tick(int cpu)
577 * already reached or less/equal than the tick period. 579 * already reached or less/equal than the tick period.
578 */ 580 */
579 now = ktime_get(); 581 now = ktime_get();
580 delta = ktime_sub(ts->sched_timer.expires, now); 582 delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
581 if (delta.tv64 <= tick_period.tv64) 583 if (delta.tv64 <= tick_period.tv64)
582 return; 584 return;
583 585
@@ -678,16 +680,15 @@ void tick_setup_sched_timer(void)
678 ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; 680 ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
679 681
680 /* Get the next period (per cpu) */ 682 /* Get the next period (per cpu) */
681 ts->sched_timer.expires = tick_init_jiffy_update(); 683 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
682 offset = ktime_to_ns(tick_period) >> 1; 684 offset = ktime_to_ns(tick_period) >> 1;
683 do_div(offset, num_possible_cpus()); 685 do_div(offset, num_possible_cpus());
684 offset *= smp_processor_id(); 686 offset *= smp_processor_id();
685 ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); 687 hrtimer_add_expires_ns(&ts->sched_timer, offset);
686 688
687 for (;;) { 689 for (;;) {
688 hrtimer_forward(&ts->sched_timer, now, tick_period); 690 hrtimer_forward(&ts->sched_timer, now, tick_period);
689 hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, 691 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
690 HRTIMER_MODE_ABS);
691 /* Check, if the timer was already in the past */ 692 /* Check, if the timer was already in the past */
692 if (hrtimer_active(&ts->sched_timer)) 693 if (hrtimer_active(&ts->sched_timer))
693 break; 694 break;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index f6426911e35a..a999b92a1277 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -66,9 +66,11 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
66 SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); 66 SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
67#endif 67#endif
68 SEQ_printf(m, "\n"); 68 SEQ_printf(m, "\n");
69 SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n", 69 SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
70 (unsigned long long)ktime_to_ns(timer->expires), 70 (unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
71 (long long)(ktime_to_ns(timer->expires) - now)); 71 (unsigned long long)ktime_to_ns(hrtimer_get_expires(timer)),
72 (long long)(ktime_to_ns(hrtimer_get_softexpires(timer)) - now),
73 (long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now));
72} 74}
73 75
74static void 76static void
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 8b06fa900482..03e389e8d945 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -545,9 +545,10 @@ static void cbq_ovl_delay(struct cbq_class *cl)
545 expires = ktime_set(0, 0); 545 expires = ktime_set(0, 0);
546 expires = ktime_add_ns(expires, PSCHED_US2NS(sched)); 546 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
547 if (hrtimer_try_to_cancel(&q->delay_timer) && 547 if (hrtimer_try_to_cancel(&q->delay_timer) &&
548 ktime_to_ns(ktime_sub(q->delay_timer.expires, 548 ktime_to_ns(ktime_sub(
549 expires)) > 0) 549 hrtimer_get_expires(&q->delay_timer),
550 q->delay_timer.expires = expires; 550 expires)) > 0)
551 hrtimer_set_expires(&q->delay_timer, expires);
551 hrtimer_restart(&q->delay_timer); 552 hrtimer_restart(&q->delay_timer);
552 cl->delayed = 1; 553 cl->delayed = 1;
553 cl->xstats.overactions++; 554 cl->xstats.overactions++;
diff --git a/sound/drivers/pcsp/pcsp_lib.c b/sound/drivers/pcsp/pcsp_lib.c
index e341f3f83b6a..1f42e4063118 100644
--- a/sound/drivers/pcsp/pcsp_lib.c
+++ b/sound/drivers/pcsp/pcsp_lib.c
@@ -34,7 +34,7 @@ enum hrtimer_restart pcsp_do_timer(struct hrtimer *handle)
34 chip->thalf = 0; 34 chip->thalf = 0;
35 if (!atomic_read(&chip->timer_active)) 35 if (!atomic_read(&chip->timer_active))
36 return HRTIMER_NORESTART; 36 return HRTIMER_NORESTART;
37 hrtimer_forward(&chip->timer, chip->timer.expires, 37 hrtimer_forward(&chip->timer, hrtimer_get_expires(&chip->timer),
38 ktime_set(0, chip->ns_rem)); 38 ktime_set(0, chip->ns_rem));
39 return HRTIMER_RESTART; 39 return HRTIMER_RESTART;
40 } 40 }
@@ -118,7 +118,8 @@ enum hrtimer_restart pcsp_do_timer(struct hrtimer *handle)
118 chip->ns_rem = PCSP_PERIOD_NS(); 118 chip->ns_rem = PCSP_PERIOD_NS();
119 ns = (chip->thalf ? PCSP_CALC_NS(timer_cnt) : chip->ns_rem); 119 ns = (chip->thalf ? PCSP_CALC_NS(timer_cnt) : chip->ns_rem);
120 chip->ns_rem -= ns; 120 chip->ns_rem -= ns;
121 hrtimer_forward(&chip->timer, chip->timer.expires, ktime_set(0, ns)); 121 hrtimer_forward(&chip->timer, hrtimer_get_expires(&chip->timer),
122 ktime_set(0, ns));
122 return HRTIMER_RESTART; 123 return HRTIMER_RESTART;
123 124
124exit_nr_unlock2: 125exit_nr_unlock2: