From 6c5e9059692567740a4ee51530dffe51a4b9584d Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 21 Oct 2016 08:58:50 -0700 Subject: timers: Fix usleep_range() in the context of wake_up_process() Users of usleep_range() expect that it will _never_ return in less time than the minimum passed parameter. However, nothing in the code ensures this, when the sleeping task is woken by wake_up_process() or any other mechanism which can wake a task from uninterruptible state. Neither usleep_range() nor schedule_hrtimeout_range*() have any protection against wakeups. schedule_hrtimeout_range*() is designed this way despite the fact that the API documentation does not mention it. msleep() already has code to handle this case since it will loop as long as there was still time left. usleep_range() has no such loop, add it. Presumably this problem was not detected before because usleep_range() is only used in a few places and the function is mostly used in contexts which are not exposed to wakeups of any form. An effort was made to look for users relying on the old behavior by looking for usleep_range() in the same file as wake_up_process(). No problems were found by this search, though it is conceivable that someone could have put the sleep and wakeup in two different files. An effort was made to ask several upstream maintainers if they were aware of people relying on wake_up_process() to wake up usleep_range(). No maintainers were aware of that but they were aware of many people relying on usleep_range() never returning before the minimum. Reported-by: Tao Huang Signed-off-by: Douglas Anderson Cc: heiko@sntech.de Cc: broonie@kernel.org Cc: briannorris@chromium.org Cc: Andreas Mohr Cc: linux-rockchip@lists.infradead.org Cc: tony.xie@rock-chips.com Cc: John Stultz Cc: djkurtz@chromium.org Cc: linux@roeck-us.net Cc: tskd08@gmail.com Link: http://lkml.kernel.org/r/1477065531-30342-1-git-send-email-dianders@chromium.org Signed-off-by: Thomas Gleixner --- kernel/time/timer.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'kernel/time/timer.c') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2d47980a1bc4..12681c9a7683 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1896,16 +1896,6 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); -static void __sched do_usleep_range(unsigned long min, unsigned long max) -{ - ktime_t kmin; - u64 delta; - - kmin = ktime_set(0, min * NSEC_PER_USEC); - delta = (u64)(max - min) * NSEC_PER_USEC; - schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); -} - /** * usleep_range - Sleep for an approximate time * @min: Minimum time in usecs to sleep @@ -1919,7 +1909,14 @@ static void __sched do_usleep_range(unsigned long min, unsigned long max) */ void __sched usleep_range(unsigned long min, unsigned long max) { - __set_current_state(TASK_UNINTERRUPTIBLE); - do_usleep_range(min, max); + ktime_t exp = ktime_add_us(ktime_get(), min); + u64 delta = (u64)(max - min) * NSEC_PER_USEC; + + for (;;) { + __set_current_state(TASK_UNINTERRUPTIBLE); + /* Do not return before the requested sleep time has elapsed */ + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) + break; + } } EXPORT_SYMBOL(usleep_range); -- cgit v1.2.2 From 4b7e9cf9c84b09adc428e0433cd376b91f9c52a7 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 21 Oct 2016 08:58:51 -0700 Subject: timers: Fix documentation for schedule_timeout() and similar The documentation for schedule_timeout(), schedule_hrtimeout(), and schedule_hrtimeout_range() all claim that the routines couldn't possibly return early if the task state was TASK_UNINTERRUPTIBLE. This is simply not true since wake_up_process() will cause those routines to exit early. We cannot make schedule_[hr]timeout() loop until the timeout expires if the task state is uninterruptible because we have users which rely on the existing and designed behaviour. Make the documentation match the (correct) implementation. schedule_hrtimeout() returns -EINTR even when a uninterruptible task was woken up. This might look strange, but making the return code depend on the state is too much of an effort as it would affect all the call sites. There is no value in doing so, but we spell it out clearly in the documentation. Suggested-by: Daniel Kurtz Signed-off-by: Douglas Anderson Cc: huangtao@rock-chips.com Cc: heiko@sntech.de Cc: broonie@kernel.org Cc: briannorris@chromium.org Cc: Andreas Mohr Cc: linux-rockchip@lists.infradead.org Cc: tony.xie@rock-chips.com Cc: John Stultz Cc: linux@roeck-us.net Cc: tskd08@gmail.com Link: http://lkml.kernel.org/r/1477065531-30342-2-git-send-email-dianders@chromium.org Signed-off-by: Thomas Gleixner --- kernel/time/timer.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'kernel/time/timer.c') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 12681c9a7683..88aab86a4594 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1691,11 +1691,12 @@ static void process_timeout(unsigned long __data) * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to - * pass before the routine returns. The routine will return 0 + * pass before the routine returns unless the current task is explicitly + * woken up, (e.g. by wake_up_process())". * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. In this case the remaining time - * in jiffies will be returned, or 0 if the timer expired in time + * delivered to the current task or the current task is explicitly woken + * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. @@ -1704,7 +1705,9 @@ static void process_timeout(unsigned long __data) * the CPU away without a bound on the timeout. In this case the return * value will be %MAX_SCHEDULE_TIMEOUT. * - * In all cases the return value is guaranteed to be non-negative. + * Returns 0 when the timer has expired otherwise the remaining time in + * jiffies will be returned. In all cases the return value is guaranteed + * to be non-negative. */ signed long __sched schedule_timeout(signed long timeout) { -- cgit v1.2.2 From 74ba181e61c6accf9066d6980f44588de2f854f6 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 11 Nov 2016 00:10:08 -0500 Subject: timer: Move sys_alarm from timer.c to itimer.c Move the only user of alarm_setitimer to itimer.c where it is defined. This allows for making alarm_setitimer static, and dropping it from the build when __ARCH_WANT_SYS_ALARM is not defined. Signed-off-by: Nicolas Pitre Acked-by: John Stultz Cc: Paul Bolle Cc: linux-kbuild@vger.kernel.org Cc: netdev@vger.kernel.org Cc: Richard Cochran Cc: Josh Triplett Cc: Michal Marek Cc: Edward Cree Link: http://lkml.kernel.org/r/1478841010-28605-5-git-send-email-nicolas.pitre@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timer.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'kernel/time/timer.c') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 88aab86a4594..42d27aa242b9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1662,19 +1662,6 @@ void run_local_timers(void) raise_softirq(TIMER_SOFTIRQ); } -#ifdef __ARCH_WANT_SYS_ALARM - -/* - * For backwards compatibility? This can be done in libc so Alpha - * and all newer ports shouldn't need it. - */ -SYSCALL_DEFINE1(alarm, unsigned int, seconds) -{ - return alarm_setitimer(seconds); -} - -#endif - static void process_timeout(unsigned long __data) { wake_up_process((struct task_struct *)__data); -- cgit v1.2.2 From baa73d9e478ff32d62f3f9422822b59dd9a95a21 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 11 Nov 2016 00:10:10 -0500 Subject: posix-timers: Make them configurable Some embedded systems have no use for them. This removes about 25KB from the kernel binary size when configured out. Corresponding syscalls are routed to a stub logging the attempt to use those syscalls which should be enough of a clue if they were disabled without proper consideration. They are: timer_create, timer_gettime: timer_getoverrun, timer_settime, timer_delete, clock_adjtime, setitimer, getitimer, alarm. The clock_settime, clock_gettime, clock_getres and clock_nanosleep syscalls are replaced by simple wrappers compatible with CLOCK_REALTIME, CLOCK_MONOTONIC and CLOCK_BOOTTIME only which should cover the vast majority of use cases with very little code. Signed-off-by: Nicolas Pitre Acked-by: Richard Cochran Acked-by: Thomas Gleixner Acked-by: John Stultz Reviewed-by: Josh Triplett Cc: Paul Bolle Cc: linux-kbuild@vger.kernel.org Cc: netdev@vger.kernel.org Cc: Michal Marek Cc: Edward Cree Link: http://lkml.kernel.org/r/1478841010-28605-7-git-send-email-nicolas.pitre@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/time/timer.c') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 42d27aa242b9..e2892e454fe3 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1601,7 +1601,8 @@ void update_process_times(int user_tick) irq_work_tick(); #endif scheduler_tick(); - run_posix_cpu_timers(p); + if (IS_ENABLED(CONFIG_POSIX_TIMERS)) + run_posix_cpu_timers(p); } /** -- cgit v1.2.2