aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-08-06 16:12:36 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-08-06 16:12:36 -0400
commitaf390084359a5de20046c901529b2b6a50b941cb (patch)
treeb73a6261d1b1f9fb34432cc9a47411a49330b8dc
parent7645e4320497b35ce9fb6c2269ebcd57af9fe735 (diff)
parent0fcb80818bc3ade5befd409051089f710adcf7b0 (diff)
Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  Documentation: Add timers/timers-howto.txt
  timer: Added usleep_range timer
  Revert "timer: Added usleep[_range] timer"
  clockevents: Remove the per cpu tick skew
  posix_timer: Move copy_to_user(created_timer_id) down in timer_create()
  timer: Added usleep[_range] timer
  timers: Document meaning of deferrable timer
-rw-r--r--Documentation/timers/timers-howto.txt105
-rw-r--r--include/linux/delay.h1
-rw-r--r--kernel/posix-timers.c11
-rw-r--r--kernel/time/tick-sched.c5
-rw-r--r--kernel/timer.c31
5 files changed, 141 insertions, 12 deletions
diff --git a/Documentation/timers/timers-howto.txt b/Documentation/timers/timers-howto.txt
new file mode 100644
index 000000000000..c9ef29d2ede3
--- /dev/null
+++ b/Documentation/timers/timers-howto.txt
@@ -0,0 +1,105 @@
1delays - Information on the various kernel delay / sleep mechanisms
2-------------------------------------------------------------------
3
4This document seeks to answer the common question: "What is the
5RightWay (TM) to insert a delay?"
6
7This question is most often faced by driver writers who have to
8deal with hardware delays and who may not be the most intimately
9familiar with the inner workings of the Linux Kernel.
10
11
12Inserting Delays
13----------------
14
15The first, and most important, question you need to ask is "Is my
16code in an atomic context?" This should be followed closely by "Does
17it really need to delay in atomic context?" If so...
18
19ATOMIC CONTEXT:
20 You must use the *delay family of functions. These
21 functions use the jiffy estimation of clock speed
22 and will busy wait for enough loop cycles to achieve
23 the desired delay:
24
25 ndelay(unsigned long nsecs)
26 udelay(unsigned long usecs)
27 mdelay(unsigned long msecs)
28
29 udelay is the generally preferred API; ndelay-level
30 precision may not actually exist on many non-PC devices.
31
32 mdelay is a macro wrapper around udelay, to account for
33 possible overflow when passing large arguments to udelay.
34 In general, use of mdelay is discouraged and code should
35 be refactored to allow for the use of msleep.
36
37NON-ATOMIC CONTEXT:
38 You should use the *sleep[_range] family of functions.
39 There are a few more options here; while any of them may
40 work correctly, using the "right" sleep function will
41 help the scheduler, power management, and just make your
42 driver better :)
43
44 -- Backed by busy-wait loop:
45 udelay(unsigned long usecs)
46 -- Backed by hrtimers:
47 usleep_range(unsigned long min, unsigned long max)
48 -- Backed by jiffies / legacy_timers
49 msleep(unsigned long msecs)
50 msleep_interruptible(unsigned long msecs)
51
52 Unlike the *delay family, the underlying mechanism
53 driving each of these calls varies, thus there are
54 quirks you should be aware of.
55
56
57 SLEEPING FOR "A FEW" USECS ( < ~10us? ):
58 * Use udelay
59
60 - Why not usleep?
61 On slower systems, (embedded, OR perhaps a speed-
62 stepped PC!) the overhead of setting up the hrtimers
63 for usleep *may* not be worth it. Such an evaluation
64 will obviously depend on your specific situation, but
65 it is something to be aware of.
66
67 SLEEPING FOR ~USECS OR SMALL MSECS ( 10us - 20ms):
68 * Use usleep_range
69
70 - Why not msleep for (1ms - 20ms)?
71 Explained originally here:
72 http://lkml.org/lkml/2007/8/3/250
73 msleep(1~20) may not do what the caller intends, and
74 will often sleep longer (~20 ms actual sleep for any
75 value given in the 1~20ms range). In many cases this
76 is not the desired behavior.
77
78 - Why is there no "usleep" / What is a good range?
79 Since usleep_range is built on top of hrtimers, the
80 wakeup will be very precise (ish), thus a simple
81 usleep function would likely introduce a large number
82 of undesired interrupts.
83
84 With the introduction of a range, the scheduler is
85 free to coalesce your wakeup with any other wakeup
86 that may have happened for other reasons, or at the
87 worst case, fire an interrupt for your upper bound.
88
89 The larger a range you supply, the greater a chance
90 that you will not trigger an interrupt; this should
91 be balanced with what is an acceptable upper bound on
92 delay / performance for your specific code path. Exact
93 tolerances here are very situation specific, thus it
94 is left to the caller to determine a reasonable range.
95
96 SLEEPING FOR LARGER MSECS ( 10ms+ ):
97 * Use msleep or possibly msleep_interruptible
98
99 - What's the difference?
100 msleep sets the current task to TASK_UNINTERRUPTIBLE
101 whereas msleep_interruptible sets the current task to
102 TASK_INTERRUPTIBLE before scheduling the sleep. In
103 short, the difference is whether the sleep can be ended
104 early by a signal. In general, just use msleep unless
105 you know you have a need for the interruptible variant.
diff --git a/include/linux/delay.h b/include/linux/delay.h
index fd832c6d419e..a6ecb34cf547 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -45,6 +45,7 @@ extern unsigned long lpj_fine;
45void calibrate_delay(void); 45void calibrate_delay(void);
46void msleep(unsigned int msecs); 46void msleep(unsigned int msecs);
47unsigned long msleep_interruptible(unsigned int msecs); 47unsigned long msleep_interruptible(unsigned int msecs);
48void usleep_range(unsigned long min, unsigned long max);
48 49
49static inline void ssleep(unsigned int seconds) 50static inline void ssleep(unsigned int seconds)
50{ 51{
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index ad723420acc3..9ca4973f736d 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -560,11 +560,6 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
560 new_timer->it_clock = which_clock; 560 new_timer->it_clock = which_clock;
561 new_timer->it_overrun = -1; 561 new_timer->it_overrun = -1;
562 562
563 if (copy_to_user(created_timer_id,
564 &new_timer_id, sizeof (new_timer_id))) {
565 error = -EFAULT;
566 goto out;
567 }
568 if (timer_event_spec) { 563 if (timer_event_spec) {
569 if (copy_from_user(&event, timer_event_spec, sizeof (event))) { 564 if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
570 error = -EFAULT; 565 error = -EFAULT;
@@ -590,6 +585,12 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
590 new_timer->sigq->info.si_tid = new_timer->it_id; 585 new_timer->sigq->info.si_tid = new_timer->it_id;
591 new_timer->sigq->info.si_code = SI_TIMER; 586 new_timer->sigq->info.si_code = SI_TIMER;
592 587
588 if (copy_to_user(created_timer_id,
589 &new_timer_id, sizeof (new_timer_id))) {
590 error = -EFAULT;
591 goto out;
592 }
593
593 error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer)); 594 error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
594 if (error) 595 if (error)
595 goto out; 596 goto out;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 021d2f878f19..3e216e01bbd1 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -774,7 +774,6 @@ void tick_setup_sched_timer(void)
774{ 774{
775 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 775 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
776 ktime_t now = ktime_get(); 776 ktime_t now = ktime_get();
777 u64 offset;
778 777
779 /* 778 /*
780 * Emulate tick processing via per-CPU hrtimers: 779 * Emulate tick processing via per-CPU hrtimers:
@@ -784,10 +783,6 @@ void tick_setup_sched_timer(void)
784 783
785 /* Get the next period (per cpu) */ 784 /* Get the next period (per cpu) */
786 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); 785 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
787 offset = ktime_to_ns(tick_period) >> 1;
788 do_div(offset, num_possible_cpus());
789 offset *= smp_processor_id();
790 hrtimer_add_expires_ns(&ts->sched_timer, offset);
791 786
792 for (;;) { 787 for (;;) {
793 hrtimer_forward(&ts->sched_timer, now, tick_period); 788 hrtimer_forward(&ts->sched_timer, now, tick_period);
diff --git a/kernel/timer.c b/kernel/timer.c
index d61d16da0b64..f1b8afe1ad86 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -90,8 +90,13 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
90 90
91/* 91/*
92 * Note that all tvec_bases are 2 byte aligned and lower bit of 92 * Note that all tvec_bases are 2 byte aligned and lower bit of
93 * base in timer_list is guaranteed to be zero. Use the LSB for 93 * base in timer_list is guaranteed to be zero. Use the LSB to
94 * the new flag to indicate whether the timer is deferrable 94 * indicate whether the timer is deferrable.
95 *
96 * A deferrable timer will work normally when the system is busy, but
97 * will not cause a CPU to come out of idle just to service it; instead,
98 * the timer will be serviced when the CPU eventually wakes up with a
99 * subsequent non-deferrable timer.
95 */ 100 */
96#define TBASE_DEFERRABLE_FLAG (0x1) 101#define TBASE_DEFERRABLE_FLAG (0x1)
97 102
@@ -1758,3 +1763,25 @@ unsigned long msleep_interruptible(unsigned int msecs)
1758} 1763}
1759 1764
1760EXPORT_SYMBOL(msleep_interruptible); 1765EXPORT_SYMBOL(msleep_interruptible);
1766
1767static int __sched do_usleep_range(unsigned long min, unsigned long max)
1768{
1769 ktime_t kmin;
1770 unsigned long delta;
1771
1772 kmin = ktime_set(0, min * NSEC_PER_USEC);
1773 delta = (max - min) * NSEC_PER_USEC;
1774 return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
1775}
1776
1777/**
1778 * usleep_range - Drop in replacement for udelay where wakeup is flexible
1779 * @min: Minimum time in usecs to sleep
1780 * @max: Maximum time in usecs to sleep
1781 */
1782void usleep_range(unsigned long min, unsigned long max)
1783{
1784 __set_current_state(TASK_UNINTERRUPTIBLE);
1785 do_usleep_range(min, max);
1786}
1787EXPORT_SYMBOL(usleep_range);