diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-06 16:12:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-06 16:12:36 -0400 |
commit | af390084359a5de20046c901529b2b6a50b941cb (patch) | |
tree | b73a6261d1b1f9fb34432cc9a47411a49330b8dc | |
parent | 7645e4320497b35ce9fb6c2269ebcd57af9fe735 (diff) | |
parent | 0fcb80818bc3ade5befd409051089f710adcf7b0 (diff) |
Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
Documentation: Add timers/timers-howto.txt
timer: Added usleep_range timer
Revert "timer: Added usleep[_range] timer"
clockevents: Remove the per cpu tick skew
posix_timer: Move copy_to_user(created_timer_id) down in timer_create()
timer: Added usleep[_range] timer
timers: Document meaning of deferrable timer
-rw-r--r-- | Documentation/timers/timers-howto.txt | 105 | ||||
-rw-r--r-- | include/linux/delay.h | 1 | ||||
-rw-r--r-- | kernel/posix-timers.c | 11 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 5 | ||||
-rw-r--r-- | kernel/timer.c | 31 |
5 files changed, 141 insertions, 12 deletions
diff --git a/Documentation/timers/timers-howto.txt b/Documentation/timers/timers-howto.txt new file mode 100644 index 000000000000..c9ef29d2ede3 --- /dev/null +++ b/Documentation/timers/timers-howto.txt | |||
@@ -0,0 +1,105 @@ | |||
1 | delays - Information on the various kernel delay / sleep mechanisms | ||
2 | ------------------------------------------------------------------- | ||
3 | |||
4 | This document seeks to answer the common question: "What is the | ||
5 | RightWay (TM) to insert a delay?" | ||
6 | |||
7 | This question is most often faced by driver writers who have to | ||
8 | deal with hardware delays and who may not be the most intimately | ||
9 | familiar with the inner workings of the Linux Kernel. | ||
10 | |||
11 | |||
12 | Inserting Delays | ||
13 | ---------------- | ||
14 | |||
15 | The first, and most important, question you need to ask is "Is my | ||
16 | code in an atomic context?" This should be followed closely by "Does | ||
17 | it really need to delay in atomic context?" If so... | ||
18 | |||
19 | ATOMIC CONTEXT: | ||
20 | You must use the *delay family of functions. These | ||
21 | functions use the jiffie estimation of clock speed | ||
22 | and will busy wait for enough loop cycles to achieve | ||
23 | the desired delay: | ||
24 | |||
25 | ndelay(unsigned long nsecs) | ||
26 | udelay(unsigned long usecs) | ||
27 | mdelay(unsigned long msecs) | ||
28 | |||
29 | udelay is the generally preferred API; ndelay-level | ||
30 | precision may not actually exist on many non-PC devices. | ||
31 | |||
32 | mdelay is a macro wrapper around udelay, to account for | ||
33 | possible overflow when passing large arguments to udelay. | ||
34 | In general, use of mdelay is discouraged and code should | ||
35 | be refactored to allow for the use of msleep. | ||
36 | |||
37 | NON-ATOMIC CONTEXT: | ||
38 | You should use the *sleep[_range] family of functions. | ||
39 | There are a few more options here. While any of them may | ||
40 | work correctly, using the "right" sleep function will | ||
41 | help the scheduler, power management, and just make your | ||
42 | driver better :) | ||
43 | |||
44 | -- Backed by busy-wait loop: | ||
45 | udelay(unsigned long usecs) | ||
46 | -- Backed by hrtimers: | ||
47 | usleep_range(unsigned long min, unsigned long max) | ||
48 | -- Backed by jiffies / legacy_timers: | ||
49 | msleep(unsigned long msecs) | ||
50 | msleep_interruptible(unsigned long msecs) | ||
51 | |||
52 | Unlike the *delay family, the underlying mechanism | ||
53 | driving each of these calls varies, thus there are | ||
54 | quirks you should be aware of. | ||
55 | |||
56 | |||
57 | SLEEPING FOR "A FEW" USECS ( < ~10us? ): | ||
58 | * Use udelay | ||
59 | |||
60 | - Why not usleep? | ||
61 | On slower systems, (embedded, OR perhaps a speed- | ||
62 | stepped PC!) the overhead of setting up the hrtimers | ||
63 | for usleep *may* not be worth it. Such an evaluation | ||
64 | will obviously depend on your specific situation, but | ||
65 | it is something to be aware of. | ||
66 | |||
67 | SLEEPING FOR ~USECS OR SMALL MSECS ( 10us - 20ms): | ||
68 | * Use usleep_range | ||
69 | |||
70 | - Why not msleep for (1ms - 20ms)? | ||
71 | Explained originally here: | ||
72 | http://lkml.org/lkml/2007/8/3/250 | ||
73 | msleep(1~20) may not do what the caller intends, and | ||
74 | will often sleep longer (~20 ms actual sleep for any | ||
75 | value given in the 1~20ms range). In many cases this | ||
76 | is not the desired behavior. | ||
77 | |||
78 | - Why is there no "usleep" / What is a good range? | ||
79 | Since usleep_range is built on top of hrtimers, the | ||
80 | wakeup will be very precise (ish), thus a simple | ||
81 | usleep function would likely introduce a large number | ||
82 | of undesired interrupts. | ||
83 | |||
84 | With the introduction of a range, the scheduler is | ||
85 | free to coalesce your wakeup with any other wakeup | ||
86 | that may have happened for other reasons, or at the | ||
87 | worst case, fire an interrupt for your upper bound. | ||
88 | |||
89 | The larger a range you supply, the greater a chance | ||
90 | that you will not trigger an interrupt; this should | ||
91 | be balanced with what is an acceptable upper bound on | ||
92 | delay / performance for your specific code path. Exact | ||
93 | tolerances here are very situation specific, thus it | ||
94 | is left to the caller to determine a reasonable range. | ||
95 | |||
96 | SLEEPING FOR LARGER MSECS ( 10ms+ ) | ||
97 | * Use msleep or possibly msleep_interruptible | ||
98 | |||
99 | - What's the difference? | ||
100 | msleep sets the current task to TASK_UNINTERRUPTIBLE | ||
101 | whereas msleep_interruptible sets the current task to | ||
102 | TASK_INTERRUPTIBLE before scheduling the sleep. In | ||
103 | short, the difference is whether the sleep can be ended | ||
104 | early by a signal. In general, just use msleep unless | ||
105 | you know you have a need for the interruptible variant. | ||
diff --git a/include/linux/delay.h b/include/linux/delay.h index fd832c6d419e..a6ecb34cf547 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h | |||
@@ -45,6 +45,7 @@ extern unsigned long lpj_fine; | |||
45 | void calibrate_delay(void); | 45 | void calibrate_delay(void); |
46 | void msleep(unsigned int msecs); | 46 | void msleep(unsigned int msecs); |
47 | unsigned long msleep_interruptible(unsigned int msecs); | 47 | unsigned long msleep_interruptible(unsigned int msecs); |
48 | void usleep_range(unsigned long min, unsigned long max); | ||
48 | 49 | ||
49 | static inline void ssleep(unsigned int seconds) | 50 | static inline void ssleep(unsigned int seconds) |
50 | { | 51 | { |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index ad723420acc3..9ca4973f736d 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -560,11 +560,6 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, | |||
560 | new_timer->it_clock = which_clock; | 560 | new_timer->it_clock = which_clock; |
561 | new_timer->it_overrun = -1; | 561 | new_timer->it_overrun = -1; |
562 | 562 | ||
563 | if (copy_to_user(created_timer_id, | ||
564 | &new_timer_id, sizeof (new_timer_id))) { | ||
565 | error = -EFAULT; | ||
566 | goto out; | ||
567 | } | ||
568 | if (timer_event_spec) { | 563 | if (timer_event_spec) { |
569 | if (copy_from_user(&event, timer_event_spec, sizeof (event))) { | 564 | if (copy_from_user(&event, timer_event_spec, sizeof (event))) { |
570 | error = -EFAULT; | 565 | error = -EFAULT; |
@@ -590,6 +585,12 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, | |||
590 | new_timer->sigq->info.si_tid = new_timer->it_id; | 585 | new_timer->sigq->info.si_tid = new_timer->it_id; |
591 | new_timer->sigq->info.si_code = SI_TIMER; | 586 | new_timer->sigq->info.si_code = SI_TIMER; |
592 | 587 | ||
588 | if (copy_to_user(created_timer_id, | ||
589 | &new_timer_id, sizeof (new_timer_id))) { | ||
590 | error = -EFAULT; | ||
591 | goto out; | ||
592 | } | ||
593 | |||
593 | error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer)); | 594 | error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer)); |
594 | if (error) | 595 | if (error) |
595 | goto out; | 596 | goto out; |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 021d2f878f19..3e216e01bbd1 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -774,7 +774,6 @@ void tick_setup_sched_timer(void) | |||
774 | { | 774 | { |
775 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 775 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
776 | ktime_t now = ktime_get(); | 776 | ktime_t now = ktime_get(); |
777 | u64 offset; | ||
778 | 777 | ||
779 | /* | 778 | /* |
780 | * Emulate tick processing via per-CPU hrtimers: | 779 | * Emulate tick processing via per-CPU hrtimers: |
@@ -784,10 +783,6 @@ void tick_setup_sched_timer(void) | |||
784 | 783 | ||
785 | /* Get the next period (per cpu) */ | 784 | /* Get the next period (per cpu) */ |
786 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); | 785 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); |
787 | offset = ktime_to_ns(tick_period) >> 1; | ||
788 | do_div(offset, num_possible_cpus()); | ||
789 | offset *= smp_processor_id(); | ||
790 | hrtimer_add_expires_ns(&ts->sched_timer, offset); | ||
791 | 786 | ||
792 | for (;;) { | 787 | for (;;) { |
793 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 788 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
diff --git a/kernel/timer.c b/kernel/timer.c index d61d16da0b64..f1b8afe1ad86 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -90,8 +90,13 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; | |||
90 | 90 | ||
91 | /* | 91 | /* |
92 | * Note that all tvec_bases are 2 byte aligned and lower bit of | 92 | * Note that all tvec_bases are 2 byte aligned and lower bit of |
93 | * base in timer_list is guaranteed to be zero. Use the LSB for | 93 | * base in timer_list is guaranteed to be zero. Use the LSB to |
94 | * the new flag to indicate whether the timer is deferrable | 94 | * indicate whether the timer is deferrable. |
95 | * | ||
96 | * A deferrable timer will work normally when the system is busy, but | ||
97 | * will not cause a CPU to come out of idle just to service it; instead, | ||
98 | * the timer will be serviced when the CPU eventually wakes up with a | ||
99 | * subsequent non-deferrable timer. | ||
95 | */ | 100 | */ |
96 | #define TBASE_DEFERRABLE_FLAG (0x1) | 101 | #define TBASE_DEFERRABLE_FLAG (0x1) |
97 | 102 | ||
@@ -1758,3 +1763,25 @@ unsigned long msleep_interruptible(unsigned int msecs) | |||
1758 | } | 1763 | } |
1759 | 1764 | ||
1760 | EXPORT_SYMBOL(msleep_interruptible); | 1765 | EXPORT_SYMBOL(msleep_interruptible); |
1766 | |||
1767 | static int __sched do_usleep_range(unsigned long min, unsigned long max) | ||
1768 | { | ||
1769 | ktime_t kmin; | ||
1770 | unsigned long delta; | ||
1771 | |||
1772 | kmin = ktime_set(0, min * NSEC_PER_USEC); | ||
1773 | delta = (max - min) * NSEC_PER_USEC; | ||
1774 | return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); | ||
1775 | } | ||
1776 | |||
1777 | /** | ||
1778 | * usleep_range - Drop in replacement for udelay where wakeup is flexible | ||
1779 | * @min: Minimum time in usecs to sleep | ||
1780 | * @max: Maximum time in usecs to sleep | ||
1781 | */ | ||
1782 | void usleep_range(unsigned long min, unsigned long max) | ||
1783 | { | ||
1784 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
1785 | do_usleep_range(min, max); | ||
1786 | } | ||
1787 | EXPORT_SYMBOL(usleep_range); | ||