author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2010-05-25 04:48:51 -0400
committer  Ingo Molnar <mingo@elte.hu>               2010-06-09 04:34:49 -0400
commit     c676329abb2b8359d9a5d734dec0c81779823fd6 (patch)
tree       b6c33715176221a87100228399c2a6f5049e44ea
parent     95ae3c59fa8ad616c73745e21154b5af0fb10168 (diff)
sched_clock: Add local_clock() API and improve documentation
For people who otherwise get to write: cpu_clock(smp_processor_id()),
there is now: local_clock().
Also, as per suggestion from Andrew, provide some documentation on
the various clock interfaces, and minimize the unsigned long long vs
u64 mess.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jens Axboe <jaxboe@fusionio.com>
LKML-Reference: <1275052414.1645.52.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
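As a quick illustration (not part of the patch), here is a hypothetical call site before and after the conversion this change enables; `grab_timestamp_old()`/`grab_timestamp_new()` are made-up names:

```c
#include <linux/sched.h>	/* sched_clock(), cpu_clock(), local_clock() */

static u64 grab_timestamp_old(void)
{
	/* Old spelling: ask for the current CPU's clock by id. */
	return cpu_clock(smp_processor_id());
}

static u64 grab_timestamp_new(void)
{
	/* New spelling: dedicated helper for the local CPU. */
	return local_clock();
}
```

Both return nanosecond timestamps from the same per-cpu clock; local_clock() simply spares callers the processor-id plumbing.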
-rw-r--r--   arch/parisc/kernel/ftrace.c |  4
-rw-r--r--   include/linux/sched.h       | 37
-rw-r--r--   kernel/lockdep.c            |  2
-rw-r--r--   kernel/perf_event.c         |  2
-rw-r--r--   kernel/rcutorture.c         |  3
-rw-r--r--   kernel/sched.c              |  2
-rw-r--r--   kernel/sched_clock.c        | 95
-rw-r--r--   kernel/trace/trace_clock.c  |  2

8 files changed, 113 insertions(+), 34 deletions(-)
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 9877372ffdba..5beb97bafbb1 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -82,7 +82,7 @@ unsigned long ftrace_return_to_handler(unsigned long retval0,
 	unsigned long ret;
 
 	pop_return_trace(&trace, &ret);
-	trace.rettime = cpu_clock(raw_smp_processor_id());
+	trace.rettime = local_clock();
 	ftrace_graph_return(&trace);
 
 	if (unlikely(!ret)) {
@@ -126,7 +126,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	calltime = cpu_clock(raw_smp_processor_id());
+	calltime = local_clock();
 
 	if (push_return_trace(old, calltime,
 				self_addr, &trace.depth) == -EBUSY) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index edc3dd168d87..c2d4316a04bb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1791,20 +1791,23 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 #endif
 
 /*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
+ * Do not use outside of architecture code which knows its limitations.
+ *
+ * sched_clock() has no promise of monotonicity or bounded drift between
+ * CPUs, use (which you should not) requires disabling IRQs.
+ *
+ * Please use one of the three interfaces below.
  */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-extern int sched_clock_stable;
-#endif
-
-/* ftrace calls sched_clock() directly */
 extern unsigned long long notrace sched_clock(void);
+/*
+ * See the comment in kernel/sched_clock.c
+ */
+extern u64 cpu_clock(int cpu);
+extern u64 local_clock(void);
+extern u64 sched_clock_cpu(int cpu);
+
 
 extern void sched_clock_init(void);
-extern u64 sched_clock_cpu(int cpu);
 
 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 static inline void sched_clock_tick(void)
@@ -1819,17 +1822,19 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
 }
 #else
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+extern int sched_clock_stable;
+
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 #endif
 
-/*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
- */
-extern unsigned long long cpu_clock(int cpu);
-
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
 extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
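For orientation (again, not something this patch adds), a sketch of which of the three declared interfaces fits which context, following the rules documented in kernel/sched_clock.c below; the helper names are hypothetical:

```c
#include <linux/sched.h>
#include <linux/smp.h>

/* Usable from any context, including NMI: cpu_clock() saves/restores IRQs itself. */
static u64 stamp_cpu(int cpu)
{
	return cpu_clock(cpu);
}

/* Only with local IRQs already disabled (e.g. hardirq context). */
static u64 stamp_irqs_off(void)
{
	return sched_clock_cpu(smp_processor_id());
}

/* The common case: a timestamp for the CPU we are currently running on. */
static u64 stamp_local(void)
{
	return local_clock();
}
```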
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 54286798c37b..f2852a510232 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -146,7 +146,7 @@ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
 
 static inline u64 lockstat_clock(void)
 {
-	return cpu_clock(smp_processor_id());
+	return local_clock();
 }
 
 static int lock_point(unsigned long points[], unsigned long ip)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 31d6afe92594..109c5ec88933 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -214,7 +214,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 
 static inline u64 perf_clock(void)
 {
-	return cpu_clock(raw_smp_processor_id());
+	return local_clock();
 }
 
 /*
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 6535ac8bc6a5..2e2726d790b9 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -239,8 +239,7 @@ static unsigned long
 rcu_random(struct rcu_random_state *rrsp)
 {
 	if (--rrsp->rrs_count < 0) {
-		rrsp->rrs_state +=
-			(unsigned long)cpu_clock(raw_smp_processor_id());
+		rrsp->rrs_state += (unsigned long)local_clock();
 		rrsp->rrs_count = RCU_RANDOM_REFRESH;
 	}
 	rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
diff --git a/kernel/sched.c b/kernel/sched.c
index 8f351c56567f..3abd8f780dae 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1647,7 +1647,7 @@ static void update_shares(struct sched_domain *sd)
 	if (root_task_group_empty())
 		return;
 
-	now = cpu_clock(raw_smp_processor_id());
+	now = local_clock();
 	elapsed = now - sd->last_update;
 
 	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 906a0f718cb3..52f1a149bfb1 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -10,19 +10,55 @@
  *  Ingo Molnar <mingo@redhat.com>
  *  Guillaume Chazarain <guichaz@gmail.com>
  *
- * Create a semi stable clock from a mixture of other events, including:
- *  - gtod
+ *
+ * What:
+ *
+ * cpu_clock(i) provides a fast (execution time) high resolution
+ * clock with bounded drift between CPUs. The value of cpu_clock(i)
+ * is monotonic for constant i. The timestamp returned is in nanoseconds.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ *
+ * There is no strict promise about the base, although it tends to start
+ * at 0 on boot (but people really shouldn't rely on that).
+ *
+ * cpu_clock(i)       -- can be used from any context, including NMI.
+ * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
+ * local_clock()      -- is cpu_clock() on the current cpu.
+ *
+ * How:
+ *
+ * The implementation either uses sched_clock() when
+ * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
+ * sched_clock() is assumed to provide these properties (mostly it means
+ * the architecture provides a globally synchronized highres time source).
+ *
+ * Otherwise it tries to create a semi stable clock from a mixture of other
+ * clocks, including:
+ *
+ *  - GTOD (clock monotomic)
  *  - sched_clock()
  *  - explicit idle events
  *
- * We use gtod as base and the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window.
+ * We use GTOD as base and use sched_clock() deltas to improve resolution. The
+ * deltas are filtered to provide monotonicity and keeping it within an
+ * expected window.
  *
  * Furthermore, explicit sleep and wakeup hooks allow us to account for time
  * that is otherwise invisible (TSC gets stopped).
  *
- * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 2 jiffies difference).
+ *
+ * Notes:
+ *
+ * The !IRQ-safetly of sched_clock() and sched_clock_cpu() comes from things
+ * like cpufreq interrupts that can change the base clock (TSC) multiplier
+ * and cause funny jumps in time -- although the filtering provided by
+ * sched_clock_cpu() should mitigate serious artifacts we cannot rely on it
+ * in general since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
+ * sched_clock().
  */
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
@@ -170,6 +206,11 @@ again:
 	return val;
 }
 
+/*
+ * Similar to cpu_clock(), but requires local IRQs to be disabled.
+ *
+ * See cpu_clock().
+ */
 u64 sched_clock_cpu(int cpu)
 {
 	struct sched_clock_data *scd;
@@ -237,9 +278,19 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-unsigned long long cpu_clock(int cpu)
+/*
+ * As outlined at the top, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+u64 cpu_clock(int cpu)
 {
-	unsigned long long clock;
+	u64 clock;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -249,6 +300,25 @@ unsigned long long cpu_clock(int cpu)
 	return clock;
 }
 
+/*
+ * Similar to cpu_clock() for the current cpu. Time will only be observed
+ * to be monotonic if care is taken to only compare timestampt taken on the
+ * same CPU.
+ *
+ * See cpu_clock().
+ */
+u64 local_clock(void)
+{
+	u64 clock;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	clock = sched_clock_cpu(smp_processor_id());
+	local_irq_restore(flags);
+
+	return clock;
+}
+
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 void sched_clock_init(void)
@@ -264,12 +334,17 @@ u64 sched_clock_cpu(int cpu)
 	return sched_clock();
 }
 
-
-unsigned long long cpu_clock(int cpu)
+u64 cpu_clock(int cpu)
 {
 	return sched_clock_cpu(cpu);
 }
 
+u64 local_clock(void)
+{
+	return sched_clock_cpu(0);
+}
+
 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 EXPORT_SYMBOL_GPL(cpu_clock);
+EXPORT_SYMBOL_GPL(local_clock);
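To make the "BIG FAT WARNING" above concrete, a hypothetical duration measurement: deltas are only meaningful between timestamps taken on the same CPU (or with the same cpu argument to cpu_clock()); do_some_work() is a placeholder:

```c
static u64 time_some_work(void)
{
	u64 t0, t1;

	t0 = local_clock();
	do_some_work();		/* placeholder for the code being timed */
	t1 = local_clock();

	/*
	 * Only valid if we stayed on one CPU between the samples; across a
	 * migration the two values come from different per-cpu clocks and
	 * the delta can even appear negative.
	 */
	return t1 - t0;
}
```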
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1a..1723e2b8c589 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -56,7 +56,7 @@ u64 notrace trace_clock_local(void)
  */
 u64 notrace trace_clock(void)
 {
-	return cpu_clock(raw_smp_processor_id());
+	return local_clock();
 }
 
 