aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2010-05-25 04:48:51 -0400
committerIngo Molnar <mingo@elte.hu>2010-06-09 04:34:49 -0400
commitc676329abb2b8359d9a5d734dec0c81779823fd6 (patch)
treeb6c33715176221a87100228399c2a6f5049e44ea
parent95ae3c59fa8ad616c73745e21154b5af0fb10168 (diff)
sched_clock: Add local_clock() API and improve documentation
For people who otherwise get to write: cpu_clock(smp_processor_id()), there is now: local_clock(). Also, as per suggestion from Andrew, provide some documentation on the various clock interfaces, and minimize the unsigned long long vs u64 mess. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Jens Axboe <jaxboe@fusionio.com> LKML-Reference: <1275052414.1645.52.camel@laptop> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/parisc/kernel/ftrace.c4
-rw-r--r--include/linux/sched.h37
-rw-r--r--kernel/lockdep.c2
-rw-r--r--kernel/perf_event.c2
-rw-r--r--kernel/rcutorture.c3
-rw-r--r--kernel/sched.c2
-rw-r--r--kernel/sched_clock.c95
-rw-r--r--kernel/trace/trace_clock.c2
8 files changed, 113 insertions, 34 deletions
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 9877372ffdba..5beb97bafbb1 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -82,7 +82,7 @@ unsigned long ftrace_return_to_handler(unsigned long retval0,
82 unsigned long ret; 82 unsigned long ret;
83 83
84 pop_return_trace(&trace, &ret); 84 pop_return_trace(&trace, &ret);
85 trace.rettime = cpu_clock(raw_smp_processor_id()); 85 trace.rettime = local_clock();
86 ftrace_graph_return(&trace); 86 ftrace_graph_return(&trace);
87 87
88 if (unlikely(!ret)) { 88 if (unlikely(!ret)) {
@@ -126,7 +126,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
126 return; 126 return;
127 } 127 }
128 128
129 calltime = cpu_clock(raw_smp_processor_id()); 129 calltime = local_clock();
130 130
131 if (push_return_trace(old, calltime, 131 if (push_return_trace(old, calltime,
132 self_addr, &trace.depth) == -EBUSY) { 132 self_addr, &trace.depth) == -EBUSY) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index edc3dd168d87..c2d4316a04bb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1791,20 +1791,23 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
1791#endif 1791#endif
1792 1792
1793/* 1793/*
1794 * Architectures can set this to 1 if they have specified 1794 * Do not use outside of architecture code which knows its limitations.
1795 * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, 1795 *
1796 * but then during bootup it turns out that sched_clock() 1796 * sched_clock() has no promise of monotonicity or bounded drift between
1797 * is reliable after all: 1797 * CPUs; its use (which you should avoid) requires disabling IRQs.
1798 *
1799 * Please use one of the three interfaces below.
1798 */ 1800 */
1799#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
1800extern int sched_clock_stable;
1801#endif
1802
1803/* ftrace calls sched_clock() directly */
1804extern unsigned long long notrace sched_clock(void); 1801extern unsigned long long notrace sched_clock(void);
1802/*
1803 * See the comment in kernel/sched_clock.c
1804 */
1805extern u64 cpu_clock(int cpu);
1806extern u64 local_clock(void);
1807extern u64 sched_clock_cpu(int cpu);
1808
1805 1809
1806extern void sched_clock_init(void); 1810extern void sched_clock_init(void);
1807extern u64 sched_clock_cpu(int cpu);
1808 1811
1809#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 1812#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
1810static inline void sched_clock_tick(void) 1813static inline void sched_clock_tick(void)
@@ -1819,17 +1822,19 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
1819{ 1822{
1820} 1823}
1821#else 1824#else
1825/*
1826 * Architectures can set this to 1 if they have specified
1827 * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
1828 * but then during bootup it turns out that sched_clock()
1829 * is reliable after all:
1830 */
1831extern int sched_clock_stable;
1832
1822extern void sched_clock_tick(void); 1833extern void sched_clock_tick(void);
1823extern void sched_clock_idle_sleep_event(void); 1834extern void sched_clock_idle_sleep_event(void);
1824extern void sched_clock_idle_wakeup_event(u64 delta_ns); 1835extern void sched_clock_idle_wakeup_event(u64 delta_ns);
1825#endif 1836#endif
1826 1837
1827/*
1828 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
1829 * clock constructed from sched_clock():
1830 */
1831extern unsigned long long cpu_clock(int cpu);
1832
1833extern unsigned long long 1838extern unsigned long long
1834task_sched_runtime(struct task_struct *task); 1839task_sched_runtime(struct task_struct *task);
1835extern unsigned long long thread_group_sched_runtime(struct task_struct *task); 1840extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 54286798c37b..f2852a510232 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -146,7 +146,7 @@ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
146 146
147static inline u64 lockstat_clock(void) 147static inline u64 lockstat_clock(void)
148{ 148{
149 return cpu_clock(smp_processor_id()); 149 return local_clock();
150} 150}
151 151
152static int lock_point(unsigned long points[], unsigned long ip) 152static int lock_point(unsigned long points[], unsigned long ip)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 31d6afe92594..109c5ec88933 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -214,7 +214,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
214 214
215static inline u64 perf_clock(void) 215static inline u64 perf_clock(void)
216{ 216{
217 return cpu_clock(raw_smp_processor_id()); 217 return local_clock();
218} 218}
219 219
220/* 220/*
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 6535ac8bc6a5..2e2726d790b9 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -239,8 +239,7 @@ static unsigned long
239rcu_random(struct rcu_random_state *rrsp) 239rcu_random(struct rcu_random_state *rrsp)
240{ 240{
241 if (--rrsp->rrs_count < 0) { 241 if (--rrsp->rrs_count < 0) {
242 rrsp->rrs_state += 242 rrsp->rrs_state += (unsigned long)local_clock();
243 (unsigned long)cpu_clock(raw_smp_processor_id());
244 rrsp->rrs_count = RCU_RANDOM_REFRESH; 243 rrsp->rrs_count = RCU_RANDOM_REFRESH;
245 } 244 }
246 rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD; 245 rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
diff --git a/kernel/sched.c b/kernel/sched.c
index 8f351c56567f..3abd8f780dae 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1647,7 +1647,7 @@ static void update_shares(struct sched_domain *sd)
1647 if (root_task_group_empty()) 1647 if (root_task_group_empty())
1648 return; 1648 return;
1649 1649
1650 now = cpu_clock(raw_smp_processor_id()); 1650 now = local_clock();
1651 elapsed = now - sd->last_update; 1651 elapsed = now - sd->last_update;
1652 1652
1653 if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { 1653 if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 906a0f718cb3..52f1a149bfb1 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -10,19 +10,55 @@
10 * Ingo Molnar <mingo@redhat.com> 10 * Ingo Molnar <mingo@redhat.com>
11 * Guillaume Chazarain <guichaz@gmail.com> 11 * Guillaume Chazarain <guichaz@gmail.com>
12 * 12 *
13 * Create a semi stable clock from a mixture of other events, including: 13 *
14 * - gtod 14 * What:
15 *
16 * cpu_clock(i) provides a fast (execution time) high resolution
17 * clock with bounded drift between CPUs. The value of cpu_clock(i)
18 * is monotonic for constant i. The timestamp returned is in nanoseconds.
19 *
20 * ######################### BIG FAT WARNING ##########################
21 * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
22 * # go backwards !! #
23 * ####################################################################
24 *
25 * There is no strict promise about the base, although it tends to start
26 * at 0 on boot (but people really shouldn't rely on that).
27 *
28 * cpu_clock(i) -- can be used from any context, including NMI.
29 * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
30 * local_clock() -- is cpu_clock() on the current cpu.
31 *
32 * How:
33 *
34 * The implementation either uses sched_clock() when
35 * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
36 * sched_clock() is assumed to provide these properties (mostly it means
37 * the architecture provides a globally synchronized highres time source).
38 *
39 * Otherwise it tries to create a semi stable clock from a mixture of other
40 * clocks, including:
41 *
42 * - GTOD (clock monotonic)
15 * - sched_clock() 43 * - sched_clock()
16 * - explicit idle events 44 * - explicit idle events
17 * 45 *
18 * We use gtod as base and the unstable clock deltas. The deltas are filtered, 46 * We use GTOD as base and use sched_clock() deltas to improve resolution. The
19 * making it monotonic and keeping it within an expected window. 47 * deltas are filtered to provide monotonicity and to keep it within an
48 * expected window.
20 * 49 *
21 * Furthermore, explicit sleep and wakeup hooks allow us to account for time 50 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
22 * that is otherwise invisible (TSC gets stopped). 51 * that is otherwise invisible (TSC gets stopped).
23 * 52 *
24 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat 53 *
25 * consistent between cpus (never more than 2 jiffies difference). 54 * Notes:
55 *
56 * The !IRQ-safety of sched_clock() and sched_clock_cpu() comes from things
57 * like cpufreq interrupts that can change the base clock (TSC) multiplier
58 * and cause funny jumps in time -- although the filtering provided by
59 * sched_clock_cpu() should mitigate serious artifacts we cannot rely on it
60 * in general since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
61 * sched_clock().
26 */ 62 */
27#include <linux/spinlock.h> 63#include <linux/spinlock.h>
28#include <linux/hardirq.h> 64#include <linux/hardirq.h>
@@ -170,6 +206,11 @@ again:
170 return val; 206 return val;
171} 207}
172 208
209/*
210 * Similar to cpu_clock(), but requires local IRQs to be disabled.
211 *
212 * See cpu_clock().
213 */
173u64 sched_clock_cpu(int cpu) 214u64 sched_clock_cpu(int cpu)
174{ 215{
175 struct sched_clock_data *scd; 216 struct sched_clock_data *scd;
@@ -237,9 +278,19 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
237} 278}
238EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 279EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
239 280
240unsigned long long cpu_clock(int cpu) 281/*
282 * As outlined at the top, provides a fast, high resolution, nanosecond
283 * time source that is monotonic per cpu argument and has bounded drift
284 * between cpus.
285 *
286 * ######################### BIG FAT WARNING ##########################
287 * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
288 * # go backwards !! #
289 * ####################################################################
290 */
291u64 cpu_clock(int cpu)
241{ 292{
242 unsigned long long clock; 293 u64 clock;
243 unsigned long flags; 294 unsigned long flags;
244 295
245 local_irq_save(flags); 296 local_irq_save(flags);
@@ -249,6 +300,25 @@ unsigned long long cpu_clock(int cpu)
249 return clock; 300 return clock;
250} 301}
251 302
303/*
304 * Similar to cpu_clock() for the current cpu. Time will only be observed
305 * to be monotonic if care is taken to only compare timestamps taken on the
306 * same CPU.
307 *
308 * See cpu_clock().
309 */
310u64 local_clock(void)
311{
312 u64 clock;
313 unsigned long flags;
314
315 local_irq_save(flags);
316 clock = sched_clock_cpu(smp_processor_id());
317 local_irq_restore(flags);
318
319 return clock;
320}
321
252#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ 322#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
253 323
254void sched_clock_init(void) 324void sched_clock_init(void)
@@ -264,12 +334,17 @@ u64 sched_clock_cpu(int cpu)
264 return sched_clock(); 334 return sched_clock();
265} 335}
266 336
267 337u64 cpu_clock(int cpu)
268unsigned long long cpu_clock(int cpu)
269{ 338{
270 return sched_clock_cpu(cpu); 339 return sched_clock_cpu(cpu);
271} 340}
272 341
342u64 local_clock(void)
343{
344 return sched_clock_cpu(0);
345}
346
273#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ 347#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
274 348
275EXPORT_SYMBOL_GPL(cpu_clock); 349EXPORT_SYMBOL_GPL(cpu_clock);
350EXPORT_SYMBOL_GPL(local_clock);
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1a..1723e2b8c589 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -56,7 +56,7 @@ u64 notrace trace_clock_local(void)
56 */ 56 */
57u64 notrace trace_clock(void) 57u64 notrace trace_clock(void)
58{ 58{
59 return cpu_clock(raw_smp_processor_id()); 59 return local_clock();
60} 60}
61 61
62 62