author     Ingo Molnar <mingo@elte.hu>   2009-02-26 12:47:11 -0500
committer  Ingo Molnar <mingo@elte.hu>   2009-02-26 12:44:06 -0500
commit     14131f2f98ac350ee9e73faed916d2238a8b6a0d
tree       bf490d104276142e914f1245bbc9f44cb0d2bc9b
parent     6409c4da289d6905f7ae2bd0630438368439bda2
tracing: implement trace_clock_*() APIs
Impact: implement new tracing timestamp APIs
Add three trace clock variants, with differing scalability/precision
tradeoffs:
- local: CPU-local trace clock
- medium: scalable global clock with some jitter
- global: globally monotonic, serialized clock
Make the ring-buffer use the local trace clock internally.
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  include/linux/trace_clock.h |  19
-rw-r--r--  kernel/trace/Makefile       |   1
-rw-r--r--  kernel/trace/ring_buffer.c  |   5
-rw-r--r--  kernel/trace/trace_clock.c  | 101
4 files changed, 123 insertions(+), 3 deletions(-)
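Not part of the patch itself, but for orientation: a minimal sketch of how tracing code might stamp an event with the three new clocks. The struct and function names below are hypothetical; only the trace_clock_*() declarations come from this commit.

#include <linux/trace_clock.h>
#include <linux/types.h>

/* Hypothetical event record, purely for illustration. */
struct demo_event {
	u64 ts_local;	/* trace_clock_local(): fastest, CPU-local, may drift across CPUs */
	u64 ts_medium;	/* trace_clock(): scalable, up to ~1 jiffy of cross-CPU jitter */
	u64 ts_global;	/* trace_clock_global(): serialized, globally monotonic */
};

static void demo_stamp_event(struct demo_event *ev)
{
	ev->ts_local  = trace_clock_local();
	ev->ts_medium = trace_clock();
	ev->ts_global = trace_clock_global();
}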
diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
new file mode 100644
index 000000000000..7a8130384087
--- /dev/null
+++ b/include/linux/trace_clock.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_TRACE_CLOCK_H
+#define _LINUX_TRACE_CLOCK_H
+
+/*
+ * 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  - local: CPU-local trace clock
+ *  - medium: scalable global clock with some jitter
+ *  - global: globally monotonic, serialized clock
+ */
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+extern u64 notrace trace_clock_local(void);
+extern u64 notrace trace_clock(void);
+extern u64 notrace trace_clock_global(void);
+
+#endif /* _LINUX_TRACE_CLOCK_H */
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 664b6c0dc75a..c931fe0560cb 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8f19f1aa42b0..a8c275c01e83 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
 #include <linux/ring_buffer.h>
+#include <linux/trace_clock.h>
 #include <linux/ftrace_irq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
@@ -12,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>	/* used for sched_clock() (for now) */
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
@@ -112,14 +112,13 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
-/* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
 	u64 time;
 
 	preempt_disable_notrace();
 	/* shift to debug/test normalization and TIME_EXTENTS */
-	time = sched_clock() << DEBUG_SHIFT;
+	time = trace_clock_local() << DEBUG_SHIFT;
 	preempt_enable_no_resched_notrace();
 
 	return time;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
new file mode 100644
index 000000000000..2d4953f93560
--- /dev/null
+++ b/kernel/trace/trace_clock.c
@@ -0,0 +1,101 @@
+/*
+ * tracing clocks
+ *
+ *  Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Implements 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  - local: CPU-local trace clock
+ *  - medium: scalable global clock with some jitter
+ *  - global: globally monotonic, serialized clock
+ *
+ * Tracer plugins will choose a default from these clocks.
+ */
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/ktime.h>
+
+/*
+ * trace_clock_local(): the simplest and least coherent tracing clock.
+ *
+ * Useful for tracing that does not cross to other CPUs nor
+ * does it go through idle events.
+ */
+u64 notrace trace_clock_local(void)
+{
+	/*
+	 * sched_clock() is an architecture implemented, fast, scalable,
+	 * lockless clock. It is not guaranteed to be coherent across
+	 * CPUs, nor across CPU idle events.
+	 */
+	return sched_clock();
+}
+
+/*
+ * trace_clock(): 'in-between' trace clock. Not completely serialized,
+ * but not completely incorrect when crossing CPUs either.
+ *
+ * This is based on cpu_clock(), which will allow at most ~1 jiffy of
+ * jitter between CPUs. So it's a pretty scalable clock, but there
+ * can be offsets in the trace data.
+ */
+u64 notrace trace_clock(void)
+{
+	return cpu_clock(raw_smp_processor_id());
+}
+
+
+/*
+ * trace_clock_global(): special globally coherent trace clock
+ *
+ * It has higher overhead than the other trace clocks but is still
+ * an order of magnitude faster than GTOD derived hardware clocks.
+ *
+ * Used by plugins that need globally coherent timestamps.
+ */
+
+static u64 prev_trace_clock_time;
+
+static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+u64 notrace trace_clock_global(void)
+{
+	unsigned long flags;
+	int this_cpu;
+	u64 now;
+
+	raw_local_irq_save(flags);
+
+	this_cpu = raw_smp_processor_id();
+	now = cpu_clock(this_cpu);
+	/*
+	 * If in an NMI context then don't risk lockups and return the
+	 * cpu_clock() time:
+	 */
+	if (unlikely(in_nmi()))
+		goto out;
+
+	__raw_spin_lock(&trace_clock_lock);
+
+	/*
+	 * TODO: if this happens often then maybe we should reset
+	 * my_scd->clock to prev_trace_clock_time+1, to make sure
+	 * we start ticking with the local clock from now on?
+	 */
+	if ((s64)(now - prev_trace_clock_time) < 0)
+		now = prev_trace_clock_time + 1;
+
+	prev_trace_clock_time = now;
+
+	__raw_spin_unlock(&trace_clock_lock);
+
+out:
+	raw_local_irq_restore(flags);
+
+	return now;
+}
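The core of trace_clock_global() above is a clamp: if the per-CPU clock reads earlier than the last timestamp handed out, return last + 1 instead, so the returned sequence never goes backwards. A standalone sketch of just that clamp follows; it assumes a single caller, whereas the kernel version holds trace_clock_lock with interrupts disabled around the same logic.

#include <stdint.h>

/* Last timestamp returned by the global clock. */
static uint64_t prev_time;

/*
 * Turn a possibly-unsynchronized per-CPU clock value into a
 * never-going-backwards timestamp, mirroring the check in
 * trace_clock_global() above.
 */
static uint64_t clamp_monotonic(uint64_t now)
{
	/* Signed comparison so the check also behaves sanely across wraparound. */
	if ((int64_t)(now - prev_time) < 0)
		now = prev_time + 1;

	prev_time = now;
	return now;
}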