diff options
-rw-r--r-- | kernel/trace/Kconfig | 30 | ||||
-rw-r--r-- | kernel/trace/Makefile | 3 | ||||
-rw-r--r-- | kernel/trace/trace_benchmark.c | 176 | ||||
-rw-r--r-- | kernel/trace/trace_benchmark.h | 41 |
4 files changed, 250 insertions, 0 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8639819f6cef..d4409356f40d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -535,6 +535,36 @@ config MMIOTRACE_TEST | |||
535 | 535 | ||
536 | Say N, unless you absolutely know what you are doing. | 536 | Say N, unless you absolutely know what you are doing. |
537 | 537 | ||
538 | config TRACEPOINT_BENCHMARK | ||
539 | bool "Add tracepoint that benchmarks tracepoints" | ||
540 | help | ||
541 | This option creates the tracepoint "benchmark:benchmark_event". | ||
542 | When the tracepoint is enabled, it kicks off a kernel thread that | ||
543 | goes into an infinite loop (calling cond_resched() to let other tasks | ||
544 | run), and calls the tracepoint. Each iteration will record the time | ||
545 | it took to write to the tracepoint and the next iteration that | ||
546 | data will be passed to the tracepoint itself. That is, the tracepoint | ||
547 | will report the time it took to do the previous tracepoint. | ||
548 | The string written to the tracepoint is a static string of 128 bytes | ||
549 | to keep the time the same. The initial string is simply a write of | ||
550 | "START". The second string records the cold cache time of the first | ||
551 | write which is not added to the rest of the calculations. | ||
552 | |||
553 | As it is a tight loop, it benchmarks as hot cache. That's fine because | ||
554 | we care most about hot paths that are probably in cache already. | ||
555 | |||
556 | An example of the output: | ||
557 | |||
558 | START | ||
559 | first=3672 [COLD CACHED] | ||
560 | last=632 first=3672 max=632 min=632 avg=316 std=446 std^2=199712 | ||
561 | last=278 first=3672 max=632 min=278 avg=303 std=316 std^2=100337 | ||
562 | last=277 first=3672 max=632 min=277 avg=296 std=258 std^2=67064 | ||
563 | last=273 first=3672 max=632 min=273 avg=292 std=224 std^2=50411 | ||
564 | last=273 first=3672 max=632 min=273 avg=288 std=200 std^2=40389 | ||
565 | last=281 first=3672 max=632 min=273 avg=287 std=183 std^2=33666 | ||
566 | |||
567 | |||
538 | config RING_BUFFER_BENCHMARK | 568 | config RING_BUFFER_BENCHMARK |
539 | tristate "Ring buffer benchmark stress tester" | 569 | tristate "Ring buffer benchmark stress tester" |
540 | depends on RING_BUFFER | 570 | depends on RING_BUFFER |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 1378e84fbe39..2611613f14f1 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -17,6 +17,7 @@ ifdef CONFIG_TRACING_BRANCHES | |||
17 | KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING | 17 | KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING |
18 | endif | 18 | endif |
19 | 19 | ||
20 | CFLAGS_trace_benchmark.o := -I$(src) | ||
20 | CFLAGS_trace_events_filter.o := -I$(src) | 21 | CFLAGS_trace_events_filter.o := -I$(src) |
21 | 22 | ||
22 | obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o | 23 | obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o |
@@ -62,4 +63,6 @@ endif | |||
62 | obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o | 63 | obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o |
63 | obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o | 64 | obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o |
64 | 65 | ||
66 | obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o | ||
67 | |||
65 | libftrace-y := ftrace.o | 68 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c new file mode 100644 index 000000000000..7dc1c42dfee2 --- /dev/null +++ b/kernel/trace/trace_benchmark.c | |||
@@ -0,0 +1,176 @@ | |||
1 | #include <linux/delay.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/kthread.h> | ||
4 | #include <linux/trace_clock.h> | ||
5 | |||
6 | #define CREATE_TRACE_POINTS | ||
7 | #include "trace_benchmark.h" | ||
8 | |||
9 | static struct task_struct *bm_event_thread; | ||
10 | |||
11 | static char bm_str[BENCHMARK_EVENT_STRLEN] = "START"; | ||
12 | |||
13 | static u64 bm_total; | ||
14 | static u64 bm_totalsq; | ||
15 | static u64 bm_last; | ||
16 | static u64 bm_max; | ||
17 | static u64 bm_min; | ||
18 | static u64 bm_first; | ||
19 | static s64 bm_cnt; | ||
20 | |||
21 | /* | ||
22 | * This gets called in a loop recording the time it took to write | ||
23 | * the tracepoint. What it writes is the time statistics of the last | ||
24 | * tracepoint write. As there is nothing to write the first time | ||
25 | * it simply writes "START". As the first write is cold cache and | ||
26 | * the rest is hot, we save off that time in bm_first and it is | ||
27 | * reported as "first", which is shown in the second write to the | ||
28 | * tracepoint. The "first" field is writen within the statics from | ||
29 | * then on but never changes. | ||
30 | */ | ||
31 | static void trace_do_benchmark(void) | ||
32 | { | ||
33 | u64 start; | ||
34 | u64 stop; | ||
35 | u64 delta; | ||
36 | s64 stddev; | ||
37 | u64 seed; | ||
38 | u64 seedsq; | ||
39 | u64 last_seed; | ||
40 | unsigned int avg; | ||
41 | unsigned int std = 0; | ||
42 | |||
43 | /* Only run if the tracepoint is actually active */ | ||
44 | if (!trace_benchmark_event_enabled()) | ||
45 | return; | ||
46 | |||
47 | local_irq_disable(); | ||
48 | start = trace_clock_local(); | ||
49 | trace_benchmark_event(bm_str); | ||
50 | stop = trace_clock_local(); | ||
51 | local_irq_enable(); | ||
52 | |||
53 | bm_cnt++; | ||
54 | |||
55 | delta = stop - start; | ||
56 | |||
57 | /* | ||
58 | * The first read is cold cached, keep it separate from the | ||
59 | * other calculations. | ||
60 | */ | ||
61 | if (bm_cnt == 1) { | ||
62 | bm_first = delta; | ||
63 | scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, | ||
64 | "first=%llu [COLD CACHED]", bm_first); | ||
65 | return; | ||
66 | } | ||
67 | |||
68 | bm_last = delta; | ||
69 | |||
70 | bm_total += delta; | ||
71 | bm_totalsq += delta * delta; | ||
72 | |||
73 | if (delta > bm_max) | ||
74 | bm_max = delta; | ||
75 | if (!bm_min || delta < bm_min) | ||
76 | bm_min = delta; | ||
77 | |||
78 | if (bm_cnt > 1) { | ||
79 | /* | ||
80 | * Apply Welford's method to calculate standard deviation: | ||
81 | * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) | ||
82 | */ | ||
83 | stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total; | ||
84 | do_div(stddev, bm_cnt); | ||
85 | do_div(stddev, bm_cnt - 1); | ||
86 | } else | ||
87 | stddev = 0; | ||
88 | |||
89 | delta = bm_total; | ||
90 | do_div(delta, bm_cnt); | ||
91 | avg = delta; | ||
92 | |||
93 | if (stddev > 0) { | ||
94 | int i = 0; | ||
95 | /* | ||
96 | * stddev is the square of standard deviation but | ||
97 | * we want the actualy number. Use the average | ||
98 | * as our seed to find the std. | ||
99 | * | ||
100 | * The next try is: | ||
101 | * x = (x + N/x) / 2 | ||
102 | * | ||
103 | * Where N is the squared number to find the square | ||
104 | * root of. | ||
105 | */ | ||
106 | seed = avg; | ||
107 | do { | ||
108 | last_seed = seed; | ||
109 | seed = stddev; | ||
110 | if (!last_seed) | ||
111 | break; | ||
112 | do_div(seed, last_seed); | ||
113 | seed += last_seed; | ||
114 | do_div(seed, 2); | ||
115 | } while (i++ < 10 && last_seed != seed); | ||
116 | |||
117 | std = seed; | ||
118 | } | ||
119 | |||
120 | scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, | ||
121 | "last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld", | ||
122 | bm_last, bm_first, bm_max, bm_min, avg, std, stddev); | ||
123 | } | ||
124 | |||
/*
 * Thread function for the benchmark kthread: after a short initial sleep,
 * run trace_do_benchmark() back to back until the thread is asked to stop.
 * @arg is unused. Always returns 0.
 */
static int benchmark_event_kthread(void *arg)
{
	/* give the tracepoint a moment to become active before starting */
	msleep(100);

	for (;;) {
		if (kthread_should_stop())
			break;

		trace_do_benchmark();

		/* never sleep here, but do let other tasks get the CPU */
		cond_resched();
	}

	return 0;
}
143 | |||
144 | /* | ||
145 | * When the benchmark tracepoint is enabled, it calls this | ||
146 | * function and the thread that calls the tracepoint is created. | ||
147 | */ | ||
148 | void trace_benchmark_reg(void) | ||
149 | { | ||
150 | bm_event_thread = kthread_run(benchmark_event_kthread, | ||
151 | NULL, "event_benchmark"); | ||
152 | WARN_ON(!bm_event_thread); | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * When the benchmark tracepoint is disabled, it calls this | ||
157 | * function and the thread that calls the tracepoint is deleted | ||
158 | * and all the numbers are reset. | ||
159 | */ | ||
160 | void trace_benchmark_unreg(void) | ||
161 | { | ||
162 | if (!bm_event_thread) | ||
163 | return; | ||
164 | |||
165 | kthread_stop(bm_event_thread); | ||
166 | |||
167 | strcpy(bm_str, "START"); | ||
168 | bm_total = 0; | ||
169 | bm_totalsq = 0; | ||
170 | bm_last = 0; | ||
171 | bm_max = 0; | ||
172 | bm_min = 0; | ||
173 | bm_cnt = 0; | ||
174 | /* bm_first doesn't need to be reset but reset it anyway */ | ||
175 | bm_first = 0; | ||
176 | } | ||
diff --git a/kernel/trace/trace_benchmark.h b/kernel/trace/trace_benchmark.h new file mode 100644 index 000000000000..3c1df1df4e29 --- /dev/null +++ b/kernel/trace/trace_benchmark.h | |||
@@ -0,0 +1,41 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM benchmark | ||
3 | |||
4 | #if !defined(_TRACE_BENCHMARK_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_BENCHMARK_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | extern void trace_benchmark_reg(void); | ||
10 | extern void trace_benchmark_unreg(void); | ||
11 | |||
12 | #define BENCHMARK_EVENT_STRLEN 128 | ||
13 | |||
14 | TRACE_EVENT_FN(benchmark_event, | ||
15 | |||
16 | TP_PROTO(const char *str), | ||
17 | |||
18 | TP_ARGS(str), | ||
19 | |||
20 | TP_STRUCT__entry( | ||
21 | __array( char, str, BENCHMARK_EVENT_STRLEN ) | ||
22 | ), | ||
23 | |||
24 | TP_fast_assign( | ||
25 | memcpy(__entry->str, str, BENCHMARK_EVENT_STRLEN); | ||
26 | ), | ||
27 | |||
28 | TP_printk("%s", __entry->str), | ||
29 | |||
30 | trace_benchmark_reg, trace_benchmark_unreg | ||
31 | ); | ||
32 | |||
33 | #endif /* _TRACE_BENCHMARK_H */ | ||
34 | |||
35 | #undef TRACE_INCLUDE_FILE | ||
36 | #undef TRACE_INCLUDE_PATH | ||
37 | #define TRACE_INCLUDE_PATH . | ||
38 | #define TRACE_INCLUDE_FILE trace_benchmark | ||
39 | |||
40 | /* This part must be outside protection */ | ||
41 | #include <trace/define_trace.h> | ||