diff options
author | Steven Rostedt <rostedt@goodmis.org> | 2008-11-12 00:14:39 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-11-12 05:52:02 -0500 |
commit | 1f0d69a9fc815db82f15722bf05227190b1d714d (patch) | |
tree | 21e0a9664fd47827d9168938a965b39b85b6e287 | |
parent | cb9382e5a94e54d0356d730954396c746ae66d6e (diff) |
tracing: profile likely and unlikely annotations
Impact: new unlikely/likely profiler
Andrew Morton recently suggested having an in-kernel way to profile
likely and unlikely macros. This patch achieves that goal.
When configured, every(*) likely and unlikely macro gets a counter attached
to it. When the condition is hit, the hits and misses of that condition
are recorded. These numbers can later be retrieved by:
/debugfs/tracing/profile_likely - All likely markers
/debugfs/tracing/profile_unlikely - All unlikely markers.
# cat /debug/tracing/profile_unlikely | head
correct incorrect % Function File Line
------- --------- - -------- ---- ----
2167 0 0 do_arch_prctl process_64.c 832
0 0 0 do_arch_prctl process_64.c 804
2670 0 0 IS_ERR err.h 34
71230 5693 7 __switch_to process_64.c 673
76919 0 0 __switch_to process_64.c 639
43184 33743 43 __switch_to process_64.c 624
12740 64181 83 __switch_to process_64.c 594
12740 64174 83 __switch_to process_64.c 590
# cat /debug/tracing/profile_unlikely | \
awk '{ if ($3 > 25) print $0; }' |head -20
44963 35259 43 __switch_to process_64.c 624
12762 67454 84 __switch_to process_64.c 594
12762 67447 84 __switch_to process_64.c 590
1478 595 28 syscall_get_error syscall.h 51
0 2821 100 syscall_trace_leave ptrace.c 1567
0 1 100 native_smp_prepare_cpus smpboot.c 1237
86338 265881 75 calc_delta_fair sched_fair.c 408
210410 108540 34 calc_delta_mine sched.c 1267
0 54550 100 sched_info_queued sched_stats.h 222
51899 66435 56 pick_next_task_fair sched_fair.c 1422
6 10 62 yield_task_fair sched_fair.c 982
7325 2692 26 rt_policy sched.c 144
0 1270 100 pre_schedule_rt sched_rt.c 1261
1268 48073 97 pick_next_task_rt sched_rt.c 884
0 45181 100 sched_info_dequeued sched_stats.h 177
0 15 100 sched_move_task sched.c 8700
0 15 100 sched_move_task sched.c 8690
53167 33217 38 schedule sched.c 4457
0 80208 100 sched_info_switch sched_stats.h 270
30585 49631 61 context_switch sched.c 2619
# cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }'
39900 36577 47 pick_next_task sched.c 4397
20824 15233 42 switch_mm mmu_context_64.h 18
0 7 100 __cancel_work_timer workqueue.c 560
617 66484 99 clocksource_adjust timekeeping.c 456
0 346340 100 audit_syscall_exit auditsc.c 1570
38 347350 99 audit_get_context auditsc.c 732
0 345244 100 audit_syscall_entry auditsc.c 1541
38 1017 96 audit_free auditsc.c 1446
0 1090 100 audit_alloc auditsc.c 862
2618 1090 29 audit_alloc auditsc.c 858
0 6 100 move_masked_irq migration.c 9
1 198 99 probe_sched_wakeup trace_sched_switch.c 58
2 2 50 probe_wakeup trace_sched_wakeup.c 227
0 2 100 probe_wakeup_sched_switch trace_sched_wakeup.c 144
4514 2090 31 __grab_cache_page filemap.c 2149
12882 228786 94 mapping_unevictable pagemap.h 50
4 11 73 __flush_cpu_slab slub.c 1466
627757 330451 34 slab_free slub.c 1731
2959 61245 95 dentry_lru_del_init dcache.c 153
946 1217 56 load_elf_binary binfmt_elf.c 904
102 82 44 disk_put_part genhd.h 206
1 1 50 dst_gc_task dst.c 82
0 19 100 tcp_mss_split_point tcp_output.c 1126
As you can see by the above, there's a bit of work to do in rethinking
the use of some unlikelys and likelys. Note: the unlikely case had 71 hits
that were more than 25%.
Note: After submitting my first version of this patch, Andrew Morton
showed me a version written by Daniel Walker, where I picked up
the following ideas from:
1) Using __builtin_constant_p to avoid profiling fixed values.
2) Using __FILE__ instead of instruction pointers.
3) Using the preprocessor to stop all profiling of likely
annotations from vsyscall_64.c.
Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar
for their feedback on this patch.
(*) Not every unlikely is recorded; those that are used by vsyscalls
(a few of them) had to have profiling disabled.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 8 | ||||
-rw-r--r-- | include/asm-generic/vmlinux.lds.h | 14 | ||||
-rw-r--r-- | include/linux/compiler.h | 61 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 16 | ||||
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/trace_unlikely.c | 164 |
6 files changed, 261 insertions, 3 deletions
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0b8b6690a86d..2f90202e59b3 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -17,6 +17,14 @@ | |||
17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. | 17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. |
18 | */ | 18 | */ |
19 | 19 | ||
20 | /* Protect userspace from profiling */ | ||
21 | #ifdef CONFIG_TRACE_UNLIKELY_PROFILE | ||
22 | # undef likely | ||
23 | # undef unlikely | ||
24 | # define likely(x) likely_notrace(x) | ||
25 | # define unlikely(x) unlikely_notrace(x) | ||
26 | #endif | ||
27 | |||
20 | #include <linux/time.h> | 28 | #include <linux/time.h> |
21 | #include <linux/init.h> | 29 | #include <linux/init.h> |
22 | #include <linux/kernel.h> | 30 | #include <linux/kernel.h> |
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 80744606bad1..e10beb5335c9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h | |||
@@ -45,6 +45,17 @@ | |||
45 | #define MCOUNT_REC() | 45 | #define MCOUNT_REC() |
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | #ifdef CONFIG_TRACE_UNLIKELY_PROFILE | ||
49 | #define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ | ||
50 | *(_ftrace_likely) \ | ||
51 | VMLINUX_SYMBOL(__stop_likely_profile) = .; \ | ||
52 | VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ | ||
53 | *(_ftrace_unlikely) \ | ||
54 | VMLINUX_SYMBOL(__stop_unlikely_profile) = .; | ||
55 | #else | ||
56 | #define LIKELY_PROFILE() | ||
57 | #endif | ||
58 | |||
48 | /* .data section */ | 59 | /* .data section */ |
49 | #define DATA_DATA \ | 60 | #define DATA_DATA \ |
50 | *(.data) \ | 61 | *(.data) \ |
@@ -62,7 +73,8 @@ | |||
62 | VMLINUX_SYMBOL(__stop___markers) = .; \ | 73 | VMLINUX_SYMBOL(__stop___markers) = .; \ |
63 | VMLINUX_SYMBOL(__start___tracepoints) = .; \ | 74 | VMLINUX_SYMBOL(__start___tracepoints) = .; \ |
64 | *(__tracepoints) \ | 75 | *(__tracepoints) \ |
65 | VMLINUX_SYMBOL(__stop___tracepoints) = .; | 76 | VMLINUX_SYMBOL(__stop___tracepoints) = .; \ |
77 | LIKELY_PROFILE() | ||
66 | 78 | ||
67 | #define RO_DATA(align) \ | 79 | #define RO_DATA(align) \ |
68 | . = ALIGN((align)); \ | 80 | . = ALIGN((align)); \ |
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 98115d9d04da..935e30cfaf3c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h | |||
@@ -59,8 +59,65 @@ extern void __chk_io_ptr(const volatile void __iomem *); | |||
59 | * specific implementations come from the above header files | 59 | * specific implementations come from the above header files |
60 | */ | 60 | */ |
61 | 61 | ||
62 | #define likely(x) __builtin_expect(!!(x), 1) | 62 | #ifdef CONFIG_TRACE_UNLIKELY_PROFILE |
63 | #define unlikely(x) __builtin_expect(!!(x), 0) | 63 | struct ftrace_likely_data { |
64 | const char *func; | ||
65 | const char *file; | ||
66 | unsigned line; | ||
67 | unsigned long correct; | ||
68 | unsigned long incorrect; | ||
69 | }; | ||
70 | void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); | ||
71 | |||
72 | #define likely_notrace(x) __builtin_expect(!!(x), 1) | ||
73 | #define unlikely_notrace(x) __builtin_expect(!!(x), 0) | ||
74 | |||
75 | #define likely_check(x) ({ \ | ||
76 | int ______r; \ | ||
77 | static struct ftrace_likely_data \ | ||
78 | __attribute__((__aligned__(4))) \ | ||
79 | __attribute__((section("_ftrace_likely"))) \ | ||
80 | ______f = { \ | ||
81 | .func = __func__, \ | ||
82 | .file = __FILE__, \ | ||
83 | .line = __LINE__, \ | ||
84 | }; \ | ||
85 | ______f.line = __LINE__; \ | ||
86 | ______r = likely_notrace(x); \ | ||
87 | ftrace_likely_update(&______f, ______r, 1); \ | ||
88 | ______r; \ | ||
89 | }) | ||
90 | #define unlikely_check(x) ({ \ | ||
91 | int ______r; \ | ||
92 | static struct ftrace_likely_data \ | ||
93 | __attribute__((__aligned__(4))) \ | ||
94 | __attribute__((section("_ftrace_unlikely"))) \ | ||
95 | ______f = { \ | ||
96 | .func = __func__, \ | ||
97 | .file = __FILE__, \ | ||
98 | .line = __LINE__, \ | ||
99 | }; \ | ||
100 | ______f.line = __LINE__; \ | ||
101 | ______r = unlikely_notrace(x); \ | ||
102 | ftrace_likely_update(&______f, ______r, 0); \ | ||
103 | ______r; \ | ||
104 | }) | ||
105 | |||
106 | /* | ||
107 | * Using __builtin_constant_p(x) to ignore cases where the return | ||
108 | * value is always the same. This idea is taken from a similar patch | ||
109 | * written by Daniel Walker. | ||
110 | */ | ||
111 | # ifndef likely | ||
112 | # define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) | ||
113 | # endif | ||
114 | # ifndef unlikely | ||
115 | # define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) | ||
116 | # endif | ||
117 | #else | ||
118 | # define likely(x) __builtin_expect(!!(x), 1) | ||
119 | # define unlikely(x) __builtin_expect(!!(x), 0) | ||
120 | #endif | ||
64 | 121 | ||
65 | /* Optimization barrier */ | 122 | /* Optimization barrier */ |
66 | #ifndef barrier | 123 | #ifndef barrier |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d986216c8327..a604f24c755f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -159,6 +159,22 @@ config BOOT_TRACER | |||
159 | selected, because the self-tests are an initcall as well and that | 159 | selected, because the self-tests are an initcall as well and that |
160 | would invalidate the boot trace. ) | 160 | would invalidate the boot trace. ) |
161 | 161 | ||
162 | config TRACE_UNLIKELY_PROFILE | ||
163 | bool "Trace likely/unlikely profiler" | ||
164 | depends on DEBUG_KERNEL | ||
165 | select TRACING | ||
166 | help | ||
167 | This tracer profiles all the the likely and unlikely macros | ||
168 | in the kernel. It will display the results in: | ||
169 | |||
170 | /debugfs/tracing/profile_likely | ||
171 | /debugfs/tracing/profile_unlikely | ||
172 | |||
173 | Note: this will add a significant overhead, only turn this | ||
174 | on if you need to profile the system's use of these macros. | ||
175 | |||
176 | Say N if unsure. | ||
177 | |||
162 | config STACK_TRACER | 178 | config STACK_TRACER |
163 | bool "Trace max stack" | 179 | bool "Trace max stack" |
164 | depends on HAVE_FUNCTION_TRACER | 180 | depends on HAVE_FUNCTION_TRACER |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 3e1f361bbc17..98e70ee27986 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -25,5 +25,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o | |||
25 | obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o | 25 | obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o |
26 | obj-$(CONFIG_BOOT_TRACER) += trace_boot.o | 26 | obj-$(CONFIG_BOOT_TRACER) += trace_boot.o |
27 | obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o | 27 | obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o |
28 | obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o | ||
28 | 29 | ||
29 | libftrace-y := ftrace.o | 30 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/trace_unlikely.c b/kernel/trace/trace_unlikely.c new file mode 100644 index 000000000000..94932696069f --- /dev/null +++ b/kernel/trace/trace_unlikely.c | |||
@@ -0,0 +1,164 @@ | |||
1 | /* | ||
2 | * unlikely profiler | ||
3 | * | ||
4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> | ||
5 | */ | ||
6 | #include <linux/kallsyms.h> | ||
7 | #include <linux/seq_file.h> | ||
8 | #include <linux/spinlock.h> | ||
9 | #include <linux/debugfs.h> | ||
10 | #include <linux/uaccess.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/ftrace.h> | ||
13 | #include <linux/hash.h> | ||
14 | #include <linux/fs.h> | ||
15 | #include <asm/local.h> | ||
16 | #include "trace.h" | ||
17 | |||
18 | void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) | ||
19 | { | ||
20 | /* FIXME: Make this atomic! */ | ||
21 | if (val == expect) | ||
22 | f->correct++; | ||
23 | else | ||
24 | f->incorrect++; | ||
25 | } | ||
26 | EXPORT_SYMBOL(ftrace_likely_update); | ||
27 | |||
28 | struct ftrace_pointer { | ||
29 | void *start; | ||
30 | void *stop; | ||
31 | }; | ||
32 | |||
33 | static void * | ||
34 | t_next(struct seq_file *m, void *v, loff_t *pos) | ||
35 | { | ||
36 | struct ftrace_pointer *f = m->private; | ||
37 | struct ftrace_likely_data *p = v; | ||
38 | |||
39 | (*pos)++; | ||
40 | |||
41 | if (v == (void *)1) | ||
42 | return f->start; | ||
43 | |||
44 | ++p; | ||
45 | |||
46 | if ((void *)p >= (void *)f->stop) | ||
47 | return NULL; | ||
48 | |||
49 | return p; | ||
50 | } | ||
51 | |||
52 | static void *t_start(struct seq_file *m, loff_t *pos) | ||
53 | { | ||
54 | void *t = (void *)1; | ||
55 | loff_t l = 0; | ||
56 | |||
57 | for (; t && l < *pos; t = t_next(m, t, &l)) | ||
58 | ; | ||
59 | |||
60 | return t; | ||
61 | } | ||
62 | |||
63 | static void t_stop(struct seq_file *m, void *p) | ||
64 | { | ||
65 | } | ||
66 | |||
67 | static int t_show(struct seq_file *m, void *v) | ||
68 | { | ||
69 | struct ftrace_likely_data *p = v; | ||
70 | const char *f; | ||
71 | unsigned long percent; | ||
72 | |||
73 | if (v == (void *)1) { | ||
74 | seq_printf(m, " correct incorrect %% " | ||
75 | " Function " | ||
76 | " File Line\n" | ||
77 | " ------- --------- - " | ||
78 | " -------- " | ||
79 | " ---- ----\n"); | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | /* Only print the file, not the path */ | ||
84 | f = p->file + strlen(p->file); | ||
85 | while (f >= p->file && *f != '/') | ||
86 | f--; | ||
87 | f++; | ||
88 | |||
89 | if (p->correct) { | ||
90 | percent = p->incorrect * 100; | ||
91 | percent /= p->correct + p->incorrect; | ||
92 | } else | ||
93 | percent = p->incorrect ? 100 : 0; | ||
94 | |||
95 | seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent); | ||
96 | seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | static struct seq_operations tracing_likely_seq_ops = { | ||
101 | .start = t_start, | ||
102 | .next = t_next, | ||
103 | .stop = t_stop, | ||
104 | .show = t_show, | ||
105 | }; | ||
106 | |||
107 | static int tracing_likely_open(struct inode *inode, struct file *file) | ||
108 | { | ||
109 | int ret; | ||
110 | |||
111 | ret = seq_open(file, &tracing_likely_seq_ops); | ||
112 | if (!ret) { | ||
113 | struct seq_file *m = file->private_data; | ||
114 | m->private = (void *)inode->i_private; | ||
115 | } | ||
116 | |||
117 | return ret; | ||
118 | } | ||
119 | |||
120 | static struct file_operations tracing_likely_fops = { | ||
121 | .open = tracing_likely_open, | ||
122 | .read = seq_read, | ||
123 | .llseek = seq_lseek, | ||
124 | }; | ||
125 | |||
126 | extern unsigned long __start_likely_profile[]; | ||
127 | extern unsigned long __stop_likely_profile[]; | ||
128 | extern unsigned long __start_unlikely_profile[]; | ||
129 | extern unsigned long __stop_unlikely_profile[]; | ||
130 | |||
131 | static struct ftrace_pointer ftrace_likely_pos = { | ||
132 | .start = __start_likely_profile, | ||
133 | .stop = __stop_likely_profile, | ||
134 | }; | ||
135 | |||
136 | static struct ftrace_pointer ftrace_unlikely_pos = { | ||
137 | .start = __start_unlikely_profile, | ||
138 | .stop = __stop_unlikely_profile, | ||
139 | }; | ||
140 | |||
141 | static __init int ftrace_unlikely_init(void) | ||
142 | { | ||
143 | struct dentry *d_tracer; | ||
144 | struct dentry *entry; | ||
145 | |||
146 | d_tracer = tracing_init_dentry(); | ||
147 | |||
148 | entry = debugfs_create_file("profile_likely", 0444, d_tracer, | ||
149 | &ftrace_likely_pos, | ||
150 | &tracing_likely_fops); | ||
151 | if (!entry) | ||
152 | pr_warning("Could not create debugfs 'profile_likely' entry\n"); | ||
153 | |||
154 | entry = debugfs_create_file("profile_unlikely", 0444, d_tracer, | ||
155 | &ftrace_unlikely_pos, | ||
156 | &tracing_likely_fops); | ||
157 | if (!entry) | ||
158 | pr_warning("Could not create debugfs" | ||
159 | " 'profile_unlikely' entry\n"); | ||
160 | |||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | device_initcall(ftrace_unlikely_init); | ||