diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-11-06 14:35:57 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-11-07 08:40:47 -0500 |
commit | 77170988ff67fb959602ab4df296ae676f556a59 (patch) | |
tree | 8e817e2d881e54cdeada21d1444b704d1a49bfcf /tools | |
parent | a614d01bdd0cc8200d917da25f5a3d539b944193 (diff) |
perf trace: Don't relookup fields by name in each sample
Instead, do the lookups just once, when creating the tracepoints, initially for
the most common ones, raw_syscalls:sys_{enter,exit}.
It works by having evsel->priv point to a per-tracepoint structure with an
entry for each field, giving direct access: each entry holds the field's offset
and a function to get the value from the sample, doing the byte swap if needed.
Using a simple workload that does M million write syscalls, we go from:
# perf stat -i -e cycles /tmp/oldperf trace ./sc_hello 100 > /dev/null
Performance counter stats for '/tmp/oldperf trace ./sc_hello 100':
8,366,771,459 cycles
2.668025928 seconds time elapsed
to:
# perf stat -i -e cycles perf trace ./sc_hello 100 > /dev/null
Performance counter stats for 'perf trace ./sc_hello 100':
8,345,187,650 cycles
2.631748425 seconds time elapsed
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-eyfhvoo510a5i10b27dnvm88@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/builtin-trace.c | 199 |
1 files changed, 188 insertions, 11 deletions
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ee59df300a34..329b7832b5da 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
@@ -35,6 +35,189 @@ | |||
35 | # define MADV_UNMERGEABLE 13 | 35 | # define MADV_UNMERGEABLE 13 |
36 | #endif | 36 | #endif |
37 | 37 | ||
38 | struct tp_field { | ||
39 | int offset; | ||
40 | union { | ||
41 | u64 (*integer)(struct tp_field *field, struct perf_sample *sample); | ||
42 | void *(*pointer)(struct tp_field *field, struct perf_sample *sample); | ||
43 | }; | ||
44 | }; | ||
45 | |||
46 | #define TP_UINT_FIELD(bits) \ | ||
47 | static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ | ||
48 | { \ | ||
49 | return *(u##bits *)(sample->raw_data + field->offset); \ | ||
50 | } | ||
51 | |||
52 | TP_UINT_FIELD(8); | ||
53 | TP_UINT_FIELD(16); | ||
54 | TP_UINT_FIELD(32); | ||
55 | TP_UINT_FIELD(64); | ||
56 | |||
57 | #define TP_UINT_FIELD__SWAPPED(bits) \ | ||
58 | static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ | ||
59 | { \ | ||
60 | u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ | ||
61 | return bswap_##bits(value);\ | ||
62 | } | ||
63 | |||
64 | TP_UINT_FIELD__SWAPPED(16); | ||
65 | TP_UINT_FIELD__SWAPPED(32); | ||
66 | TP_UINT_FIELD__SWAPPED(64); | ||
67 | |||
68 | static int tp_field__init_uint(struct tp_field *field, | ||
69 | struct format_field *format_field, | ||
70 | bool needs_swap) | ||
71 | { | ||
72 | field->offset = format_field->offset; | ||
73 | |||
74 | switch (format_field->size) { | ||
75 | case 1: | ||
76 | field->integer = tp_field__u8; | ||
77 | break; | ||
78 | case 2: | ||
79 | field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; | ||
80 | break; | ||
81 | case 4: | ||
82 | field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; | ||
83 | break; | ||
84 | case 8: | ||
85 | field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; | ||
86 | break; | ||
87 | default: | ||
88 | return -1; | ||
89 | } | ||
90 | |||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) | ||
95 | { | ||
96 | return sample->raw_data + field->offset; | ||
97 | } | ||
98 | |||
99 | static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) | ||
100 | { | ||
101 | field->offset = format_field->offset; | ||
102 | field->pointer = tp_field__ptr; | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | struct syscall_tp { | ||
107 | struct tp_field id; | ||
108 | union { | ||
109 | struct tp_field args, ret; | ||
110 | }; | ||
111 | }; | ||
112 | |||
113 | static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, | ||
114 | struct tp_field *field, | ||
115 | const char *name) | ||
116 | { | ||
117 | struct format_field *format_field = perf_evsel__field(evsel, name); | ||
118 | |||
119 | if (format_field == NULL) | ||
120 | return -1; | ||
121 | |||
122 | return tp_field__init_uint(field, format_field, evsel->needs_swap); | ||
123 | } | ||
124 | |||
/*
 * Initialize the integer accessor for member @name of the struct syscall_tp
 * hanging off @evsel->priv, looking the tracepoint field up under the same
 * name (the member name is stringified).
 *
 * Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesized and evaluated exactly once, and the temporaries use
 * names unlikely to clash with identifiers appearing in the caller's argument.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct perf_evsel *tp_evsel_ = (evsel); \
	   struct syscall_tp *tp_sc_ = tp_evsel_->priv; \
	   perf_evsel__init_tp_uint_field(tp_evsel_, &tp_sc_->name, #name); })
128 | |||
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as an in-place (pointer) accessor for it.
 *
 * Returns -1 if the field is not found, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
140 | |||
/*
 * Initialize the pointer accessor for member @name of the struct syscall_tp
 * hanging off @evsel->priv, looking the tracepoint field up under the same
 * name (the member name is stringified).
 *
 * Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesized and evaluated exactly once, and the temporaries use
 * names unlikely to clash with identifiers appearing in the caller's argument.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct perf_evsel *tp_evsel_ = (evsel); \
	   struct syscall_tp *tp_sc_ = tp_evsel_->priv; \
	   perf_evsel__init_tp_ptr_field(tp_evsel_, &tp_sc_->name, #name); })
144 | |||
145 | static void perf_evsel__delete_priv(struct perf_evsel *evsel) | ||
146 | { | ||
147 | free(evsel->priv); | ||
148 | evsel->priv = NULL; | ||
149 | perf_evsel__delete(evsel); | ||
150 | } | ||
151 | |||
152 | static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, | ||
153 | void *handler, int idx) | ||
154 | { | ||
155 | struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction, idx); | ||
156 | |||
157 | if (evsel) { | ||
158 | evsel->priv = malloc(sizeof(struct syscall_tp)); | ||
159 | |||
160 | if (evsel->priv == NULL) | ||
161 | goto out_delete; | ||
162 | |||
163 | if (perf_evsel__init_sc_tp_uint_field(evsel, id)) | ||
164 | goto out_delete; | ||
165 | |||
166 | evsel->handler = handler; | ||
167 | } | ||
168 | |||
169 | return evsel; | ||
170 | |||
171 | out_delete: | ||
172 | perf_evsel__delete_priv(evsel); | ||
173 | return NULL; | ||
174 | } | ||
175 | |||
/*
 * Read integer member @name of the struct syscall_tp stored in @evsel->priv
 * from @sample, by calling the accessor's ->integer reader.  Evaluates to the
 * u64 that reader returns.
 *
 * The accessor must have been initialized beforehand (see
 * perf_evsel__init_sc_tp_uint_field()).  Macro arguments are parenthesized
 * and the temporary uses a name unlikely to clash with identifiers appearing
 * in the caller's arguments.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *tp_fields_ = (evsel)->priv; \
	   tp_fields_->name.integer(&tp_fields_->name, (sample)); })
179 | |||
/*
 * Get the address of member @name of the struct syscall_tp stored in
 * @evsel->priv within @sample, by calling the accessor's ->pointer reader.
 * Evaluates to the void * that reader returns.
 *
 * The accessor must have been initialized beforehand (see
 * perf_evsel__init_sc_tp_ptr_field()).  Macro arguments are parenthesized
 * and the temporary uses a name unlikely to clash with identifiers appearing
 * in the caller's arguments.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *tp_fields_ = (evsel)->priv; \
	   tp_fields_->name.pointer(&tp_fields_->name, (sample)); })
183 | |||
184 | static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, | ||
185 | void *sys_enter_handler, | ||
186 | void *sys_exit_handler) | ||
187 | { | ||
188 | int ret = -1; | ||
189 | int idx = evlist->nr_entries; | ||
190 | struct perf_evsel *sys_enter, *sys_exit; | ||
191 | |||
192 | sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler, idx++); | ||
193 | if (sys_enter == NULL) | ||
194 | goto out; | ||
195 | |||
196 | if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) | ||
197 | goto out_delete_sys_enter; | ||
198 | |||
199 | sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler, idx++); | ||
200 | if (sys_exit == NULL) | ||
201 | goto out_delete_sys_enter; | ||
202 | |||
203 | if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) | ||
204 | goto out_delete_sys_exit; | ||
205 | |||
206 | perf_evlist__add(evlist, sys_enter); | ||
207 | perf_evlist__add(evlist, sys_exit); | ||
208 | |||
209 | ret = 0; | ||
210 | out: | ||
211 | return ret; | ||
212 | |||
213 | out_delete_sys_exit: | ||
214 | perf_evsel__delete_priv(sys_exit); | ||
215 | out_delete_sys_enter: | ||
216 | perf_evsel__delete_priv(sys_enter); | ||
217 | goto out; | ||
218 | } | ||
219 | |||
220 | |||
38 | struct syscall_arg { | 221 | struct syscall_arg { |
39 | unsigned long val; | 222 | unsigned long val; |
40 | struct thread *thread; | 223 | struct thread *thread; |
@@ -1392,7 +1575,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, | |||
1392 | void *args; | 1575 | void *args; |
1393 | size_t printed = 0; | 1576 | size_t printed = 0; |
1394 | struct thread *thread; | 1577 | struct thread *thread; |
1395 | int id = perf_evsel__intval(evsel, sample, "id"); | 1578 | int id = perf_evsel__sc_tp_uint(evsel, id, sample); |
1396 | struct syscall *sc = trace__syscall_info(trace, evsel, id); | 1579 | struct syscall *sc = trace__syscall_info(trace, evsel, id); |
1397 | struct thread_trace *ttrace; | 1580 | struct thread_trace *ttrace; |
1398 | 1581 | ||
@@ -1407,12 +1590,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, | |||
1407 | if (ttrace == NULL) | 1590 | if (ttrace == NULL) |
1408 | return -1; | 1591 | return -1; |
1409 | 1592 | ||
1410 | args = perf_evsel__rawptr(evsel, sample, "args"); | 1593 | args = perf_evsel__sc_tp_ptr(evsel, args, sample); |
1411 | if (args == NULL) { | ||
1412 | fprintf(trace->output, "Problems reading syscall arguments\n"); | ||
1413 | return -1; | ||
1414 | } | ||
1415 | |||
1416 | ttrace = thread->priv; | 1594 | ttrace = thread->priv; |
1417 | 1595 | ||
1418 | if (ttrace->entry_str == NULL) { | 1596 | if (ttrace->entry_str == NULL) { |
@@ -1445,7 +1623,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, | |||
1445 | int ret; | 1623 | int ret; |
1446 | u64 duration = 0; | 1624 | u64 duration = 0; |
1447 | struct thread *thread; | 1625 | struct thread *thread; |
1448 | int id = perf_evsel__intval(evsel, sample, "id"); | 1626 | int id = perf_evsel__sc_tp_uint(evsel, id, sample); |
1449 | struct syscall *sc = trace__syscall_info(trace, evsel, id); | 1627 | struct syscall *sc = trace__syscall_info(trace, evsel, id); |
1450 | struct thread_trace *ttrace; | 1628 | struct thread_trace *ttrace; |
1451 | 1629 | ||
@@ -1463,7 +1641,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, | |||
1463 | if (trace->summary) | 1641 | if (trace->summary) |
1464 | thread__update_stats(ttrace, id, sample); | 1642 | thread__update_stats(ttrace, id, sample); |
1465 | 1643 | ||
1466 | ret = perf_evsel__intval(evsel, sample, "ret"); | 1644 | ret = perf_evsel__sc_tp_uint(evsel, ret, sample); |
1467 | 1645 | ||
1468 | if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { | 1646 | if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { |
1469 | trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); | 1647 | trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); |
@@ -1675,8 +1853,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
1675 | goto out; | 1853 | goto out; |
1676 | } | 1854 | } |
1677 | 1855 | ||
1678 | if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) || | 1856 | if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) |
1679 | perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) | ||
1680 | goto out_error_tp; | 1857 | goto out_error_tp; |
1681 | 1858 | ||
1682 | perf_evlist__add_vfs_getname(evlist); | 1859 | perf_evlist__add_vfs_getname(evlist); |