diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2018-10-18 15:38:27 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2018-10-19 10:58:33 -0400 |
commit | 5067a8cdd4ce3588fca2e0ee554f0f081650de8f (patch) | |
tree | 8e3b7c085e1d120dfd7d5ac14ca21fc1229637d8 /tools | |
parent | 4ba8b3ebf4f8f583c2c01da20e4d110a5881ffdd (diff) |
perf trace: Introduce --max-events
Allow stopping tracing after a number of events take place, considering
strace-like syscalls formatting as one event per enter/exit pair or when
in a multi-process tracing session a syscall is interrupted and printed
ending with '...'.
Examples included in the documentation:
Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):
$ perf trace -e open* --max-events 4
[root@jouet perf]# trace -e open* --max-events 4
2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
$
Trace the first minor page fault when running a workload:
# perf trace -F min --max-stack=7 --max-events 1 sleep 1
0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
__clear_user ([kernel.kallsyms])
load_elf_binary ([kernel.kallsyms])
search_binary_handler ([kernel.kallsyms])
__do_execve_file.isra.33 ([kernel.kallsyms])
__x64_sys_execve ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
#
Trace the next min page page fault to take place on the first CPU:
# perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
js::gc::FreeSpan::initAsEmpty (inlined)
js::gc::Arena::setAsNotAllocated (inlined)
js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
JSThinInlineString::new_<(js::AllowGC)1> (inlined)
AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
[0x18b26e6bc2bd] (/tmp/perf-17136.map)
Tracing the next four ext4 operations on a specific CPU:
# perf trace -e ext4:*/call-graph=fp/ --max-events 4 --cpu 3
0.000 mutt/3849 ext4:ext4_es_lookup_extent_enter:dev 253,2 ino 57277 lblk 0
ext4_es_lookup_extent ([kernel.kallsyms])
read (/usr/lib64/libc-2.26.so)
0.097 mutt/3849 ext4:ext4_es_lookup_extent_exit:dev 253,2 ino 57277 found 0 [0/0) 0
ext4_es_lookup_extent ([kernel.kallsyms])
read (/usr/lib64/libc-2.26.so)
0.141 mutt/3849 ext4:ext4_ext_map_blocks_enter:dev 253,2 ino 57277 lblk 0 len 1 flags
ext4_ext_map_blocks ([kernel.kallsyms])
read (/usr/lib64/libc-2.26.so)
0.184 mutt/3849 ext4:ext4_ext_load_extent:dev 253,2 ino 57277 lblk 1516511 pblk 18446744071750013657
__read_extent_tree_block ([kernel.kallsyms])
__read_extent_tree_block ([kernel.kallsyms])
ext4_find_extent ([kernel.kallsyms])
ext4_ext_map_blocks ([kernel.kallsyms])
ext4_map_blocks ([kernel.kallsyms])
ext4_mpage_readpages ([kernel.kallsyms])
read_pages ([kernel.kallsyms])
__do_page_cache_readahead ([kernel.kallsyms])
ondemand_readahead ([kernel.kallsyms])
generic_file_read_iter ([kernel.kallsyms])
__vfs_read ([kernel.kallsyms])
vfs_read ([kernel.kallsyms])
ksys_read ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
read (/usr/lib64/libc-2.26.so)
#
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Rudá Moura <ruda.moura@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-sweh107bs7ol5bzls0m4tqdz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-trace.txt | 47 | ||||
-rw-r--r-- | tools/perf/builtin-trace.c | 21 |
2 files changed, 68 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 115db9e06ecd..0d1a1cd4d328 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt | |||
@@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. | |||
171 | --kernel-syscall-graph:: | 171 | --kernel-syscall-graph:: |
172 | Show the kernel callchains on the syscall exit path. | 172 | Show the kernel callchains on the syscall exit path. |
173 | 173 | ||
174 | --max-events=N:: | ||
175 | Stop after processing N events. Note that strace-like events are considered | ||
176 | only at exit time or when a syscall is interrupted, i.e. in those cases this | ||
177 | option is equivalent to the number of lines printed. | ||
178 | |||
174 | --max-stack:: | 179 | --max-stack:: |
175 | Set the stack depth limit when parsing the callchain, anything | 180 | Set the stack depth limit when parsing the callchain, anything |
176 | beyond the specified depth will be ignored. Note that at this point | 181 | beyond the specified depth will be ignored. Note that at this point |
@@ -238,6 +243,48 @@ Trace syscalls, major and minor pagefaults: | |||
238 | As you can see, there was major pagefault in python process, from | 243 | As you can see, there was major pagefault in python process, from |
239 | CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so. | 244 | CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so. |
240 | 245 | ||
246 | Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here): | ||
247 | |||
248 | $ perf trace -e open* --max-events 4 | ||
249 | [root@jouet perf]# trace -e open* --max-events 4 | ||
250 | 2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31 | ||
251 | 2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 | ||
252 | 3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 | ||
253 | 4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3 | ||
254 | $ | ||
255 | |||
256 | Trace the first minor page fault when running a workload: | ||
257 | |||
258 | # perf trace -F min --max-stack=7 --max-events 1 sleep 1 | ||
259 | 0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k) | ||
260 | __clear_user ([kernel.kallsyms]) | ||
261 | load_elf_binary ([kernel.kallsyms]) | ||
262 | search_binary_handler ([kernel.kallsyms]) | ||
263 | __do_execve_file.isra.33 ([kernel.kallsyms]) | ||
264 | __x64_sys_execve ([kernel.kallsyms]) | ||
265 | do_syscall_64 ([kernel.kallsyms]) | ||
266 | entry_SYSCALL_64 ([kernel.kallsyms]) | ||
267 | # | ||
268 | |||
269 | Trace the next min page page fault to take place on the first CPU: | ||
270 | |||
271 | # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0 | ||
272 | 0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.) | ||
273 | js::gc::FreeSpan::initAsEmpty (inlined) | ||
274 | js::gc::Arena::setAsNotAllocated (inlined) | ||
275 | js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so) | ||
276 | js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so) | ||
277 | js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so) | ||
278 | js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so) | ||
279 | js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined) | ||
280 | js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so) | ||
281 | js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined) | ||
282 | JSThinInlineString::new_<(js::AllowGC)1> (inlined) | ||
283 | AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined) | ||
284 | js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so) | ||
285 | [0x18b26e6bc2bd] (/tmp/perf-17136.map) | ||
286 | # | ||
287 | |||
241 | SEE ALSO | 288 | SEE ALSO |
242 | -------- | 289 | -------- |
243 | linkperf:perf-record[1], linkperf:perf-script[1] | 290 | linkperf:perf-record[1], linkperf:perf-script[1] |
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 90289f31dd87..74638034861c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
@@ -89,6 +89,8 @@ struct trace { | |||
89 | u64 base_time; | 89 | u64 base_time; |
90 | FILE *output; | 90 | FILE *output; |
91 | unsigned long nr_events; | 91 | unsigned long nr_events; |
92 | unsigned long nr_events_printed; | ||
93 | unsigned long max_events; | ||
92 | struct strlist *ev_qualifier; | 94 | struct strlist *ev_qualifier; |
93 | struct { | 95 | struct { |
94 | size_t nr; | 96 | size_t nr; |
@@ -1664,6 +1666,8 @@ static int trace__printf_interrupted_entry(struct trace *trace) | |||
1664 | printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); | 1666 | printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); |
1665 | ttrace->entry_pending = false; | 1667 | ttrace->entry_pending = false; |
1666 | 1668 | ||
1669 | ++trace->nr_events_printed; | ||
1670 | |||
1667 | return printed; | 1671 | return printed; |
1668 | } | 1672 | } |
1669 | 1673 | ||
@@ -1940,6 +1944,13 @@ errno_print: { | |||
1940 | 1944 | ||
1941 | fputc('\n', trace->output); | 1945 | fputc('\n', trace->output); |
1942 | 1946 | ||
1947 | /* | ||
1948 | * We only consider an 'event' for the sake of --max-events a non-filtered | ||
1949 | * sys_enter + sys_exit and other tracepoint events. | ||
1950 | */ | ||
1951 | if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX) | ||
1952 | interrupted = true; | ||
1953 | |||
1943 | if (callchain_ret > 0) | 1954 | if (callchain_ret > 0) |
1944 | trace__fprintf_callchain(trace, sample); | 1955 | trace__fprintf_callchain(trace, sample); |
1945 | else if (callchain_ret < 0) | 1956 | else if (callchain_ret < 0) |
@@ -2072,6 +2083,7 @@ static void bpf_output__fprintf(struct trace *trace, | |||
2072 | { | 2083 | { |
2073 | binary__fprintf(sample->raw_data, sample->raw_size, 8, | 2084 | binary__fprintf(sample->raw_data, sample->raw_size, 8, |
2074 | bpf_output__printer, NULL, trace->output); | 2085 | bpf_output__printer, NULL, trace->output); |
2086 | ++trace->nr_events_printed; | ||
2075 | } | 2087 | } |
2076 | 2088 | ||
2077 | static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, | 2089 | static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, |
@@ -2127,6 +2139,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, | |||
2127 | event_format__fprintf(evsel->tp_format, sample->cpu, | 2139 | event_format__fprintf(evsel->tp_format, sample->cpu, |
2128 | sample->raw_data, sample->raw_size, | 2140 | sample->raw_data, sample->raw_size, |
2129 | trace->output); | 2141 | trace->output); |
2142 | ++trace->nr_events_printed; | ||
2130 | } | 2143 | } |
2131 | } | 2144 | } |
2132 | 2145 | ||
@@ -2225,6 +2238,8 @@ static int trace__pgfault(struct trace *trace, | |||
2225 | trace__fprintf_callchain(trace, sample); | 2238 | trace__fprintf_callchain(trace, sample); |
2226 | else if (callchain_ret < 0) | 2239 | else if (callchain_ret < 0) |
2227 | pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); | 2240 | pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); |
2241 | |||
2242 | ++trace->nr_events_printed; | ||
2228 | out: | 2243 | out: |
2229 | err = 0; | 2244 | err = 0; |
2230 | out_put: | 2245 | out_put: |
@@ -2402,6 +2417,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st | |||
2402 | tracepoint_handler handler = evsel->handler; | 2417 | tracepoint_handler handler = evsel->handler; |
2403 | handler(trace, evsel, event, sample); | 2418 | handler(trace, evsel, event, sample); |
2404 | } | 2419 | } |
2420 | |||
2421 | if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX) | ||
2422 | interrupted = true; | ||
2405 | } | 2423 | } |
2406 | 2424 | ||
2407 | static int trace__add_syscall_newtp(struct trace *trace) | 2425 | static int trace__add_syscall_newtp(struct trace *trace) |
@@ -3249,6 +3267,7 @@ int cmd_trace(int argc, const char **argv) | |||
3249 | .trace_syscalls = false, | 3267 | .trace_syscalls = false, |
3250 | .kernel_syscallchains = false, | 3268 | .kernel_syscallchains = false, |
3251 | .max_stack = UINT_MAX, | 3269 | .max_stack = UINT_MAX, |
3270 | .max_events = ULONG_MAX, | ||
3252 | }; | 3271 | }; |
3253 | const char *output_name = NULL; | 3272 | const char *output_name = NULL; |
3254 | const struct option trace_options[] = { | 3273 | const struct option trace_options[] = { |
@@ -3301,6 +3320,8 @@ int cmd_trace(int argc, const char **argv) | |||
3301 | &record_parse_callchain_opt), | 3320 | &record_parse_callchain_opt), |
3302 | OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, | 3321 | OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, |
3303 | "Show the kernel callchains on the syscall exit path"), | 3322 | "Show the kernel callchains on the syscall exit path"), |
3323 | OPT_ULONG(0, "max-events", &trace.max_events, | ||
3324 | "Set the maximum number of events to print, exit after that is reached. "), | ||
3304 | OPT_UINTEGER(0, "min-stack", &trace.min_stack, | 3325 | OPT_UINTEGER(0, "min-stack", &trace.min_stack, |
3305 | "Set the minimum stack depth when parsing the callchain, " | 3326 | "Set the minimum stack depth when parsing the callchain, " |
3306 | "anything below the specified depth will be ignored."), | 3327 | "anything below the specified depth will be ignored."), |