diff options
author | Stanislav Fomichev <stfomichev@yandex-team.ru> | 2014-06-26 12:14:25 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2014-06-26 15:07:43 -0400 |
commit | 598d02c5a07b60e5c824184cdaf697b70f3c452a (patch) | |
tree | 65c6f20c05c126e448bee268a8052b5eda54348e /tools/perf | |
parent | 0c82adcf141935b6312593a53f87342dbb12b704 (diff) |
perf trace: Add support for pagefault tracing
This patch adds optional pagefault tracing support to 'perf trace'.
Using the -F/--pf option, the user can specify whether minor, major or
all pagefault events should be traced. This patch adds only live mode;
record and replay will come in a separate patch.
Example output:
1756272.905 ( 0.000 ms): curl/5937 majfault [0x7fa7261978b6] => /usr/lib/x86_64-linux-gnu/libkrb5.so.26.0.0@0x85288 (d.)
1862866.036 ( 0.000 ms): wget/8460 majfault [__clear_user+0x3f] => 0x659cb4 (?k)
Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1403799268-1367-3-git-send-email-stfomichev@yandex-team.ru
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/Documentation/perf-trace.txt | 39 | ||||
-rw-r--r-- | tools/perf/builtin-trace.c | 125 |
2 files changed, 163 insertions, 1 deletions
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index fae38d9a44a4..72397d9aa2ec 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt | |||
@@ -107,6 +107,45 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. | |||
107 | Show tool stats such as number of times fd->pathname was discovered thru | 107 | Show tool stats such as number of times fd->pathname was discovered thru |
108 | hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. | 108 | hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. |
109 | 109 | ||
110 | -F=[all|min|maj]:: | ||
111 | --pf=[all|min|maj]:: | ||
112 | Trace pagefaults. Optionally, you can specify whether you want minor, | ||
113 | major or all pagefaults. Default value is maj. | ||
114 | |||
115 | PAGEFAULTS | ||
116 | ---------- | ||
117 | |||
118 | When tracing pagefaults, the format of the trace is as follows: | ||
119 | |||
120 | <min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>). | ||
121 | |||
122 | - min/maj indicates whether fault event is minor or major; | ||
123 | - ip.symbol shows the symbol for the instruction pointer (the code that generated the | ||
124 | fault); if no debug symbols are available, perf trace will print the raw IP; | ||
125 | - addr.dso shows DSO for the faulted address; | ||
126 | - map type is either 'd' for non-executable maps or 'x' for executable maps; | ||
127 | - addr level is either 'k' for kernel dso or '.' for user dso. | ||
128 | |||
129 | For symbols resolution you may need to install debugging symbols. | ||
130 | |||
131 | Please be aware that the duration is currently always 0 and doesn't reflect the actual | ||
132 | time it took for the fault to be handled! | ||
133 | |||
134 | When --verbose is specified, perf trace tries to print all available information | ||
135 | for both the IP and the fault address in the form of dso@symbol+offset. | ||
136 | |||
137 | EXAMPLES | ||
138 | -------- | ||
139 | |||
140 | Trace syscalls, major and minor pagefaults: | ||
141 | |||
142 | $ perf trace -F all | ||
143 | |||
144 | 1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.) | ||
145 | |||
146 | As you can see, there was a major pagefault in the python process, from the | ||
147 | CRYPTO_push_info_ routine, which faulted somewhere in libcrypto.so. | ||
148 | |||
110 | SEE ALSO | 149 | SEE ALSO |
111 | -------- | 150 | -------- |
112 | linkperf:perf-record[1], linkperf:perf-script[1] | 151 | linkperf:perf-record[1], linkperf:perf-script[1] |
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4a9e26b731fe..1985c3b8cc06 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
@@ -1178,6 +1178,9 @@ fail: | |||
1178 | return NULL; | 1178 | return NULL; |
1179 | } | 1179 | } |
1180 | 1180 | ||
/*
 * Bit flags stored in trace->trace_pgfaults selecting which pagefault
 * events are traced; they may be OR-ed together.
 */
#define TRACE_PFMAJ	0x1	/* trace major pagefaults */
#define TRACE_PFMIN	0x2	/* trace minor pagefaults */
1183 | |||
1181 | struct trace { | 1184 | struct trace { |
1182 | struct perf_tool tool; | 1185 | struct perf_tool tool; |
1183 | struct { | 1186 | struct { |
@@ -1212,6 +1215,7 @@ struct trace { | |||
1212 | bool summary_only; | 1215 | bool summary_only; |
1213 | bool show_comm; | 1216 | bool show_comm; |
1214 | bool show_tool_stats; | 1217 | bool show_tool_stats; |
1218 | int trace_pgfaults; | ||
1215 | }; | 1219 | }; |
1216 | 1220 | ||
1217 | static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) | 1221 | static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) |
@@ -1773,6 +1777,68 @@ out_dump: | |||
1773 | return 0; | 1777 | return 0; |
1774 | } | 1778 | } |
1775 | 1779 | ||
1780 | static void print_location(FILE *f, struct perf_sample *sample, | ||
1781 | struct addr_location *al, | ||
1782 | bool print_dso, bool print_sym) | ||
1783 | { | ||
1784 | |||
1785 | if ((verbose || print_dso) && al->map) | ||
1786 | fprintf(f, "%s@", al->map->dso->long_name); | ||
1787 | |||
1788 | if ((verbose || print_sym) && al->sym) | ||
1789 | fprintf(f, "%s+0x%lx", al->sym->name, | ||
1790 | al->addr - al->sym->start); | ||
1791 | else if (al->map) | ||
1792 | fprintf(f, "0x%lx", al->addr); | ||
1793 | else | ||
1794 | fprintf(f, "0x%lx", sample->addr); | ||
1795 | } | ||
1796 | |||
1797 | static int trace__pgfault(struct trace *trace, | ||
1798 | struct perf_evsel *evsel, | ||
1799 | union perf_event *event, | ||
1800 | struct perf_sample *sample) | ||
1801 | { | ||
1802 | struct thread *thread; | ||
1803 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | ||
1804 | struct addr_location al; | ||
1805 | char map_type = 'd'; | ||
1806 | |||
1807 | thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); | ||
1808 | |||
1809 | thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION, | ||
1810 | sample->ip, &al); | ||
1811 | |||
1812 | trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); | ||
1813 | |||
1814 | fprintf(trace->output, "%sfault [", | ||
1815 | evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? | ||
1816 | "maj" : "min"); | ||
1817 | |||
1818 | print_location(trace->output, sample, &al, false, true); | ||
1819 | |||
1820 | fprintf(trace->output, "] => "); | ||
1821 | |||
1822 | thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE, | ||
1823 | sample->addr, &al); | ||
1824 | |||
1825 | if (!al.map) { | ||
1826 | thread__find_addr_location(thread, trace->host, cpumode, | ||
1827 | MAP__FUNCTION, sample->addr, &al); | ||
1828 | |||
1829 | if (al.map) | ||
1830 | map_type = 'x'; | ||
1831 | else | ||
1832 | map_type = '?'; | ||
1833 | } | ||
1834 | |||
1835 | print_location(trace->output, sample, &al, true, false); | ||
1836 | |||
1837 | fprintf(trace->output, " (%c%c)\n", map_type, al.level); | ||
1838 | |||
1839 | return 0; | ||
1840 | } | ||
1841 | |||
1776 | static bool skip_sample(struct trace *trace, struct perf_sample *sample) | 1842 | static bool skip_sample(struct trace *trace, struct perf_sample *sample) |
1777 | { | 1843 | { |
1778 | if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || | 1844 | if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || |
@@ -1887,6 +1953,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) | |||
1887 | perf_evlist__add(evlist, evsel); | 1953 | perf_evlist__add(evlist, evsel); |
1888 | } | 1954 | } |
1889 | 1955 | ||
1956 | static int perf_evlist__add_pgfault(struct perf_evlist *evlist, | ||
1957 | u64 config) | ||
1958 | { | ||
1959 | struct perf_evsel *evsel; | ||
1960 | struct perf_event_attr attr = { | ||
1961 | .type = PERF_TYPE_SOFTWARE, | ||
1962 | .mmap_data = 1, | ||
1963 | .sample_period = 1, | ||
1964 | }; | ||
1965 | |||
1966 | attr.config = config; | ||
1967 | |||
1968 | event_attr_init(&attr); | ||
1969 | |||
1970 | evsel = perf_evsel__new(&attr); | ||
1971 | if (!evsel) | ||
1972 | return -ENOMEM; | ||
1973 | |||
1974 | evsel->handler = trace__pgfault; | ||
1975 | perf_evlist__add(evlist, evsel); | ||
1976 | |||
1977 | return 0; | ||
1978 | } | ||
1979 | |||
1890 | static int trace__run(struct trace *trace, int argc, const char **argv) | 1980 | static int trace__run(struct trace *trace, int argc, const char **argv) |
1891 | { | 1981 | { |
1892 | struct perf_evlist *evlist = perf_evlist__new(); | 1982 | struct perf_evlist *evlist = perf_evlist__new(); |
@@ -1907,6 +1997,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
1907 | 1997 | ||
1908 | perf_evlist__add_vfs_getname(evlist); | 1998 | perf_evlist__add_vfs_getname(evlist); |
1909 | 1999 | ||
2000 | if ((trace->trace_pgfaults & TRACE_PFMAJ) && | ||
2001 | perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) | ||
2002 | goto out_error_tp; | ||
2003 | |||
2004 | if ((trace->trace_pgfaults & TRACE_PFMIN) && | ||
2005 | perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) | ||
2006 | goto out_error_tp; | ||
2007 | |||
1910 | if (trace->sched && | 2008 | if (trace->sched && |
1911 | perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", | 2009 | perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", |
1912 | trace__sched_stat_runtime)) | 2010 | trace__sched_stat_runtime)) |
@@ -1987,7 +2085,8 @@ again: | |||
1987 | goto next_event; | 2085 | goto next_event; |
1988 | } | 2086 | } |
1989 | 2087 | ||
1990 | if (sample.raw_data == NULL) { | 2088 | if (evsel->attr.type == PERF_TYPE_TRACEPOINT && |
2089 | sample.raw_data == NULL) { | ||
1991 | fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", | 2090 | fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", |
1992 | perf_evsel__name(evsel), sample.tid, | 2091 | perf_evsel__name(evsel), sample.tid, |
1993 | sample.cpu, sample.raw_size); | 2092 | sample.cpu, sample.raw_size); |
@@ -2269,6 +2368,23 @@ static int trace__open_output(struct trace *trace, const char *filename) | |||
2269 | return trace->output == NULL ? -errno : 0; | 2368 | return trace->output == NULL ? -errno : 0; |
2270 | } | 2369 | } |
2271 | 2370 | ||
2371 | static int parse_pagefaults(const struct option *opt, const char *str, | ||
2372 | int unset __maybe_unused) | ||
2373 | { | ||
2374 | int *trace_pgfaults = opt->value; | ||
2375 | |||
2376 | if (strcmp(str, "all") == 0) | ||
2377 | *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; | ||
2378 | else if (strcmp(str, "maj") == 0) | ||
2379 | *trace_pgfaults |= TRACE_PFMAJ; | ||
2380 | else if (strcmp(str, "min") == 0) | ||
2381 | *trace_pgfaults |= TRACE_PFMIN; | ||
2382 | else | ||
2383 | return -1; | ||
2384 | |||
2385 | return 0; | ||
2386 | } | ||
2387 | |||
2272 | int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) | 2388 | int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) |
2273 | { | 2389 | { |
2274 | const char * const trace_usage[] = { | 2390 | const char * const trace_usage[] = { |
@@ -2335,6 +2451,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) | |||
2335 | "Show only syscall summary with statistics"), | 2451 | "Show only syscall summary with statistics"), |
2336 | OPT_BOOLEAN('S', "with-summary", &trace.summary, | 2452 | OPT_BOOLEAN('S', "with-summary", &trace.summary, |
2337 | "Show all syscalls and summary with statistics"), | 2453 | "Show all syscalls and summary with statistics"), |
2454 | OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", | ||
2455 | "Trace pagefaults", parse_pagefaults, "maj"), | ||
2338 | OPT_END() | 2456 | OPT_END() |
2339 | }; | 2457 | }; |
2340 | int err; | 2458 | int err; |
@@ -2349,6 +2467,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) | |||
2349 | if (trace.summary_only) | 2467 | if (trace.summary_only) |
2350 | trace.summary = trace.summary_only; | 2468 | trace.summary = trace.summary_only; |
2351 | 2469 | ||
2470 | if (trace.trace_pgfaults) { | ||
2471 | trace.opts.sample_address = true; | ||
2472 | trace.opts.sample_time = true; | ||
2473 | } | ||
2474 | |||
2352 | if (output_name != NULL) { | 2475 | if (output_name != NULL) { |
2353 | err = trace__open_output(&trace, output_name); | 2476 | err = trace__open_output(&trace, output_name); |
2354 | if (err < 0) { | 2477 | if (err < 0) { |