diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/perf/Documentation/perf-trace.txt | 39 | ||||
| -rw-r--r-- | tools/perf/builtin-trace.c | 125 |
2 files changed, 163 insertions, 1 deletions
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index fae38d9a44a4..72397d9aa2ec 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt | |||
| @@ -107,6 +107,45 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. | |||
| 107 | Show tool stats such as number of times fd->pathname was discovered thru | 107 | Show tool stats such as number of times fd->pathname was discovered thru |
| 108 | hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. | 108 | hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. |
| 109 | 109 | ||
| 110 | -F=[all|min|maj]:: | ||
| 111 | --pf=[all|min|maj]:: | ||
| 112 | Trace pagefaults. Optionally, you can specify whether you want minor, | ||
| 113 | major or all pagefaults. Default value is maj. | ||
| 114 | |||
| 115 | PAGEFAULTS | ||
| 116 | ---------- | ||
| 117 | |||
| 118 | When tracing pagefaults, the format of the trace is as follows: | ||
| 119 | |||
| 120 | <min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>). | ||
| 121 | |||
| 122 | - min/maj indicates whether fault event is minor or major; | ||
| 123 | - ip.symbol shows symbol for instruction pointer (the code that generated the | ||
| 124 | fault); if no debug symbols are available, perf trace will print the raw IP; | ||
| 125 | - addr.dso shows DSO for the faulted address; | ||
| 126 | - map type is 'd' for non-executable maps, 'x' for executable maps, or '?' if the address could not be resolved to any map; | ||
| 127 | - addr level is either 'k' for kernel dso or '.' for user dso. | ||
| 128 | |||
| 129 | For symbol resolution you may need to install debugging symbols. | ||
| 130 | |||
| 131 | Please be aware that duration is currently always 0 and doesn't reflect the actual | ||
| 132 | time it took for the fault to be handled! | ||
| 133 | |||
| 134 | When --verbose is specified, perf trace tries to print all available information | ||
| 135 | for both the IP and the fault address in the form of dso@symbol+offset. | ||
| 136 | |||
| 137 | EXAMPLES | ||
| 138 | -------- | ||
| 139 | |||
| 140 | Trace syscalls, major and minor pagefaults: | ||
| 141 | |||
| 142 | $ perf trace -F all | ||
| 143 | |||
| 144 | 1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.) | ||
| 145 | |||
| 146 | As you can see, there was a major pagefault in the python process, from the | ||
| 147 | CRYPTO_push_info_ routine, which faulted somewhere in libcrypto.so. | ||
| 148 | |||
| 110 | SEE ALSO | 149 | SEE ALSO |
| 111 | -------- | 150 | -------- |
| 112 | linkperf:perf-record[1], linkperf:perf-script[1] | 151 | linkperf:perf-record[1], linkperf:perf-script[1] |
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4a9e26b731fe..1985c3b8cc06 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
| @@ -1178,6 +1178,9 @@ fail: | |||
| 1178 | return NULL; | 1178 | return NULL; |
| 1179 | } | 1179 | } |
| 1180 | 1180 | ||
| 1181 | #define TRACE_PFMAJ (1 << 0) | ||
| 1182 | #define TRACE_PFMIN (1 << 1) | ||
| 1183 | |||
| 1181 | struct trace { | 1184 | struct trace { |
| 1182 | struct perf_tool tool; | 1185 | struct perf_tool tool; |
| 1183 | struct { | 1186 | struct { |
| @@ -1212,6 +1215,7 @@ struct trace { | |||
| 1212 | bool summary_only; | 1215 | bool summary_only; |
| 1213 | bool show_comm; | 1216 | bool show_comm; |
| 1214 | bool show_tool_stats; | 1217 | bool show_tool_stats; |
| 1218 | int trace_pgfaults; | ||
| 1215 | }; | 1219 | }; |
| 1216 | 1220 | ||
| 1217 | static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) | 1221 | static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) |
| @@ -1773,6 +1777,68 @@ out_dump: | |||
| 1773 | return 0; | 1777 | return 0; |
| 1774 | } | 1778 | } |
| 1775 | 1779 | ||
| 1780 | static void print_location(FILE *f, struct perf_sample *sample, | ||
| 1781 | struct addr_location *al, | ||
| 1782 | bool print_dso, bool print_sym) | ||
| 1783 | { | ||
| 1784 | |||
| 1785 | if ((verbose || print_dso) && al->map) | ||
| 1786 | fprintf(f, "%s@", al->map->dso->long_name); | ||
| 1787 | |||
| 1788 | if ((verbose || print_sym) && al->sym) | ||
| 1789 | fprintf(f, "%s+0x%lx", al->sym->name, | ||
| 1790 | al->addr - al->sym->start); | ||
| 1791 | else if (al->map) | ||
| 1792 | fprintf(f, "0x%lx", al->addr); | ||
| 1793 | else | ||
| 1794 | fprintf(f, "0x%lx", sample->addr); | ||
| 1795 | } | ||
| 1796 | |||
| 1797 | static int trace__pgfault(struct trace *trace, | ||
| 1798 | struct perf_evsel *evsel, | ||
| 1799 | union perf_event *event, | ||
| 1800 | struct perf_sample *sample) | ||
| 1801 | { | ||
| 1802 | struct thread *thread; | ||
| 1803 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | ||
| 1804 | struct addr_location al; | ||
| 1805 | char map_type = 'd'; | ||
| 1806 | |||
| 1807 | thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); | ||
| 1808 | |||
| 1809 | thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION, | ||
| 1810 | sample->ip, &al); | ||
| 1811 | |||
| 1812 | trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); | ||
| 1813 | |||
| 1814 | fprintf(trace->output, "%sfault [", | ||
| 1815 | evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? | ||
| 1816 | "maj" : "min"); | ||
| 1817 | |||
| 1818 | print_location(trace->output, sample, &al, false, true); | ||
| 1819 | |||
| 1820 | fprintf(trace->output, "] => "); | ||
| 1821 | |||
| 1822 | thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE, | ||
| 1823 | sample->addr, &al); | ||
| 1824 | |||
| 1825 | if (!al.map) { | ||
| 1826 | thread__find_addr_location(thread, trace->host, cpumode, | ||
| 1827 | MAP__FUNCTION, sample->addr, &al); | ||
| 1828 | |||
| 1829 | if (al.map) | ||
| 1830 | map_type = 'x'; | ||
| 1831 | else | ||
| 1832 | map_type = '?'; | ||
| 1833 | } | ||
| 1834 | |||
| 1835 | print_location(trace->output, sample, &al, true, false); | ||
| 1836 | |||
| 1837 | fprintf(trace->output, " (%c%c)\n", map_type, al.level); | ||
| 1838 | |||
| 1839 | return 0; | ||
| 1840 | } | ||
| 1841 | |||
| 1776 | static bool skip_sample(struct trace *trace, struct perf_sample *sample) | 1842 | static bool skip_sample(struct trace *trace, struct perf_sample *sample) |
| 1777 | { | 1843 | { |
| 1778 | if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || | 1844 | if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || |
| @@ -1887,6 +1953,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) | |||
| 1887 | perf_evlist__add(evlist, evsel); | 1953 | perf_evlist__add(evlist, evsel); |
| 1888 | } | 1954 | } |
| 1889 | 1955 | ||
| 1956 | static int perf_evlist__add_pgfault(struct perf_evlist *evlist, | ||
| 1957 | u64 config) | ||
| 1958 | { | ||
| 1959 | struct perf_evsel *evsel; | ||
| 1960 | struct perf_event_attr attr = { | ||
| 1961 | .type = PERF_TYPE_SOFTWARE, | ||
| 1962 | .mmap_data = 1, | ||
| 1963 | .sample_period = 1, | ||
| 1964 | }; | ||
| 1965 | |||
| 1966 | attr.config = config; | ||
| 1967 | |||
| 1968 | event_attr_init(&attr); | ||
| 1969 | |||
| 1970 | evsel = perf_evsel__new(&attr); | ||
| 1971 | if (!evsel) | ||
| 1972 | return -ENOMEM; | ||
| 1973 | |||
| 1974 | evsel->handler = trace__pgfault; | ||
| 1975 | perf_evlist__add(evlist, evsel); | ||
| 1976 | |||
| 1977 | return 0; | ||
| 1978 | } | ||
| 1979 | |||
| 1890 | static int trace__run(struct trace *trace, int argc, const char **argv) | 1980 | static int trace__run(struct trace *trace, int argc, const char **argv) |
| 1891 | { | 1981 | { |
| 1892 | struct perf_evlist *evlist = perf_evlist__new(); | 1982 | struct perf_evlist *evlist = perf_evlist__new(); |
| @@ -1907,6 +1997,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
| 1907 | 1997 | ||
| 1908 | perf_evlist__add_vfs_getname(evlist); | 1998 | perf_evlist__add_vfs_getname(evlist); |
| 1909 | 1999 | ||
| 2000 | if ((trace->trace_pgfaults & TRACE_PFMAJ) && | ||
| 2001 | perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) | ||
| 2002 | goto out_error_tp; | ||
| 2003 | |||
| 2004 | if ((trace->trace_pgfaults & TRACE_PFMIN) && | ||
| 2005 | perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) | ||
| 2006 | goto out_error_tp; | ||
| 2007 | |||
| 1910 | if (trace->sched && | 2008 | if (trace->sched && |
| 1911 | perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", | 2009 | perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", |
| 1912 | trace__sched_stat_runtime)) | 2010 | trace__sched_stat_runtime)) |
| @@ -1987,7 +2085,8 @@ again: | |||
| 1987 | goto next_event; | 2085 | goto next_event; |
| 1988 | } | 2086 | } |
| 1989 | 2087 | ||
| 1990 | if (sample.raw_data == NULL) { | 2088 | if (evsel->attr.type == PERF_TYPE_TRACEPOINT && |
| 2089 | sample.raw_data == NULL) { | ||
| 1991 | fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", | 2090 | fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", |
| 1992 | perf_evsel__name(evsel), sample.tid, | 2091 | perf_evsel__name(evsel), sample.tid, |
| 1993 | sample.cpu, sample.raw_size); | 2092 | sample.cpu, sample.raw_size); |
| @@ -2269,6 +2368,23 @@ static int trace__open_output(struct trace *trace, const char *filename) | |||
| 2269 | return trace->output == NULL ? -errno : 0; | 2368 | return trace->output == NULL ? -errno : 0; |
| 2270 | } | 2369 | } |
| 2271 | 2370 | ||
| 2371 | static int parse_pagefaults(const struct option *opt, const char *str, | ||
| 2372 | int unset __maybe_unused) | ||
| 2373 | { | ||
| 2374 | int *trace_pgfaults = opt->value; | ||
| 2375 | |||
| 2376 | if (strcmp(str, "all") == 0) | ||
| 2377 | *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; | ||
| 2378 | else if (strcmp(str, "maj") == 0) | ||
| 2379 | *trace_pgfaults |= TRACE_PFMAJ; | ||
| 2380 | else if (strcmp(str, "min") == 0) | ||
| 2381 | *trace_pgfaults |= TRACE_PFMIN; | ||
| 2382 | else | ||
| 2383 | return -1; | ||
| 2384 | |||
| 2385 | return 0; | ||
| 2386 | } | ||
| 2387 | |||
| 2272 | int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) | 2388 | int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) |
| 2273 | { | 2389 | { |
| 2274 | const char * const trace_usage[] = { | 2390 | const char * const trace_usage[] = { |
| @@ -2335,6 +2451,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 2335 | "Show only syscall summary with statistics"), | 2451 | "Show only syscall summary with statistics"), |
| 2336 | OPT_BOOLEAN('S', "with-summary", &trace.summary, | 2452 | OPT_BOOLEAN('S', "with-summary", &trace.summary, |
| 2337 | "Show all syscalls and summary with statistics"), | 2453 | "Show all syscalls and summary with statistics"), |
| 2454 | OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", | ||
| 2455 | "Trace pagefaults", parse_pagefaults, "maj"), | ||
| 2338 | OPT_END() | 2456 | OPT_END() |
| 2339 | }; | 2457 | }; |
| 2340 | int err; | 2458 | int err; |
| @@ -2349,6 +2467,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 2349 | if (trace.summary_only) | 2467 | if (trace.summary_only) |
| 2350 | trace.summary = trace.summary_only; | 2468 | trace.summary = trace.summary_only; |
| 2351 | 2469 | ||
| 2470 | if (trace.trace_pgfaults) { | ||
| 2471 | trace.opts.sample_address = true; | ||
| 2472 | trace.opts.sample_time = true; | ||
| 2473 | } | ||
| 2474 | |||
| 2352 | if (output_name != NULL) { | 2475 | if (output_name != NULL) { |
| 2353 | err = trace__open_output(&trace, output_name); | 2476 | err = trace__open_output(&trace, output_name); |
| 2354 | if (err < 0) { | 2477 | if (err < 0) { |
