aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorStanislav Fomichev <stfomichev@yandex-team.ru>2014-06-26 12:14:25 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2014-06-26 15:07:43 -0400
commit598d02c5a07b60e5c824184cdaf697b70f3c452a (patch)
tree65c6f20c05c126e448bee268a8052b5eda54348e /tools
parent0c82adcf141935b6312593a53f87342dbb12b704 (diff)
perf trace: Add support for pagefault tracing
This patch adds optional pagefault tracing support to 'perf trace'. Using the -F/--pf option, the user can specify whether minor, major or all pagefault events should be traced. This patch adds only live mode; record and replay will come in a separate patch. Example output: 1756272.905 ( 0.000 ms): curl/5937 majfault [0x7fa7261978b6] => /usr/lib/x86_64-linux-gnu/libkrb5.so.26.0.0@0x85288 (d.) 1862866.036 ( 0.000 ms): wget/8460 majfault [__clear_user+0x3f] => 0x659cb4 (?k) Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru> Cc: David Ahern <dsahern@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1403799268-1367-3-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-trace.txt39
-rw-r--r--tools/perf/builtin-trace.c125
2 files changed, 163 insertions, 1 deletions
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index fae38d9a44a4..72397d9aa2ec 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -107,6 +107,45 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
107 Show tool stats such as number of times fd->pathname was discovered thru 107 Show tool stats such as number of times fd->pathname was discovered thru
108 hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. 108 hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
109 109
110-F=[all|min|maj]::
111--pf=[all|min|maj]::
112 Trace pagefaults. Optionally, you can specify whether you want minor,
113 major or all pagefaults. Default value is maj.
114
115PAGEFAULTS
116----------
117
118When tracing pagefaults, the format of the trace is as follows:
119
120<min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>).
121
122- min/maj indicates whether fault event is minor or major;
123- ip.symbol shows the symbol for the instruction pointer (the code that generated the
124 fault); if no debug symbols are available, perf trace will print the raw IP;
125- addr.dso shows DSO for the faulted address;
126- map type is either 'd' for non-executable maps or 'x' for executable maps;
127- addr level is either 'k' for kernel dso or '.' for user dso.
128
129For symbol resolution you may need to install debugging symbols.
130
131Please be aware that the duration is currently always 0 and doesn't reflect the actual
132time it took for the fault to be handled!
133
134When --verbose is specified, perf trace tries to print all available information
135for both the IP and the fault address in the form of dso@symbol+offset.
136
137EXAMPLES
138--------
139
140Trace syscalls, major and minor pagefaults:
141
142 $ perf trace -F all
143
144 1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.)
145
146 As you can see, there was a major pagefault in the python process, from the
147 CRYPTO_push_info_ routine, which faulted somewhere in libcrypto.so.
148
110SEE ALSO 149SEE ALSO
111-------- 150--------
112linkperf:perf-record[1], linkperf:perf-script[1] 151linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4a9e26b731fe..1985c3b8cc06 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1178,6 +1178,9 @@ fail:
1178 return NULL; 1178 return NULL;
1179} 1179}
1180 1180
/* Bit flags stored in struct trace's trace_pgfaults: which pagefault classes to trace. */
1181#define TRACE_PFMAJ (1 << 0)
1182#define TRACE_PFMIN (1 << 1)
1183
1181struct trace { 1184struct trace {
1182 struct perf_tool tool; 1185 struct perf_tool tool;
1183 struct { 1186 struct {
@@ -1212,6 +1215,7 @@ struct trace {
1212 bool summary_only; 1215 bool summary_only;
1213 bool show_comm; 1216 bool show_comm;
1214 bool show_tool_stats; 1217 bool show_tool_stats;
1218 int trace_pgfaults;
1215}; 1219};
1216 1220
1217static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 1221static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
@@ -1773,6 +1777,68 @@ out_dump:
1773 return 0; 1777 return 0;
1774} 1778}
1775 1779
1780static void print_location(FILE *f, struct perf_sample *sample,
1781 struct addr_location *al,
1782 bool print_dso, bool print_sym)
1783{
1784
1785 if ((verbose || print_dso) && al->map)
1786 fprintf(f, "%s@", al->map->dso->long_name);
1787
1788 if ((verbose || print_sym) && al->sym)
1789 fprintf(f, "%s+0x%lx", al->sym->name,
1790 al->addr - al->sym->start);
1791 else if (al->map)
1792 fprintf(f, "0x%lx", al->addr);
1793 else
1794 fprintf(f, "0x%lx", sample->addr);
1795}
1796
1797static int trace__pgfault(struct trace *trace,
1798 struct perf_evsel *evsel,
1799 union perf_event *event,
1800 struct perf_sample *sample)
1801{
1802 struct thread *thread;
1803 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1804 struct addr_location al;
1805 char map_type = 'd';
1806
1807 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1808
1809 thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
1810 sample->ip, &al);
1811
1812 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1813
1814 fprintf(trace->output, "%sfault [",
1815 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1816 "maj" : "min");
1817
1818 print_location(trace->output, sample, &al, false, true);
1819
1820 fprintf(trace->output, "] => ");
1821
1822 thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
1823 sample->addr, &al);
1824
1825 if (!al.map) {
1826 thread__find_addr_location(thread, trace->host, cpumode,
1827 MAP__FUNCTION, sample->addr, &al);
1828
1829 if (al.map)
1830 map_type = 'x';
1831 else
1832 map_type = '?';
1833 }
1834
1835 print_location(trace->output, sample, &al, true, false);
1836
1837 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1838
1839 return 0;
1840}
1841
1776static bool skip_sample(struct trace *trace, struct perf_sample *sample) 1842static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1777{ 1843{
1778 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 1844 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
@@ -1887,6 +1953,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1887 perf_evlist__add(evlist, evsel); 1953 perf_evlist__add(evlist, evsel);
1888} 1954}
1889 1955
1956static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
1957 u64 config)
1958{
1959 struct perf_evsel *evsel;
1960 struct perf_event_attr attr = {
1961 .type = PERF_TYPE_SOFTWARE,
1962 .mmap_data = 1,
1963 .sample_period = 1,
1964 };
1965
1966 attr.config = config;
1967
1968 event_attr_init(&attr);
1969
1970 evsel = perf_evsel__new(&attr);
1971 if (!evsel)
1972 return -ENOMEM;
1973
1974 evsel->handler = trace__pgfault;
1975 perf_evlist__add(evlist, evsel);
1976
1977 return 0;
1978}
1979
1890static int trace__run(struct trace *trace, int argc, const char **argv) 1980static int trace__run(struct trace *trace, int argc, const char **argv)
1891{ 1981{
1892 struct perf_evlist *evlist = perf_evlist__new(); 1982 struct perf_evlist *evlist = perf_evlist__new();
@@ -1907,6 +1997,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
1907 1997
1908 perf_evlist__add_vfs_getname(evlist); 1998 perf_evlist__add_vfs_getname(evlist);
1909 1999
2000 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2001 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
2002 goto out_error_tp;
2003
2004 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2005 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2006 goto out_error_tp;
2007
1910 if (trace->sched && 2008 if (trace->sched &&
1911 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 2009 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1912 trace__sched_stat_runtime)) 2010 trace__sched_stat_runtime))
@@ -1987,7 +2085,8 @@ again:
1987 goto next_event; 2085 goto next_event;
1988 } 2086 }
1989 2087
1990 if (sample.raw_data == NULL) { 2088 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2089 sample.raw_data == NULL) {
1991 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 2090 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1992 perf_evsel__name(evsel), sample.tid, 2091 perf_evsel__name(evsel), sample.tid,
1993 sample.cpu, sample.raw_size); 2092 sample.cpu, sample.raw_size);
@@ -2269,6 +2368,23 @@ static int trace__open_output(struct trace *trace, const char *filename)
2269 return trace->output == NULL ? -errno : 0; 2368 return trace->output == NULL ? -errno : 0;
2270} 2369}
2271 2370
2371static int parse_pagefaults(const struct option *opt, const char *str,
2372 int unset __maybe_unused)
2373{
2374 int *trace_pgfaults = opt->value;
2375
2376 if (strcmp(str, "all") == 0)
2377 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2378 else if (strcmp(str, "maj") == 0)
2379 *trace_pgfaults |= TRACE_PFMAJ;
2380 else if (strcmp(str, "min") == 0)
2381 *trace_pgfaults |= TRACE_PFMIN;
2382 else
2383 return -1;
2384
2385 return 0;
2386}
2387
2272int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 2388int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2273{ 2389{
2274 const char * const trace_usage[] = { 2390 const char * const trace_usage[] = {
@@ -2335,6 +2451,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2335 "Show only syscall summary with statistics"), 2451 "Show only syscall summary with statistics"),
2336 OPT_BOOLEAN('S', "with-summary", &trace.summary, 2452 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2337 "Show all syscalls and summary with statistics"), 2453 "Show all syscalls and summary with statistics"),
2454 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2455 "Trace pagefaults", parse_pagefaults, "maj"),
2338 OPT_END() 2456 OPT_END()
2339 }; 2457 };
2340 int err; 2458 int err;
@@ -2349,6 +2467,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2349 if (trace.summary_only) 2467 if (trace.summary_only)
2350 trace.summary = trace.summary_only; 2468 trace.summary = trace.summary_only;
2351 2469
2470 if (trace.trace_pgfaults) {
2471 trace.opts.sample_address = true;
2472 trace.opts.sample_time = true;
2473 }
2474
2352 if (output_name != NULL) { 2475 if (output_name != NULL) {
2353 err = trace__open_output(&trace, output_name); 2476 err = trace__open_output(&trace, output_name);
2354 if (err < 0) { 2477 if (err < 0) {