aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-11-23 23:09:31 -0500
committerIngo Molnar <mingo@kernel.org>2016-11-23 23:09:31 -0500
commit47414424c53a70eceb0fc6e0a35a31a2b763d5b2 (patch)
tree07979aa784313ba03712df2b85a3b3f71f1733d6 /tools/perf
parent69e6cdd0cf16f645be39038e5ccc9379e3923d00 (diff)
parenta407b0678bc1c39d70af5fdbe6421c164b69a8c0 (diff)
Merge tag 'perf-core-for-mingo-20161123' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New tool:

- 'perf sched timehist' provides an analysis of scheduling events.

  Example usage:

      perf sched record -- sleep 1
      perf sched timehist

  By default it shows the individual schedule events, including the wait
  time (time between sched-out and next sched-in events for the task), the
  task scheduling delay (time between wakeup and actually running) and run
  time for the task:

        time    cpu  task name         wait time  sch delay  run time
                     [tid/pid]            (msec)     (msec)    (msec)
    -------- ------  ----------------  ---------  ---------  --------
    1.874569 [0011]  gcc[31949]            0.014      0.000     1.148
    1.874591 [0010]  gcc[31951]            0.000      0.000     0.024
    1.874603 [0010]  migration/10[59]      3.350      0.004     0.011
    1.874604 [0011]  <idle>                1.148      0.000     0.035
    1.874723 [0005]  <idle>                0.016      0.000     1.383
    1.874746 [0005]  gcc[31949]            0.153      0.078     0.022
    ...

  Times are in msec.usec. (David Ahern, Namhyung Kim)

Improvements:

- Make 'perf c2c report' support -f/--force, to allow skipping the
  ownership check for root users, for instance, just like the other
  tools (Jiri Olsa)

- Allow sorting cachelines by total number of HITMs, in addition to
  local and remote numbers (Jiri Olsa)

Fixes:

- Make sure errors aren't suppressed by the TUI reset at the end of a
  'perf c2c report' session (Jiri Olsa)

Infrastructure changes:

- Initial work on having the annotate code better support multiple
  architectures, including the ability to cross-annotate, i.e. to
  annotate perf.data files collected on an ARM system on a x86_64
  workstation (Arnaldo Carvalho de Melo, Ravi Bangoria, Kim Phillips)

- Use USECS_PER_SEC instead of hard coded number in libtraceevent
  (Steven Rostedt)

- Add retrieval of preempt count and latency flags in libtraceevent
  (Steven Rostedt)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-c2c.txt8
-rw-r--r--tools/perf/Documentation/perf-sched.txt66
-rw-r--r--tools/perf/arch/arm/annotate/instructions.c90
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c78
-rw-r--r--tools/perf/builtin-c2c.c80
-rw-r--r--tools/perf/builtin-sched.c914
-rw-r--r--tools/perf/builtin-top.c2
-rw-r--r--tools/perf/ui/browsers/annotate.c2
-rw-r--r--tools/perf/ui/gtk/annotate.c2
-rw-r--r--tools/perf/util/annotate.c251
-rw-r--r--tools/perf/util/annotate.h6
-rw-r--r--tools/perf/util/evsel.c6
-rw-r--r--tools/perf/util/evsel.h1
-rw-r--r--tools/perf/util/evsel_fprintf.c12
-rw-r--r--tools/perf/util/mem-events.c12
-rw-r--r--tools/perf/util/mem-events.h1
-rw-r--r--tools/perf/util/symbol.h3
-rw-r--r--tools/perf/util/symbol_fprintf.c11
18 files changed, 1370 insertions, 175 deletions
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 21810d711f5f..3f06730c7f47 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -100,6 +100,14 @@ REPORT OPTIONS
100--show-all:: 100--show-all::
101 Show all captured HITM lines, with no regard to HITM % 0.0005 limit. 101 Show all captured HITM lines, with no regard to HITM % 0.0005 limit.
102 102
103-f::
104--force::
105 Don't do ownership validation.
106
107-d::
108--display::
109 Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
110
103C2C RECORD 111C2C RECORD
104---------- 112----------
105The perf c2c record command setup options related to HITM cacheline analysis 113The perf c2c record command setup options related to HITM cacheline analysis
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 1cc08cc47ac5..fb9e52d65fca 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf sched' {record|latency|map|replay|script} 11'perf sched' {record|latency|map|replay|script|timehist}
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15There are five variants of perf sched: 15There are several variants of 'perf sched':
16 16
17 'perf sched record <command>' to record the scheduling events 17 'perf sched record <command>' to record the scheduling events
18 of an arbitrary workload. 18 of an arbitrary workload.
@@ -36,6 +36,30 @@ There are five variants of perf sched:
36 are running on a CPU. A '*' denotes the CPU that had the event, and 36 are running on a CPU. A '*' denotes the CPU that had the event, and
37 a dot signals an idle CPU. 37 a dot signals an idle CPU.
38 38
39 'perf sched timehist' provides an analysis of scheduling events.
40
41 Example usage:
42 perf sched record -- sleep 1
43 perf sched timehist
44
45 By default it shows the individual schedule events, including the wait
46 time (time between sched-out and next sched-in events for the task), the
47 task scheduling delay (time between wakeup and actually running) and run
48 time for the task:
49
50 time cpu task name wait time sch delay run time
51 [tid/pid] (msec) (msec) (msec)
52 -------------- ------ -------------------- --------- --------- ---------
53 79371.874569 [0011] gcc[31949] 0.014 0.000 1.148
54 79371.874591 [0010] gcc[31951] 0.000 0.000 0.024
55 79371.874603 [0010] migration/10[59] 3.350 0.004 0.011
56 79371.874604 [0011] <idle> 1.148 0.000 0.035
57 79371.874723 [0005] <idle> 0.016 0.000 1.383
58 79371.874746 [0005] gcc[31949] 0.153 0.078 0.022
59 ...
60
61 Times are in msec.usec.
62
39OPTIONS 63OPTIONS
40------- 64-------
41-i:: 65-i::
@@ -66,6 +90,44 @@ OPTIONS for 'perf sched map'
66--color-pids:: 90--color-pids::
67 Highlight the given pids. 91 Highlight the given pids.
68 92
93OPTIONS for 'perf sched timehist'
94---------------------------------
95-k::
96--vmlinux=<file>::
97 vmlinux pathname
98
99--kallsyms=<file>::
100 kallsyms pathname
101
102-g::
103--no-call-graph::
104 Do not display call chains if present.
105
106--max-stack::
107 Maximum number of functions to display in backtrace, default 5.
108
109-s::
110--summary::
111 Show only a summary of scheduling by thread with min, max, and average
112 run times (in sec) and relative stddev.
113
114-S::
115--with-summary::
116 Show all scheduling events followed by a summary by thread with min,
117 max, and average run times (in sec) and relative stddev.
118
119--symfs=<directory>::
120 Look for files with symbols relative to this directory.
121
122-V::
123--cpu-visual::
124 Show visual aid for sched switches by CPU: 'i' marks idle time,
125 's' are scheduler events.
126
127-w::
128--wakeups::
129 Show wakeup events.
130
69SEE ALSO 131SEE ALSO
70-------- 132--------
71linkperf:perf-record[1] 133linkperf:perf-record[1]
diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c
new file mode 100644
index 000000000000..d67b8aa26274
--- /dev/null
+++ b/tools/perf/arch/arm/annotate/instructions.c
@@ -0,0 +1,90 @@
/*
 * ARM instruction table for annotate: each entry pairs an instruction
 * mnemonic (.name) with the ins_ops handler (.ops) selected for it.
 *
 * NOTE(review): besides the ARM branch mnemonics (b, bcc, bl, blx, bne, ...)
 * this table appears to carry every entry of x86__instructions as well
 * (callq, jmpq, movslq, retq, ...); presumably copied over as a starting
 * point -- confirm which of them are actually needed for ARM.
 *
 * NOTE(review): entries are alphabetical except the trailing "retq"; if the
 * lookup relies on sorted order, confirm the table is sorted before use.
 */
1static struct ins arm__instructions[] = {
2 { .name = "add", .ops = &mov_ops, },
3 { .name = "addl", .ops = &mov_ops, },
4 { .name = "addq", .ops = &mov_ops, },
5 { .name = "addw", .ops = &mov_ops, },
6 { .name = "and", .ops = &mov_ops, },
7 { .name = "b", .ops = &jump_ops, }, // might also be a call
8 { .name = "bcc", .ops = &jump_ops, },
9 { .name = "bcs", .ops = &jump_ops, },
10 { .name = "beq", .ops = &jump_ops, },
11 { .name = "bge", .ops = &jump_ops, },
12 { .name = "bgt", .ops = &jump_ops, },
13 { .name = "bhi", .ops = &jump_ops, },
14 { .name = "bl", .ops = &call_ops, },
15 { .name = "bls", .ops = &jump_ops, },
16 { .name = "blt", .ops = &jump_ops, },
17 { .name = "blx", .ops = &call_ops, },
18 { .name = "bne", .ops = &jump_ops, },
19 { .name = "bts", .ops = &mov_ops, },
20 { .name = "call", .ops = &call_ops, },
21 { .name = "callq", .ops = &call_ops, },
22 { .name = "cmp", .ops = &mov_ops, },
23 { .name = "cmpb", .ops = &mov_ops, },
24 { .name = "cmpl", .ops = &mov_ops, },
25 { .name = "cmpq", .ops = &mov_ops, },
26 { .name = "cmpw", .ops = &mov_ops, },
27 { .name = "cmpxch", .ops = &mov_ops, },
28 { .name = "dec", .ops = &dec_ops, },
29 { .name = "decl", .ops = &dec_ops, },
30 { .name = "imul", .ops = &mov_ops, },
31 { .name = "inc", .ops = &dec_ops, },
32 { .name = "incl", .ops = &dec_ops, },
33 { .name = "ja", .ops = &jump_ops, },
34 { .name = "jae", .ops = &jump_ops, },
35 { .name = "jb", .ops = &jump_ops, },
36 { .name = "jbe", .ops = &jump_ops, },
37 { .name = "jc", .ops = &jump_ops, },
38 { .name = "jcxz", .ops = &jump_ops, },
39 { .name = "je", .ops = &jump_ops, },
40 { .name = "jecxz", .ops = &jump_ops, },
41 { .name = "jg", .ops = &jump_ops, },
42 { .name = "jge", .ops = &jump_ops, },
43 { .name = "jl", .ops = &jump_ops, },
44 { .name = "jle", .ops = &jump_ops, },
45 { .name = "jmp", .ops = &jump_ops, },
46 { .name = "jmpq", .ops = &jump_ops, },
47 { .name = "jna", .ops = &jump_ops, },
48 { .name = "jnae", .ops = &jump_ops, },
49 { .name = "jnb", .ops = &jump_ops, },
50 { .name = "jnbe", .ops = &jump_ops, },
51 { .name = "jnc", .ops = &jump_ops, },
52 { .name = "jne", .ops = &jump_ops, },
53 { .name = "jng", .ops = &jump_ops, },
54 { .name = "jnge", .ops = &jump_ops, },
55 { .name = "jnl", .ops = &jump_ops, },
56 { .name = "jnle", .ops = &jump_ops, },
57 { .name = "jno", .ops = &jump_ops, },
58 { .name = "jnp", .ops = &jump_ops, },
59 { .name = "jns", .ops = &jump_ops, },
60 { .name = "jnz", .ops = &jump_ops, },
61 { .name = "jo", .ops = &jump_ops, },
62 { .name = "jp", .ops = &jump_ops, },
63 { .name = "jpe", .ops = &jump_ops, },
64 { .name = "jpo", .ops = &jump_ops, },
65 { .name = "jrcxz", .ops = &jump_ops, },
66 { .name = "js", .ops = &jump_ops, },
67 { .name = "jz", .ops = &jump_ops, },
68 { .name = "lea", .ops = &mov_ops, },
69 { .name = "lock", .ops = &lock_ops, },
70 { .name = "mov", .ops = &mov_ops, },
71 { .name = "movb", .ops = &mov_ops, },
72 { .name = "movdqa", .ops = &mov_ops, },
73 { .name = "movl", .ops = &mov_ops, },
74 { .name = "movq", .ops = &mov_ops, },
75 { .name = "movslq", .ops = &mov_ops, },
76 { .name = "movzbl", .ops = &mov_ops, },
77 { .name = "movzwl", .ops = &mov_ops, },
78 { .name = "nop", .ops = &nop_ops, },
79 { .name = "nopl", .ops = &nop_ops, },
80 { .name = "nopw", .ops = &nop_ops, },
81 { .name = "or", .ops = &mov_ops, },
82 { .name = "orl", .ops = &mov_ops, },
83 { .name = "test", .ops = &mov_ops, },
84 { .name = "testb", .ops = &mov_ops, },
85 { .name = "testl", .ops = &mov_ops, },
86 { .name = "xadd", .ops = &mov_ops, },
87 { .name = "xbeginl", .ops = &jump_ops, },
88 { .name = "xbeginq", .ops = &jump_ops, },
89 { .name = "retq", .ops = &ret_ops, },
90};
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
new file mode 100644
index 000000000000..c1625f256df3
--- /dev/null
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -0,0 +1,78 @@
/*
 * x86 instruction table for annotate: each entry pairs an instruction
 * mnemonic (.name) with the ins_ops handler (.ops) selected for it.
 *
 * NOTE(review): entries are alphabetical except the trailing "retq"; if the
 * lookup relies on sorted order, confirm the table is sorted before use.
 */
1static struct ins x86__instructions[] = {
2 { .name = "add", .ops = &mov_ops, },
3 { .name = "addl", .ops = &mov_ops, },
4 { .name = "addq", .ops = &mov_ops, },
5 { .name = "addw", .ops = &mov_ops, },
6 { .name = "and", .ops = &mov_ops, },
7 { .name = "bts", .ops = &mov_ops, },
8 { .name = "call", .ops = &call_ops, },
9 { .name = "callq", .ops = &call_ops, },
10 { .name = "cmp", .ops = &mov_ops, },
11 { .name = "cmpb", .ops = &mov_ops, },
12 { .name = "cmpl", .ops = &mov_ops, },
13 { .name = "cmpq", .ops = &mov_ops, },
14 { .name = "cmpw", .ops = &mov_ops, },
15 { .name = "cmpxch", .ops = &mov_ops, },
16 { .name = "dec", .ops = &dec_ops, },
17 { .name = "decl", .ops = &dec_ops, },
18 { .name = "imul", .ops = &mov_ops, },
19 { .name = "inc", .ops = &dec_ops, },
20 { .name = "incl", .ops = &dec_ops, },
21 { .name = "ja", .ops = &jump_ops, },
22 { .name = "jae", .ops = &jump_ops, },
23 { .name = "jb", .ops = &jump_ops, },
24 { .name = "jbe", .ops = &jump_ops, },
25 { .name = "jc", .ops = &jump_ops, },
26 { .name = "jcxz", .ops = &jump_ops, },
27 { .name = "je", .ops = &jump_ops, },
28 { .name = "jecxz", .ops = &jump_ops, },
29 { .name = "jg", .ops = &jump_ops, },
30 { .name = "jge", .ops = &jump_ops, },
31 { .name = "jl", .ops = &jump_ops, },
32 { .name = "jle", .ops = &jump_ops, },
33 { .name = "jmp", .ops = &jump_ops, },
34 { .name = "jmpq", .ops = &jump_ops, },
35 { .name = "jna", .ops = &jump_ops, },
36 { .name = "jnae", .ops = &jump_ops, },
37 { .name = "jnb", .ops = &jump_ops, },
38 { .name = "jnbe", .ops = &jump_ops, },
39 { .name = "jnc", .ops = &jump_ops, },
40 { .name = "jne", .ops = &jump_ops, },
41 { .name = "jng", .ops = &jump_ops, },
42 { .name = "jnge", .ops = &jump_ops, },
43 { .name = "jnl", .ops = &jump_ops, },
44 { .name = "jnle", .ops = &jump_ops, },
45 { .name = "jno", .ops = &jump_ops, },
46 { .name = "jnp", .ops = &jump_ops, },
47 { .name = "jns", .ops = &jump_ops, },
48 { .name = "jnz", .ops = &jump_ops, },
49 { .name = "jo", .ops = &jump_ops, },
50 { .name = "jp", .ops = &jump_ops, },
51 { .name = "jpe", .ops = &jump_ops, },
52 { .name = "jpo", .ops = &jump_ops, },
53 { .name = "jrcxz", .ops = &jump_ops, },
54 { .name = "js", .ops = &jump_ops, },
55 { .name = "jz", .ops = &jump_ops, },
56 { .name = "lea", .ops = &mov_ops, },
57 { .name = "lock", .ops = &lock_ops, },
58 { .name = "mov", .ops = &mov_ops, },
59 { .name = "movb", .ops = &mov_ops, },
60 { .name = "movdqa", .ops = &mov_ops, },
61 { .name = "movl", .ops = &mov_ops, },
62 { .name = "movq", .ops = &mov_ops, },
63 { .name = "movslq", .ops = &mov_ops, },
64 { .name = "movzbl", .ops = &mov_ops, },
65 { .name = "movzwl", .ops = &mov_ops, },
66 { .name = "nop", .ops = &nop_ops, },
67 { .name = "nopl", .ops = &nop_ops, },
68 { .name = "nopw", .ops = &nop_ops, },
69 { .name = "or", .ops = &mov_ops, },
70 { .name = "orl", .ops = &mov_ops, },
71 { .name = "test", .ops = &mov_ops, },
72 { .name = "testb", .ops = &mov_ops, },
73 { .name = "testl", .ops = &mov_ops, },
74 { .name = "xadd", .ops = &mov_ops, },
75 { .name = "xbeginl", .ops = &jump_ops, },
76 { .name = "xbeginq", .ops = &jump_ops, },
77 { .name = "retq", .ops = &ret_ops, },
78};
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c6d0dda594d9..4b419631753d 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -91,6 +91,19 @@ struct perf_c2c {
91enum { 91enum {
92 DISPLAY_LCL, 92 DISPLAY_LCL,
93 DISPLAY_RMT, 93 DISPLAY_RMT,
94 DISPLAY_TOT,
95 DISPLAY_MAX,
96};
97
98static const char *display_str[DISPLAY_MAX] = {
99 [DISPLAY_LCL] = "Local",
100 [DISPLAY_RMT] = "Remote",
101 [DISPLAY_TOT] = "Total",
102};
103
104static const struct option c2c_options[] = {
105 OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"),
106 OPT_END()
94}; 107};
95 108
96static struct perf_c2c c2c; 109static struct perf_c2c c2c;
@@ -745,6 +758,10 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he)
745 case DISPLAY_LCL: 758 case DISPLAY_LCL:
746 st = stats->lcl_hitm; 759 st = stats->lcl_hitm;
747 tot = total->lcl_hitm; 760 tot = total->lcl_hitm;
761 break;
762 case DISPLAY_TOT:
763 st = stats->tot_hitm;
764 tot = total->tot_hitm;
748 default: 765 default:
749 break; 766 break;
750 } 767 }
@@ -1044,6 +1061,9 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
1044 break; 1061 break;
1045 case DISPLAY_LCL: 1062 case DISPLAY_LCL:
1046 DISPLAY_HITM(lcl_hitm); 1063 DISPLAY_HITM(lcl_hitm);
1064 break;
1065 case DISPLAY_TOT:
1066 DISPLAY_HITM(tot_hitm);
1047 default: 1067 default:
1048 break; 1068 break;
1049 } 1069 }
@@ -1351,6 +1371,7 @@ static struct c2c_dimension dim_tot_loads = {
1351static struct c2c_header percent_hitm_header[] = { 1371static struct c2c_header percent_hitm_header[] = {
1352 [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"), 1372 [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
1353 [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"), 1373 [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
1374 [DISPLAY_TOT] = HEADER_BOTH("Tot", "Hitm"),
1354}; 1375};
1355 1376
1356static struct c2c_dimension dim_percent_hitm = { 1377static struct c2c_dimension dim_percent_hitm = {
@@ -1794,6 +1815,9 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
1794 break; 1815 break;
1795 case DISPLAY_RMT: 1816 case DISPLAY_RMT:
1796 FILTER_HITM(rmt_hitm); 1817 FILTER_HITM(rmt_hitm);
1818 break;
1819 case DISPLAY_TOT:
1820 FILTER_HITM(tot_hitm);
1797 default: 1821 default:
1798 break; 1822 break;
1799 }; 1823 };
@@ -1809,8 +1833,9 @@ static inline int valid_hitm_or_store(struct hist_entry *he)
1809 bool has_hitm; 1833 bool has_hitm;
1810 1834
1811 c2c_he = container_of(he, struct c2c_hist_entry, he); 1835 c2c_he = container_of(he, struct c2c_hist_entry, he);
1812 has_hitm = c2c.display == DISPLAY_LCL ? 1836 has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm :
1813 c2c_he->stats.lcl_hitm : c2c_he->stats.rmt_hitm; 1837 c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm :
1838 c2c_he->stats.rmt_hitm;
1814 return has_hitm || c2c_he->stats.store; 1839 return has_hitm || c2c_he->stats.store;
1815} 1840}
1816 1841
@@ -2095,7 +2120,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
2095 first = false; 2120 first = false;
2096 } 2121 }
2097 fprintf(out, " Cachelines sort on : %s HITMs\n", 2122 fprintf(out, " Cachelines sort on : %s HITMs\n",
2098 c2c.display == DISPLAY_LCL ? "Local" : "Remote"); 2123 display_str[c2c.display]);
2099 fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort); 2124 fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort);
2100} 2125}
2101 2126
@@ -2250,7 +2275,7 @@ static int perf_c2c_browser__title(struct hist_browser *browser,
2250 "Shared Data Cache Line Table " 2275 "Shared Data Cache Line Table "
2251 "(%lu entries, sorted on %s HITMs)", 2276 "(%lu entries, sorted on %s HITMs)",
2252 browser->nr_non_filtered_entries, 2277 browser->nr_non_filtered_entries,
2253 c2c.display == DISPLAY_LCL ? "local" : "remote"); 2278 display_str[c2c.display]);
2254 return 0; 2279 return 0;
2255} 2280}
2256 2281
@@ -2387,9 +2412,11 @@ static int setup_callchain(struct perf_evlist *evlist)
2387 2412
2388static int setup_display(const char *str) 2413static int setup_display(const char *str)
2389{ 2414{
2390 const char *display = str ?: "rmt"; 2415 const char *display = str ?: "tot";
2391 2416
2392 if (!strcmp(display, "rmt")) 2417 if (!strcmp(display, "tot"))
2418 c2c.display = DISPLAY_TOT;
2419 else if (!strcmp(display, "rmt"))
2393 c2c.display = DISPLAY_RMT; 2420 c2c.display = DISPLAY_RMT;
2394 else if (!strcmp(display, "lcl")) 2421 else if (!strcmp(display, "lcl"))
2395 c2c.display = DISPLAY_LCL; 2422 c2c.display = DISPLAY_LCL;
@@ -2474,6 +2501,8 @@ static int setup_coalesce(const char *coalesce, bool no_source)
2474 return -1; 2501 return -1;
2475 2502
2476 if (asprintf(&c2c.cl_resort, "offset,%s", 2503 if (asprintf(&c2c.cl_resort, "offset,%s",
2504 c2c.display == DISPLAY_TOT ?
2505 "tot_hitm" :
2477 c2c.display == DISPLAY_RMT ? 2506 c2c.display == DISPLAY_RMT ?
2478 "rmt_hitm,lcl_hitm" : 2507 "rmt_hitm,lcl_hitm" :
2479 "lcl_hitm,rmt_hitm") < 0) 2508 "lcl_hitm,rmt_hitm") < 0)
@@ -2496,11 +2525,9 @@ static int perf_c2c__report(int argc, const char **argv)
2496 const char *display = NULL; 2525 const char *display = NULL;
2497 const char *coalesce = NULL; 2526 const char *coalesce = NULL;
2498 bool no_source = false; 2527 bool no_source = false;
2499 const struct option c2c_options[] = { 2528 const struct option options[] = {
2500 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 2529 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2501 "file", "vmlinux pathname"), 2530 "file", "vmlinux pathname"),
2502 OPT_INCR('v', "verbose", &verbose,
2503 "be more verbose (show counter open errors, etc)"),
2504 OPT_STRING('i', "input", &input_name, "file", 2531 OPT_STRING('i', "input", &input_name, "file",
2505 "the input file to process"), 2532 "the input file to process"),
2506 OPT_INCR('N', "node-info", &c2c.node_info, 2533 OPT_INCR('N', "node-info", &c2c.node_info,
@@ -2520,32 +2547,28 @@ static int perf_c2c__report(int argc, const char **argv)
2520 "print_type,threshold[,print_limit],order,sort_key[,branch],value", 2547 "print_type,threshold[,print_limit],order,sort_key[,branch],value",
2521 callchain_help, &parse_callchain_opt, 2548 callchain_help, &parse_callchain_opt,
2522 callchain_default_opt), 2549 callchain_default_opt),
2523 OPT_STRING('d', "display", &display, NULL, "lcl,rmt"), 2550 OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"),
2524 OPT_STRING('c', "coalesce", &coalesce, "coalesce fields", 2551 OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
2525 "coalesce fields: pid,tid,iaddr,dso"), 2552 "coalesce fields: pid,tid,iaddr,dso"),
2553 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
2554 OPT_PARENT(c2c_options),
2526 OPT_END() 2555 OPT_END()
2527 }; 2556 };
2528 int err = 0; 2557 int err = 0;
2529 2558
2530 argc = parse_options(argc, argv, c2c_options, report_c2c_usage, 2559 argc = parse_options(argc, argv, options, report_c2c_usage,
2531 PARSE_OPT_STOP_AT_NON_OPTION); 2560 PARSE_OPT_STOP_AT_NON_OPTION);
2532 if (argc) 2561 if (argc)
2533 usage_with_options(report_c2c_usage, c2c_options); 2562 usage_with_options(report_c2c_usage, options);
2534 2563
2535 if (c2c.stats_only) 2564 if (c2c.stats_only)
2536 c2c.use_stdio = true; 2565 c2c.use_stdio = true;
2537 2566
2538 if (c2c.use_stdio)
2539 use_browser = 0;
2540 else
2541 use_browser = 1;
2542
2543 setup_browser(false);
2544
2545 if (!input_name || !strlen(input_name)) 2567 if (!input_name || !strlen(input_name))
2546 input_name = "perf.data"; 2568 input_name = "perf.data";
2547 2569
2548 file.path = input_name; 2570 file.path = input_name;
2571 file.force = symbol_conf.force;
2549 2572
2550 err = setup_display(display); 2573 err = setup_display(display);
2551 if (err) 2574 if (err)
@@ -2568,6 +2591,7 @@ static int perf_c2c__report(int argc, const char **argv)
2568 pr_debug("No memory for session\n"); 2591 pr_debug("No memory for session\n");
2569 goto out; 2592 goto out;
2570 } 2593 }
2594
2571 err = setup_nodes(session); 2595 err = setup_nodes(session);
2572 if (err) { 2596 if (err) {
2573 pr_err("Failed setup nodes\n"); 2597 pr_err("Failed setup nodes\n");
@@ -2587,6 +2611,13 @@ static int perf_c2c__report(int argc, const char **argv)
2587 goto out_session; 2611 goto out_session;
2588 } 2612 }
2589 2613
2614 if (c2c.use_stdio)
2615 use_browser = 0;
2616 else
2617 use_browser = 1;
2618
2619 setup_browser(false);
2620
2590 err = perf_session__process_events(session); 2621 err = perf_session__process_events(session);
2591 if (err) { 2622 if (err) {
2592 pr_err("failed to process sample\n"); 2623 pr_err("failed to process sample\n");
@@ -2605,6 +2636,7 @@ static int perf_c2c__report(int argc, const char **argv)
2605 "tot_loads," 2636 "tot_loads,"
2606 "ld_fbhit,ld_l1hit,ld_l2hit," 2637 "ld_fbhit,ld_l1hit,ld_l2hit,"
2607 "ld_lclhit,ld_rmthit", 2638 "ld_lclhit,ld_rmthit",
2639 c2c.display == DISPLAY_TOT ? "tot_hitm" :
2608 c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm" 2640 c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
2609 ); 2641 );
2610 2642
@@ -2655,11 +2687,10 @@ static int perf_c2c__record(int argc, const char **argv)
2655 OPT_CALLBACK('e', "event", &event_set, "event", 2687 OPT_CALLBACK('e', "event", &event_set, "event",
2656 "event selector. Use 'perf mem record -e list' to list available events", 2688 "event selector. Use 'perf mem record -e list' to list available events",
2657 parse_record_events), 2689 parse_record_events),
2658 OPT_INCR('v', "verbose", &verbose,
2659 "be more verbose (show counter open errors, etc)"),
2660 OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"), 2690 OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
2661 OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"), 2691 OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
2662 OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"), 2692 OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
2693 OPT_PARENT(c2c_options),
2663 OPT_END() 2694 OPT_END()
2664 }; 2695 };
2665 2696
@@ -2731,11 +2762,6 @@ static int perf_c2c__record(int argc, const char **argv)
2731 2762
2732int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) 2763int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
2733{ 2764{
2734 const struct option c2c_options[] = {
2735 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2736 OPT_END()
2737 };
2738
2739 argc = parse_options(argc, argv, c2c_options, c2c_usage, 2765 argc = parse_options(argc, argv, c2c_options, c2c_usage,
2740 PARSE_OPT_STOP_AT_NON_OPTION); 2766 PARSE_OPT_STOP_AT_NON_OPTION);
2741 2767
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index fb3441211e4b..829468defa07 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -13,12 +13,15 @@
13#include "util/cloexec.h" 13#include "util/cloexec.h"
14#include "util/thread_map.h" 14#include "util/thread_map.h"
15#include "util/color.h" 15#include "util/color.h"
16#include "util/stat.h"
17#include "util/callchain.h"
16 18
17#include <subcmd/parse-options.h> 19#include <subcmd/parse-options.h>
18#include "util/trace-event.h" 20#include "util/trace-event.h"
19 21
20#include "util/debug.h" 22#include "util/debug.h"
21 23
24#include <linux/log2.h>
22#include <sys/prctl.h> 25#include <sys/prctl.h>
23#include <sys/resource.h> 26#include <sys/resource.h>
24 27
@@ -192,8 +195,40 @@ struct perf_sched {
192 bool force; 195 bool force;
193 bool skip_merge; 196 bool skip_merge;
194 struct perf_sched_map map; 197 struct perf_sched_map map;
198
199 /* options for timehist command */
200 bool summary;
201 bool summary_only;
202 bool show_callchain;
203 unsigned int max_stack;
204 bool show_cpu_visual;
205 bool show_wakeups;
206 u64 skipped_samples;
207};
208
209/* per thread run time data */
210struct thread_runtime {
211 u64 last_time; /* time of previous sched in/out event */
212 u64 dt_run; /* run time */
213 u64 dt_wait; /* time between CPU access (off cpu) */
214 u64 dt_delay; /* time between wakeup and sched-in */
215 u64 ready_to_run; /* time of wakeup */
216
217 struct stats run_stats;
218 u64 total_run_time;
195}; 219};
196 220
221/* per event run time data */
222struct evsel_runtime {
223 u64 *last_time; /* time this event was last seen per cpu */
224 u32 ncpu; /* highest cpu slot allocated */
225};
226
227/* track idle times per cpu */
228static struct thread **idle_threads;
229static int idle_max_cpu;
230static char idle_comm[] = "<idle>";
231
197static u64 get_nsecs(void) 232static u64 get_nsecs(void)
198{ 233{
199 struct timespec ts; 234 struct timespec ts;
@@ -1654,6 +1689,837 @@ out_delete:
1654 return rc; 1689 return rc;
1655} 1690}
1656 1691
1692/*
1693 * scheduling times are printed as msec.usec
1694 */
1695static inline void print_sched_time(unsigned long long nsecs, int width)
1696{
1697 unsigned long msecs;
1698 unsigned long usecs;
1699
1700 msecs = nsecs / NSEC_PER_MSEC;
1701 nsecs -= msecs * NSEC_PER_MSEC;
1702 usecs = nsecs / NSEC_PER_USEC;
1703 printf("%*lu.%03lu ", width, msecs, usecs);
1704}
1705
1706/*
1707 * returns runtime data for event, allocating memory for it the
1708 * first time it is used.
1709 */
1710static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel)
1711{
1712 struct evsel_runtime *r = evsel->priv;
1713
1714 if (r == NULL) {
1715 r = zalloc(sizeof(struct evsel_runtime));
1716 evsel->priv = r;
1717 }
1718
1719 return r;
1720}
1721
1722/*
1723 * save last time event was seen per cpu
1724 */
1725static void perf_evsel__save_time(struct perf_evsel *evsel,
1726 u64 timestamp, u32 cpu)
1727{
1728 struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
1729
1730 if (r == NULL)
1731 return;
1732
1733 if ((cpu >= r->ncpu) || (r->last_time == NULL)) {
1734 int i, n = __roundup_pow_of_two(cpu+1);
1735 void *p = r->last_time;
1736
1737 p = realloc(r->last_time, n * sizeof(u64));
1738 if (!p)
1739 return;
1740
1741 r->last_time = p;
1742 for (i = r->ncpu; i < n; ++i)
1743 r->last_time[i] = (u64) 0;
1744
1745 r->ncpu = n;
1746 }
1747
1748 r->last_time[cpu] = timestamp;
1749}
1750
1751/* returns last time this event was seen on the given cpu */
1752static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu)
1753{
1754 struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
1755
1756 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
1757 return 0;
1758
1759 return r->last_time[cpu];
1760}
1761
1762static int comm_width = 20;
1763
1764static char *timehist_get_commstr(struct thread *thread)
1765{
1766 static char str[32];
1767 const char *comm = thread__comm_str(thread);
1768 pid_t tid = thread->tid;
1769 pid_t pid = thread->pid_;
1770 int n;
1771
1772 if (pid == 0)
1773 n = scnprintf(str, sizeof(str), "%s", comm);
1774
1775 else if (tid != pid)
1776 n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid);
1777
1778 else
1779 n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid);
1780
1781 if (n > comm_width)
1782 comm_width = n;
1783
1784 return str;
1785}
1786
1787static void timehist_header(struct perf_sched *sched)
1788{
1789 u32 ncpus = sched->max_cpu + 1;
1790 u32 i, j;
1791
1792 printf("%15s %6s ", "time", "cpu");
1793
1794 if (sched->show_cpu_visual) {
1795 printf(" ");
1796 for (i = 0, j = 0; i < ncpus; ++i) {
1797 printf("%x", j++);
1798 if (j > 15)
1799 j = 0;
1800 }
1801 printf(" ");
1802 }
1803
1804 printf(" %-20s %9s %9s %9s",
1805 "task name", "wait time", "sch delay", "run time");
1806
1807 printf("\n");
1808
1809 /*
1810 * units row
1811 */
1812 printf("%15s %-6s ", "", "");
1813
1814 if (sched->show_cpu_visual)
1815 printf(" %*s ", ncpus, "");
1816
1817 printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)");
1818
1819 /*
1820 * separator
1821 */
1822 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line);
1823
1824 if (sched->show_cpu_visual)
1825 printf(" %.*s ", ncpus, graph_dotted_line);
1826
1827 printf(" %.20s %.9s %.9s %.9s",
1828 graph_dotted_line, graph_dotted_line, graph_dotted_line,
1829 graph_dotted_line);
1830
1831 printf("\n");
1832}
1833
1834static void timehist_print_sample(struct perf_sched *sched,
1835 struct perf_sample *sample,
1836 struct addr_location *al,
1837 struct thread *thread)
1838{
1839 struct thread_runtime *tr = thread__priv(thread);
1840 u32 max_cpus = sched->max_cpu + 1;
1841 char tstr[64];
1842
1843 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
1844 printf("%15s [%04d] ", tstr, sample->cpu);
1845
1846 if (sched->show_cpu_visual) {
1847 u32 i;
1848 char c;
1849
1850 printf(" ");
1851 for (i = 0; i < max_cpus; ++i) {
1852 /* flag idle times with 'i'; others are sched events */
1853 if (i == sample->cpu)
1854 c = (thread->tid == 0) ? 'i' : 's';
1855 else
1856 c = ' ';
1857 printf("%c", c);
1858 }
1859 printf(" ");
1860 }
1861
1862 printf(" %-*s ", comm_width, timehist_get_commstr(thread));
1863
1864 print_sched_time(tr->dt_wait, 6);
1865 print_sched_time(tr->dt_delay, 6);
1866 print_sched_time(tr->dt_run, 6);
1867
1868 if (sched->show_wakeups)
1869 printf(" %-*s", comm_width, "");
1870
1871 if (thread->tid == 0)
1872 goto out;
1873
1874 if (sched->show_callchain)
1875 printf(" ");
1876
1877 sample__fprintf_sym(sample, al, 0,
1878 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
1879 EVSEL__PRINT_CALLCHAIN_ARROW,
1880 &callchain_cursor, stdout);
1881
1882out:
1883 printf("\n");
1884}
1885
/*
 * Explanation of delta-time stats:
 *
 *            t = time of current schedule out event
 *        tprev = time of previous sched out event
 *                also time of schedule-in event for current task
 *    last_time = time of last sched change event for current task
 *                (i.e., time process was last scheduled out)
 * ready_to_run = time of wakeup for current task
 *
 * -----|------------|------------|------------|------
 *    last         ready        tprev          t
 *    time         to run
 *
 *      |-------- dt_wait --------|
 *                   |- dt_delay -|-- dt_run --|
 *
 *   dt_run = run time of current task
 *  dt_wait = time between last schedule out event for task and tprev
 *            represents time spent off the cpu
 * dt_delay = time between wakeup and schedule-in of task
 */
1908
1909static void timehist_update_runtime_stats(struct thread_runtime *r,
1910 u64 t, u64 tprev)
1911{
1912 r->dt_delay = 0;
1913 r->dt_wait = 0;
1914 r->dt_run = 0;
1915 if (tprev) {
1916 r->dt_run = t - tprev;
1917 if (r->ready_to_run) {
1918 if (r->ready_to_run > tprev)
1919 pr_debug("time travel: wakeup time for task > previous sched_switch event\n");
1920 else
1921 r->dt_delay = tprev - r->ready_to_run;
1922 }
1923
1924 if (r->last_time > tprev)
1925 pr_debug("time travel: last sched out time for task > previous sched_switch event\n");
1926 else if (r->last_time)
1927 r->dt_wait = tprev - r->last_time;
1928 }
1929
1930 update_stats(&r->run_stats, r->dt_run);
1931 r->total_run_time += r->dt_run;
1932}
1933
1934static bool is_idle_sample(struct perf_sched *sched,
1935 struct perf_sample *sample,
1936 struct perf_evsel *evsel,
1937 struct machine *machine)
1938{
1939 struct thread *thread;
1940 struct callchain_cursor *cursor = &callchain_cursor;
1941
1942 /* pid 0 == swapper == idle task */
1943 if (sample->pid == 0)
1944 return true;
1945
1946 if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) {
1947 if (perf_evsel__intval(evsel, sample, "prev_pid") == 0)
1948 return true;
1949 }
1950
1951 /* want main thread for process - has maps */
1952 thread = machine__findnew_thread(machine, sample->pid, sample->pid);
1953 if (thread == NULL) {
1954 pr_debug("Failed to get thread for pid %d.\n", sample->pid);
1955 return false;
1956 }
1957
1958 if (!symbol_conf.use_callchain || sample->callchain == NULL)
1959 return false;
1960
1961 if (thread__resolve_callchain(thread, cursor, evsel, sample,
1962 NULL, NULL, sched->max_stack) != 0) {
1963 if (verbose)
1964 error("Failed to resolve callchain. Skipping\n");
1965
1966 return false;
1967 }
1968 callchain_cursor_commit(cursor);
1969 return false;
1970}
1971
1972/*
1973 * Track idle stats per cpu by maintaining a local thread
1974 * struct for the idle task on each cpu.
1975 */
1976static int init_idle_threads(int ncpu)
1977{
1978 int i;
1979
1980 idle_threads = zalloc(ncpu * sizeof(struct thread *));
1981 if (!idle_threads)
1982 return -ENOMEM;
1983
1984 idle_max_cpu = ncpu - 1;
1985
1986 /* allocate the actual thread struct if needed */
1987 for (i = 0; i < ncpu; ++i) {
1988 idle_threads[i] = thread__new(0, 0);
1989 if (idle_threads[i] == NULL)
1990 return -ENOMEM;
1991
1992 thread__set_comm(idle_threads[i], idle_comm, 0);
1993 }
1994
1995 return 0;
1996}
1997
1998static void free_idle_threads(void)
1999{
2000 int i;
2001
2002 if (idle_threads == NULL)
2003 return;
2004
2005 for (i = 0; i <= idle_max_cpu; ++i) {
2006 if ((idle_threads[i]))
2007 thread__delete(idle_threads[i]);
2008 }
2009
2010 free(idle_threads);
2011}
2012
2013static struct thread *get_idle_thread(int cpu)
2014{
2015 /*
2016 * expand/allocate array of pointers to local thread
2017 * structs if needed
2018 */
2019 if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) {
2020 int i, j = __roundup_pow_of_two(cpu+1);
2021 void *p;
2022
2023 p = realloc(idle_threads, j * sizeof(struct thread *));
2024 if (!p)
2025 return NULL;
2026
2027 idle_threads = (struct thread **) p;
2028 i = idle_max_cpu ? idle_max_cpu + 1 : 0;
2029 for (; i < j; ++i)
2030 idle_threads[i] = NULL;
2031
2032 idle_max_cpu = j;
2033 }
2034
2035 /* allocate a new thread struct if needed */
2036 if (idle_threads[cpu] == NULL) {
2037 idle_threads[cpu] = thread__new(0, 0);
2038
2039 if (idle_threads[cpu]) {
2040 idle_threads[cpu]->tid = 0;
2041 thread__set_comm(idle_threads[cpu], idle_comm, 0);
2042 }
2043 }
2044
2045 return idle_threads[cpu];
2046}
2047
2048/*
2049 * handle runtime stats saved per thread
2050 */
2051static struct thread_runtime *thread__init_runtime(struct thread *thread)
2052{
2053 struct thread_runtime *r;
2054
2055 r = zalloc(sizeof(struct thread_runtime));
2056 if (!r)
2057 return NULL;
2058
2059 init_stats(&r->run_stats);
2060 thread__set_priv(thread, r);
2061
2062 return r;
2063}
2064
/*
 * Return the runtime data attached to @thread, creating it on first use.
 * Returns NULL (after a debug message) if it could not be allocated.
 */
static struct thread_runtime *thread__get_runtime(struct thread *thread)
{
	struct thread_runtime *tr = thread__priv(thread);

	if (tr != NULL)
		return tr;

	tr = thread__init_runtime(thread);
	if (tr == NULL)
		pr_debug("Failed to malloc memory for runtime data.\n");

	return tr;
}
2078
2079static struct thread *timehist_get_thread(struct perf_sched *sched,
2080 struct perf_sample *sample,
2081 struct machine *machine,
2082 struct perf_evsel *evsel)
2083{
2084 struct thread *thread;
2085
2086 if (is_idle_sample(sched, sample, evsel, machine)) {
2087 thread = get_idle_thread(sample->cpu);
2088 if (thread == NULL)
2089 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
2090
2091 } else {
2092 thread = machine__findnew_thread(machine, sample->pid, sample->tid);
2093 if (thread == NULL) {
2094 pr_debug("Failed to get thread for tid %d. skipping sample.\n",
2095 sample->tid);
2096 }
2097 }
2098
2099 return thread;
2100}
2101
2102static bool timehist_skip_sample(struct perf_sched *sched,
2103 struct thread *thread)
2104{
2105 bool rc = false;
2106
2107 if (thread__is_filtered(thread)) {
2108 rc = true;
2109 sched->skipped_samples++;
2110 }
2111
2112 return rc;
2113}
2114
2115static void timehist_print_wakeup_event(struct perf_sched *sched,
2116 struct perf_sample *sample,
2117 struct machine *machine,
2118 struct thread *awakened)
2119{
2120 struct thread *thread;
2121 char tstr[64];
2122
2123 thread = machine__findnew_thread(machine, sample->pid, sample->tid);
2124 if (thread == NULL)
2125 return;
2126
2127 /* show wakeup unless both awakee and awaker are filtered */
2128 if (timehist_skip_sample(sched, thread) &&
2129 timehist_skip_sample(sched, awakened)) {
2130 return;
2131 }
2132
2133 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
2134 printf("%15s [%04d] ", tstr, sample->cpu);
2135 if (sched->show_cpu_visual)
2136 printf(" %*s ", sched->max_cpu + 1, "");
2137
2138 printf(" %-*s ", comm_width, timehist_get_commstr(thread));
2139
2140 /* dt spacer */
2141 printf(" %9s %9s %9s ", "", "", "");
2142
2143 printf("awakened: %s", timehist_get_commstr(awakened));
2144
2145 printf("\n");
2146}
2147
2148static int timehist_sched_wakeup_event(struct perf_tool *tool,
2149 union perf_event *event __maybe_unused,
2150 struct perf_evsel *evsel,
2151 struct perf_sample *sample,
2152 struct machine *machine)
2153{
2154 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
2155 struct thread *thread;
2156 struct thread_runtime *tr = NULL;
2157 /* want pid of awakened task not pid in sample */
2158 const u32 pid = perf_evsel__intval(evsel, sample, "pid");
2159
2160 thread = machine__findnew_thread(machine, 0, pid);
2161 if (thread == NULL)
2162 return -1;
2163
2164 tr = thread__get_runtime(thread);
2165 if (tr == NULL)
2166 return -1;
2167
2168 if (tr->ready_to_run == 0)
2169 tr->ready_to_run = sample->time;
2170
2171 /* show wakeups if requested */
2172 if (sched->show_wakeups)
2173 timehist_print_wakeup_event(sched, sample, machine, thread);
2174
2175 return 0;
2176}
2177
2178static int timehist_sched_change_event(struct perf_tool *tool,
2179 union perf_event *event,
2180 struct perf_evsel *evsel,
2181 struct perf_sample *sample,
2182 struct machine *machine)
2183{
2184 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
2185 struct addr_location al;
2186 struct thread *thread;
2187 struct thread_runtime *tr = NULL;
2188 u64 tprev;
2189 int rc = 0;
2190
2191 if (machine__resolve(machine, &al, sample) < 0) {
2192 pr_err("problem processing %d event. skipping it\n",
2193 event->header.type);
2194 rc = -1;
2195 goto out;
2196 }
2197
2198 thread = timehist_get_thread(sched, sample, machine, evsel);
2199 if (thread == NULL) {
2200 rc = -1;
2201 goto out;
2202 }
2203
2204 if (timehist_skip_sample(sched, thread))
2205 goto out;
2206
2207 tr = thread__get_runtime(thread);
2208 if (tr == NULL) {
2209 rc = -1;
2210 goto out;
2211 }
2212
2213 tprev = perf_evsel__get_time(evsel, sample->cpu);
2214
2215 timehist_update_runtime_stats(tr, sample->time, tprev);
2216 if (!sched->summary_only)
2217 timehist_print_sample(sched, sample, &al, thread);
2218
2219out:
2220 if (tr) {
2221 /* time of this sched_switch event becomes last time task seen */
2222 tr->last_time = sample->time;
2223
2224 /* sched out event for task so reset ready to run time */
2225 tr->ready_to_run = 0;
2226 }
2227
2228 perf_evsel__save_time(evsel, sample->time, sample->cpu);
2229
2230 return rc;
2231}
2232
/*
 * Handler for the sched:sched_switch tracepoint; forwards every argument
 * unchanged to timehist_sched_change_event() and returns its result.
 *
 * @machine is passed on, so it does not carry __maybe_unused.
 */
static int timehist_sched_switch_event(struct perf_tool *tool,
				       union perf_event *event,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	return timehist_sched_change_event(tool, event, evsel, sample, machine);
}
2241
2242static int process_lost(struct perf_tool *tool __maybe_unused,
2243 union perf_event *event,
2244 struct perf_sample *sample,
2245 struct machine *machine __maybe_unused)
2246{
2247 char tstr[64];
2248
2249 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
2250 printf("%15s ", tstr);
2251 printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu);
2252
2253 return 0;
2254}
2255
2256
2257static void print_thread_runtime(struct thread *t,
2258 struct thread_runtime *r)
2259{
2260 double mean = avg_stats(&r->run_stats);
2261 float stddev;
2262
2263 printf("%*s %5d %9" PRIu64 " ",
2264 comm_width, timehist_get_commstr(t), t->ppid,
2265 (u64) r->run_stats.n);
2266
2267 print_sched_time(r->total_run_time, 8);
2268 stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean);
2269 print_sched_time(r->run_stats.min, 6);
2270 printf(" ");
2271 print_sched_time((u64) mean, 6);
2272 printf(" ");
2273 print_sched_time(r->run_stats.max, 6);
2274 printf(" ");
2275 printf("%5.2f", stddev);
2276 printf("\n");
2277}
2278
2279struct total_run_stats {
2280 u64 sched_count;
2281 u64 task_count;
2282 u64 total_run_time;
2283};
2284
2285static int __show_thread_runtime(struct thread *t, void *priv)
2286{
2287 struct total_run_stats *stats = priv;
2288 struct thread_runtime *r;
2289
2290 if (thread__is_filtered(t))
2291 return 0;
2292
2293 r = thread__priv(t);
2294 if (r && r->run_stats.n) {
2295 stats->task_count++;
2296 stats->sched_count += r->run_stats.n;
2297 stats->total_run_time += r->total_run_time;
2298 print_thread_runtime(t, r);
2299 }
2300
2301 return 0;
2302}
2303
2304static int show_thread_runtime(struct thread *t, void *priv)
2305{
2306 if (t->dead)
2307 return 0;
2308
2309 return __show_thread_runtime(t, priv);
2310}
2311
2312static int show_deadthread_runtime(struct thread *t, void *priv)
2313{
2314 if (!t->dead)
2315 return 0;
2316
2317 return __show_thread_runtime(t, priv);
2318}
2319
2320static void timehist_print_summary(struct perf_sched *sched,
2321 struct perf_session *session)
2322{
2323 struct machine *m = &session->machines.host;
2324 struct total_run_stats totals;
2325 u64 task_count;
2326 struct thread *t;
2327 struct thread_runtime *r;
2328 int i;
2329
2330 memset(&totals, 0, sizeof(totals));
2331
2332 if (comm_width < 30)
2333 comm_width = 30;
2334
2335 printf("\nRuntime summary\n");
2336 printf("%*s parent sched-in ", comm_width, "comm");
2337 printf(" run-time min-run avg-run max-run stddev\n");
2338 printf("%*s (count) ", comm_width, "");
2339 printf(" (msec) (msec) (msec) (msec) %%\n");
2340 printf("%.105s\n", graph_dotted_line);
2341
2342 machine__for_each_thread(m, show_thread_runtime, &totals);
2343 task_count = totals.task_count;
2344 if (!task_count)
2345 printf("<no still running tasks>\n");
2346
2347 printf("\nTerminated tasks:\n");
2348 machine__for_each_thread(m, show_deadthread_runtime, &totals);
2349 if (task_count == totals.task_count)
2350 printf("<no terminated tasks>\n");
2351
2352 /* CPU idle stats not tracked when samples were skipped */
2353 if (sched->skipped_samples)
2354 return;
2355
2356 printf("\nIdle stats:\n");
2357 for (i = 0; i <= idle_max_cpu; ++i) {
2358 t = idle_threads[i];
2359 if (!t)
2360 continue;
2361
2362 r = thread__priv(t);
2363 if (r && r->run_stats.n) {
2364 totals.sched_count += r->run_stats.n;
2365 printf(" CPU %2d idle for ", i);
2366 print_sched_time(r->total_run_time, 6);
2367 printf(" msec\n");
2368 } else
2369 printf(" CPU %2d idle entire time window\n", i);
2370 }
2371
2372 printf("\n"
2373 " Total number of unique tasks: %" PRIu64 "\n"
2374 "Total number of context switches: %" PRIu64 "\n"
2375 " Total run time (msec): ",
2376 totals.task_count, totals.sched_count);
2377
2378 print_sched_time(totals.total_run_time, 2);
2379 printf("\n");
2380}
2381
/*
 * Signature of the per-evsel sample handlers; the pointer stored in
 * evsel->handler is called through this type.
 */
typedef int (*sched_handler)(struct perf_tool *tool,
			     union perf_event *event,
			     struct perf_evsel *evsel,
			     struct perf_sample *sample,
			     struct machine *machine);
2387
2388static int perf_timehist__process_sample(struct perf_tool *tool,
2389 union perf_event *event,
2390 struct perf_sample *sample,
2391 struct perf_evsel *evsel,
2392 struct machine *machine)
2393{
2394 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
2395 int err = 0;
2396 int this_cpu = sample->cpu;
2397
2398 if (this_cpu > sched->max_cpu)
2399 sched->max_cpu = this_cpu;
2400
2401 if (evsel->handler != NULL) {
2402 sched_handler f = evsel->handler;
2403
2404 err = f(tool, event, evsel, sample, machine);
2405 }
2406
2407 return err;
2408}
2409
2410static int timehist_check_attr(struct perf_sched *sched,
2411 struct perf_evlist *evlist)
2412{
2413 struct perf_evsel *evsel;
2414 struct evsel_runtime *er;
2415
2416 list_for_each_entry(evsel, &evlist->entries, node) {
2417 er = perf_evsel__get_runtime(evsel);
2418 if (er == NULL) {
2419 pr_err("Failed to allocate memory for evsel runtime data\n");
2420 return -1;
2421 }
2422
2423 if (sched->show_callchain &&
2424 !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) {
2425 pr_info("Samples do not have callchains.\n");
2426 sched->show_callchain = 0;
2427 symbol_conf.use_callchain = 0;
2428 }
2429 }
2430
2431 return 0;
2432}
2433
2434static int perf_sched__timehist(struct perf_sched *sched)
2435{
2436 const struct perf_evsel_str_handler handlers[] = {
2437 { "sched:sched_switch", timehist_sched_switch_event, },
2438 { "sched:sched_wakeup", timehist_sched_wakeup_event, },
2439 { "sched:sched_wakeup_new", timehist_sched_wakeup_event, },
2440 };
2441 struct perf_data_file file = {
2442 .path = input_name,
2443 .mode = PERF_DATA_MODE_READ,
2444 };
2445
2446 struct perf_session *session;
2447 struct perf_evlist *evlist;
2448 int err = -1;
2449
2450 /*
2451 * event handlers for timehist option
2452 */
2453 sched->tool.sample = perf_timehist__process_sample;
2454 sched->tool.mmap = perf_event__process_mmap;
2455 sched->tool.comm = perf_event__process_comm;
2456 sched->tool.exit = perf_event__process_exit;
2457 sched->tool.fork = perf_event__process_fork;
2458 sched->tool.lost = process_lost;
2459 sched->tool.attr = perf_event__process_attr;
2460 sched->tool.tracing_data = perf_event__process_tracing_data;
2461 sched->tool.build_id = perf_event__process_build_id;
2462
2463 sched->tool.ordered_events = true;
2464 sched->tool.ordering_requires_timestamps = true;
2465
2466 symbol_conf.use_callchain = sched->show_callchain;
2467
2468 session = perf_session__new(&file, false, &sched->tool);
2469 if (session == NULL)
2470 return -ENOMEM;
2471
2472 evlist = session->evlist;
2473
2474 symbol__init(&session->header.env);
2475
2476 if (timehist_check_attr(sched, evlist) != 0)
2477 goto out;
2478
2479 setup_pager();
2480
2481 /* setup per-evsel handlers */
2482 if (perf_session__set_tracepoints_handlers(session, handlers))
2483 goto out;
2484
2485 if (!perf_session__has_traces(session, "record -R"))
2486 goto out;
2487
2488 /* pre-allocate struct for per-CPU idle stats */
2489 sched->max_cpu = session->header.env.nr_cpus_online;
2490 if (sched->max_cpu == 0)
2491 sched->max_cpu = 4;
2492 if (init_idle_threads(sched->max_cpu))
2493 goto out;
2494
2495 /* summary_only implies summary option, but don't overwrite summary if set */
2496 if (sched->summary_only)
2497 sched->summary = sched->summary_only;
2498
2499 if (!sched->summary_only)
2500 timehist_header(sched);
2501
2502 err = perf_session__process_events(session);
2503 if (err) {
2504 pr_err("Failed to process events, error %d", err);
2505 goto out;
2506 }
2507
2508 sched->nr_events = evlist->stats.nr_events[0];
2509 sched->nr_lost_events = evlist->stats.total_lost;
2510 sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST];
2511
2512 if (sched->summary)
2513 timehist_print_summary(sched, session);
2514
2515out:
2516 free_idle_threads();
2517 perf_session__delete(session);
2518
2519 return err;
2520}
2521
2522
1657static void print_bad_events(struct perf_sched *sched) 2523static void print_bad_events(struct perf_sched *sched)
1658{ 2524{
1659 if (sched->nr_unordered_timestamps && sched->nr_timestamps) { 2525 if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
@@ -1957,6 +2823,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
1957 .next_shortname1 = 'A', 2823 .next_shortname1 = 'A',
1958 .next_shortname2 = '0', 2824 .next_shortname2 = '0',
1959 .skip_merge = 0, 2825 .skip_merge = 0,
2826 .show_callchain = 1,
2827 .max_stack = 5,
1960 }; 2828 };
1961 const struct option sched_options[] = { 2829 const struct option sched_options[] = {
1962 OPT_STRING('i', "input", &input_name, "file", 2830 OPT_STRING('i', "input", &input_name, "file",
@@ -1970,8 +2838,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
1970 const struct option latency_options[] = { 2838 const struct option latency_options[] = {
1971 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 2839 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
1972 "sort by key(s): runtime, switch, avg, max"), 2840 "sort by key(s): runtime, switch, avg, max"),
1973 OPT_INCR('v', "verbose", &verbose,
1974 "be more verbose (show symbol address, etc)"),
1975 OPT_INTEGER('C', "CPU", &sched.profile_cpu, 2841 OPT_INTEGER('C', "CPU", &sched.profile_cpu,
1976 "CPU to profile on"), 2842 "CPU to profile on"),
1977 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 2843 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
@@ -1983,8 +2849,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
1983 const struct option replay_options[] = { 2849 const struct option replay_options[] = {
1984 OPT_UINTEGER('r', "repeat", &sched.replay_repeat, 2850 OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
1985 "repeat the workload replay N times (-1: infinite)"), 2851 "repeat the workload replay N times (-1: infinite)"),
1986 OPT_INCR('v', "verbose", &verbose,
1987 "be more verbose (show symbol address, etc)"),
1988 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 2852 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1989 "dump raw trace in ASCII"), 2853 "dump raw trace in ASCII"),
1990 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), 2854 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
@@ -2001,6 +2865,26 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
2001 "display given CPUs in map"), 2865 "display given CPUs in map"),
2002 OPT_PARENT(sched_options) 2866 OPT_PARENT(sched_options)
2003 }; 2867 };
2868 const struct option timehist_options[] = {
2869 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2870 "file", "vmlinux pathname"),
2871 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
2872 "file", "kallsyms pathname"),
2873 OPT_BOOLEAN('g', "call-graph", &sched.show_callchain,
2874 "Display call chains if present (default on)"),
2875 OPT_UINTEGER(0, "max-stack", &sched.max_stack,
2876 "Maximum number of functions to display backtrace."),
2877 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
2878 "Look for files with symbols relative to this directory"),
2879 OPT_BOOLEAN('s', "summary", &sched.summary_only,
2880 "Show only syscall summary with statistics"),
2881 OPT_BOOLEAN('S', "with-summary", &sched.summary,
2882 "Show all syscalls and summary with statistics"),
2883 OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
2884 OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
2885 OPT_PARENT(sched_options)
2886 };
2887
2004 const char * const latency_usage[] = { 2888 const char * const latency_usage[] = {
2005 "perf sched latency [<options>]", 2889 "perf sched latency [<options>]",
2006 NULL 2890 NULL
@@ -2013,8 +2897,13 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
2013 "perf sched map [<options>]", 2897 "perf sched map [<options>]",
2014 NULL 2898 NULL
2015 }; 2899 };
2900 const char * const timehist_usage[] = {
2901 "perf sched timehist [<options>]",
2902 NULL
2903 };
2016 const char *const sched_subcommands[] = { "record", "latency", "map", 2904 const char *const sched_subcommands[] = { "record", "latency", "map",
2017 "replay", "script", NULL }; 2905 "replay", "script",
2906 "timehist", NULL };
2018 const char *sched_usage[] = { 2907 const char *sched_usage[] = {
2019 NULL, 2908 NULL,
2020 NULL 2909 NULL
@@ -2077,6 +2966,21 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
2077 usage_with_options(replay_usage, replay_options); 2966 usage_with_options(replay_usage, replay_options);
2078 } 2967 }
2079 return perf_sched__replay(&sched); 2968 return perf_sched__replay(&sched);
2969 } else if (!strcmp(argv[0], "timehist")) {
2970 if (argc) {
2971 argc = parse_options(argc, argv, timehist_options,
2972 timehist_usage, 0);
2973 if (argc)
2974 usage_with_options(timehist_usage, timehist_options);
2975 }
2976 if (sched.show_wakeups && sched.summary_only) {
2977 pr_err(" Error: -s and -w are mutually exclusive.\n");
2978 parse_options_usage(timehist_usage, timehist_options, "s", true);
2979 parse_options_usage(NULL, timehist_options, "w", true);
2980 return -EINVAL;
2981 }
2982
2983 return perf_sched__timehist(&sched);
2080 } else { 2984 } else {
2081 usage_with_options(sched_usage, sched_options); 2985 usage_with_options(sched_usage, sched_options);
2082 } 2986 }
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index fe3af9535e85..3df4178ba378 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -130,7 +130,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
130 return err; 130 return err;
131 } 131 }
132 132
133 err = symbol__disassemble(sym, map, 0); 133 err = symbol__disassemble(sym, map, NULL, 0);
134 if (err == 0) { 134 if (err == 0) {
135out_assign: 135out_assign:
136 top->sym_filter_entry = he; 136 top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 4c18271c71c9..e6e9f7d80dbd 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1050,7 +1050,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
1050 (nr_pcnt - 1); 1050 (nr_pcnt - 1);
1051 } 1051 }
1052 1052
1053 err = symbol__disassemble(sym, map, sizeof_bdl); 1053 err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl);
1054 if (err) { 1054 if (err) {
1055 char msg[BUFSIZ]; 1055 char msg[BUFSIZ];
1056 symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); 1056 symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 42d319927762..8c9308ac30b7 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -167,7 +167,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
167 if (map->dso->annotate_warned) 167 if (map->dso->annotate_warned)
168 return -1; 168 return -1;
169 169
170 err = symbol__disassemble(sym, map, 0); 170 err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0);
171 if (err) { 171 if (err) {
172 char msg[BUFSIZ]; 172 char msg[BUFSIZ];
173 symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); 173 symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index aeb5a441bd74..095d90a9077f 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -18,17 +18,61 @@
18#include "annotate.h" 18#include "annotate.h"
19#include "evsel.h" 19#include "evsel.h"
20#include "block-range.h" 20#include "block-range.h"
21#include "arch/common.h"
21#include <regex.h> 22#include <regex.h>
22#include <pthread.h> 23#include <pthread.h>
23#include <linux/bitops.h> 24#include <linux/bitops.h>
25#include <sys/utsname.h>
24 26
25const char *disassembler_style; 27const char *disassembler_style;
26const char *objdump_path; 28const char *objdump_path;
27static regex_t file_lineno; 29static regex_t file_lineno;
28 30
29static struct ins *ins__find(const char *name); 31static struct ins *ins__find(struct arch *arch, const char *name);
30static int disasm_line__parse(char *line, char **namep, char **rawp); 32static int disasm_line__parse(char *line, char **namep, char **rawp);
31 33
/* Per-architecture data used when parsing disassembler output. */
struct arch {
	const char *name;		/* architecture name, e.g. "arm" or "x86" */
	struct ins *instructions;	/* instruction table for this architecture */
	size_t nr_instructions;		/* number of entries in instructions[] */
	/* NOTE(review): presumably set once instructions[] has been sorted - confirm */
	bool sorted_instructions;
	struct {
		/* character searched for to find a trailing comment in operands */
		char comment_char;
		/* if non-zero, call targets containing this character are rejected */
		char skip_functions_char;
	} objdump;
};
44
45static struct ins_ops call_ops;
46static struct ins_ops dec_ops;
47static struct ins_ops jump_ops;
48static struct ins_ops mov_ops;
49static struct ins_ops nop_ops;
50static struct ins_ops lock_ops;
51static struct ins_ops ret_ops;
52
53#include "arch/arm/annotate/instructions.c"
54#include "arch/x86/annotate/instructions.c"
55
56static struct arch architectures[] = {
57 {
58 .name = "arm",
59 .instructions = arm__instructions,
60 .nr_instructions = ARRAY_SIZE(arm__instructions),
61 .objdump = {
62 .comment_char = ';',
63 .skip_functions_char = '+',
64 },
65 },
66 {
67 .name = "x86",
68 .instructions = x86__instructions,
69 .nr_instructions = ARRAY_SIZE(x86__instructions),
70 .objdump = {
71 .comment_char = '#',
72 },
73 },
74};
75
32static void ins__delete(struct ins_operands *ops) 76static void ins__delete(struct ins_operands *ops)
33{ 77{
34 if (ops == NULL) 78 if (ops == NULL)
@@ -54,7 +98,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
54 return ins__raw_scnprintf(ins, bf, size, ops); 98 return ins__raw_scnprintf(ins, bf, size, ops);
55} 99}
56 100
57static int call__parse(struct ins_operands *ops, struct map *map) 101static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map)
58{ 102{
59 char *endptr, *tok, *name; 103 char *endptr, *tok, *name;
60 104
@@ -66,10 +110,9 @@ static int call__parse(struct ins_operands *ops, struct map *map)
66 110
67 name++; 111 name++;
68 112
69#ifdef __arm__ 113 if (arch->objdump.skip_functions_char &&
70 if (strchr(name, '+')) 114 strchr(name, arch->objdump.skip_functions_char))
71 return -1; 115 return -1;
72#endif
73 116
74 tok = strchr(name, '>'); 117 tok = strchr(name, '>');
75 if (tok == NULL) 118 if (tok == NULL)
@@ -118,7 +161,7 @@ bool ins__is_call(const struct ins *ins)
118 return ins->ops == &call_ops; 161 return ins->ops == &call_ops;
119} 162}
120 163
121static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused) 164static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
122{ 165{
123 const char *s = strchr(ops->raw, '+'); 166 const char *s = strchr(ops->raw, '+');
124 167
@@ -173,7 +216,7 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
173 return 0; 216 return 0;
174} 217}
175 218
176static int lock__parse(struct ins_operands *ops, struct map *map) 219static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map *map)
177{ 220{
178 char *name; 221 char *name;
179 222
@@ -184,7 +227,7 @@ static int lock__parse(struct ins_operands *ops, struct map *map)
184 if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0) 227 if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0)
185 goto out_free_ops; 228 goto out_free_ops;
186 229
187 ops->locked.ins = ins__find(name); 230 ops->locked.ins = ins__find(arch, name);
188 free(name); 231 free(name);
189 232
190 if (ops->locked.ins == NULL) 233 if (ops->locked.ins == NULL)
@@ -194,7 +237,7 @@ static int lock__parse(struct ins_operands *ops, struct map *map)
194 return 0; 237 return 0;
195 238
196 if (ops->locked.ins->ops->parse && 239 if (ops->locked.ins->ops->parse &&
197 ops->locked.ins->ops->parse(ops->locked.ops, map) < 0) 240 ops->locked.ins->ops->parse(arch, ops->locked.ops, map) < 0)
198 goto out_free_ops; 241 goto out_free_ops;
199 242
200 return 0; 243 return 0;
@@ -237,7 +280,7 @@ static struct ins_ops lock_ops = {
237 .scnprintf = lock__scnprintf, 280 .scnprintf = lock__scnprintf,
238}; 281};
239 282
240static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused) 283static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *map __maybe_unused)
241{ 284{
242 char *s = strchr(ops->raw, ','), *target, *comment, prev; 285 char *s = strchr(ops->raw, ','), *target, *comment, prev;
243 286
@@ -252,11 +295,7 @@ static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused)
252 return -1; 295 return -1;
253 296
254 target = ++s; 297 target = ++s;
255#ifdef __arm__ 298 comment = strchr(s, arch->objdump.comment_char);
256 comment = strchr(s, ';');
257#else
258 comment = strchr(s, '#');
259#endif
260 299
261 if (comment != NULL) 300 if (comment != NULL)
262 s = comment - 1; 301 s = comment - 1;
@@ -304,7 +343,7 @@ static struct ins_ops mov_ops = {
304 .scnprintf = mov__scnprintf, 343 .scnprintf = mov__scnprintf,
305}; 344};
306 345
307static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused) 346static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused)
308{ 347{
309 char *target, *comment, *s, prev; 348 char *target, *comment, *s, prev;
310 349
@@ -364,99 +403,6 @@ bool ins__is_ret(const struct ins *ins)
364 return ins->ops == &ret_ops; 403 return ins->ops == &ret_ops;
365} 404}
366 405
367static struct ins instructions[] = {
368 { .name = "add", .ops = &mov_ops, },
369 { .name = "addl", .ops = &mov_ops, },
370 { .name = "addq", .ops = &mov_ops, },
371 { .name = "addw", .ops = &mov_ops, },
372 { .name = "and", .ops = &mov_ops, },
373#ifdef __arm__
374 { .name = "b", .ops = &jump_ops, }, // might also be a call
375 { .name = "bcc", .ops = &jump_ops, },
376 { .name = "bcs", .ops = &jump_ops, },
377 { .name = "beq", .ops = &jump_ops, },
378 { .name = "bge", .ops = &jump_ops, },
379 { .name = "bgt", .ops = &jump_ops, },
380 { .name = "bhi", .ops = &jump_ops, },
381 { .name = "bl", .ops = &call_ops, },
382 { .name = "bls", .ops = &jump_ops, },
383 { .name = "blt", .ops = &jump_ops, },
384 { .name = "blx", .ops = &call_ops, },
385 { .name = "bne", .ops = &jump_ops, },
386#endif
387 { .name = "bts", .ops = &mov_ops, },
388 { .name = "call", .ops = &call_ops, },
389 { .name = "callq", .ops = &call_ops, },
390 { .name = "cmp", .ops = &mov_ops, },
391 { .name = "cmpb", .ops = &mov_ops, },
392 { .name = "cmpl", .ops = &mov_ops, },
393 { .name = "cmpq", .ops = &mov_ops, },
394 { .name = "cmpw", .ops = &mov_ops, },
395 { .name = "cmpxch", .ops = &mov_ops, },
396 { .name = "dec", .ops = &dec_ops, },
397 { .name = "decl", .ops = &dec_ops, },
398 { .name = "imul", .ops = &mov_ops, },
399 { .name = "inc", .ops = &dec_ops, },
400 { .name = "incl", .ops = &dec_ops, },
401 { .name = "ja", .ops = &jump_ops, },
402 { .name = "jae", .ops = &jump_ops, },
403 { .name = "jb", .ops = &jump_ops, },
404 { .name = "jbe", .ops = &jump_ops, },
405 { .name = "jc", .ops = &jump_ops, },
406 { .name = "jcxz", .ops = &jump_ops, },
407 { .name = "je", .ops = &jump_ops, },
408 { .name = "jecxz", .ops = &jump_ops, },
409 { .name = "jg", .ops = &jump_ops, },
410 { .name = "jge", .ops = &jump_ops, },
411 { .name = "jl", .ops = &jump_ops, },
412 { .name = "jle", .ops = &jump_ops, },
413 { .name = "jmp", .ops = &jump_ops, },
414 { .name = "jmpq", .ops = &jump_ops, },
415 { .name = "jna", .ops = &jump_ops, },
416 { .name = "jnae", .ops = &jump_ops, },
417 { .name = "jnb", .ops = &jump_ops, },
418 { .name = "jnbe", .ops = &jump_ops, },
419 { .name = "jnc", .ops = &jump_ops, },
420 { .name = "jne", .ops = &jump_ops, },
421 { .name = "jng", .ops = &jump_ops, },
422 { .name = "jnge", .ops = &jump_ops, },
423 { .name = "jnl", .ops = &jump_ops, },
424 { .name = "jnle", .ops = &jump_ops, },
425 { .name = "jno", .ops = &jump_ops, },
426 { .name = "jnp", .ops = &jump_ops, },
427 { .name = "jns", .ops = &jump_ops, },
428 { .name = "jnz", .ops = &jump_ops, },
429 { .name = "jo", .ops = &jump_ops, },
430 { .name = "jp", .ops = &jump_ops, },
431 { .name = "jpe", .ops = &jump_ops, },
432 { .name = "jpo", .ops = &jump_ops, },
433 { .name = "jrcxz", .ops = &jump_ops, },
434 { .name = "js", .ops = &jump_ops, },
435 { .name = "jz", .ops = &jump_ops, },
436 { .name = "lea", .ops = &mov_ops, },
437 { .name = "lock", .ops = &lock_ops, },
438 { .name = "mov", .ops = &mov_ops, },
439 { .name = "movb", .ops = &mov_ops, },
440 { .name = "movdqa",.ops = &mov_ops, },
441 { .name = "movl", .ops = &mov_ops, },
442 { .name = "movq", .ops = &mov_ops, },
443 { .name = "movslq", .ops = &mov_ops, },
444 { .name = "movzbl", .ops = &mov_ops, },
445 { .name = "movzwl", .ops = &mov_ops, },
446 { .name = "nop", .ops = &nop_ops, },
447 { .name = "nopl", .ops = &nop_ops, },
448 { .name = "nopw", .ops = &nop_ops, },
449 { .name = "or", .ops = &mov_ops, },
450 { .name = "orl", .ops = &mov_ops, },
451 { .name = "test", .ops = &mov_ops, },
452 { .name = "testb", .ops = &mov_ops, },
453 { .name = "testl", .ops = &mov_ops, },
454 { .name = "xadd", .ops = &mov_ops, },
455 { .name = "xbeginl", .ops = &jump_ops, },
456 { .name = "xbeginq", .ops = &jump_ops, },
457 { .name = "retq", .ops = &ret_ops, },
458};
459
460static int ins__key_cmp(const void *name, const void *insp) 406static int ins__key_cmp(const void *name, const void *insp)
461{ 407{
462 const struct ins *ins = insp; 408 const struct ins *ins = insp;
@@ -472,24 +418,58 @@ static int ins__cmp(const void *a, const void *b)
472 return strcmp(ia->name, ib->name); 418 return strcmp(ia->name, ib->name);
473} 419}
474 420
475static void ins__sort(void) 421static void ins__sort(struct arch *arch)
476{ 422{
477 const int nmemb = ARRAY_SIZE(instructions); 423 const int nmemb = arch->nr_instructions;
478 424
479 qsort(instructions, nmemb, sizeof(struct ins), ins__cmp); 425 qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
480} 426}
481 427
482static struct ins *ins__find(const char *name) 428static struct ins *ins__find(struct arch *arch, const char *name)
483{ 429{
484 const int nmemb = ARRAY_SIZE(instructions); 430 const int nmemb = arch->nr_instructions;
431
432 if (!arch->sorted_instructions) {
433 ins__sort(arch);
434 arch->sorted_instructions = true;
435 }
436
437 return bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
438}
439
440static int arch__key_cmp(const void *name, const void *archp)
441{
442 const struct arch *arch = archp;
443
444 return strcmp(name, arch->name);
445}
446
447static int arch__cmp(const void *a, const void *b)
448{
449 const struct arch *aa = a;
450 const struct arch *ab = b;
451
452 return strcmp(aa->name, ab->name);
453}
454
455static void arch__sort(void)
456{
457 const int nmemb = ARRAY_SIZE(architectures);
458
459 qsort(architectures, nmemb, sizeof(struct arch), arch__cmp);
460}
461
462static struct arch *arch__find(const char *name)
463{
464 const int nmemb = ARRAY_SIZE(architectures);
485 static bool sorted; 465 static bool sorted;
486 466
487 if (!sorted) { 467 if (!sorted) {
488 ins__sort(); 468 arch__sort();
489 sorted = true; 469 sorted = true;
490 } 470 }
491 471
492 return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp); 472 return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
493} 473}
494 474
495int symbol__alloc_hist(struct symbol *sym) 475int symbol__alloc_hist(struct symbol *sym)
@@ -709,9 +689,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
709 return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); 689 return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
710} 690}
711 691
712static void disasm_line__init_ins(struct disasm_line *dl, struct map *map) 692static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map)
713{ 693{
714 dl->ins = ins__find(dl->name); 694 dl->ins = ins__find(arch, dl->name);
715 695
716 if (dl->ins == NULL) 696 if (dl->ins == NULL)
717 return; 697 return;
@@ -719,7 +699,7 @@ static void disasm_line__init_ins(struct disasm_line *dl, struct map *map)
719 if (!dl->ins->ops) 699 if (!dl->ins->ops)
720 return; 700 return;
721 701
722 if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops, map) < 0) 702 if (dl->ins->ops->parse && dl->ins->ops->parse(arch, &dl->ops, map) < 0)
723 dl->ins = NULL; 703 dl->ins = NULL;
724} 704}
725 705
@@ -762,6 +742,7 @@ out_free_name:
762 742
763static struct disasm_line *disasm_line__new(s64 offset, char *line, 743static struct disasm_line *disasm_line__new(s64 offset, char *line,
764 size_t privsize, int line_nr, 744 size_t privsize, int line_nr,
745 struct arch *arch,
765 struct map *map) 746 struct map *map)
766{ 747{
767 struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); 748 struct disasm_line *dl = zalloc(sizeof(*dl) + privsize);
@@ -777,7 +758,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line,
777 if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0) 758 if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0)
778 goto out_free_line; 759 goto out_free_line;
779 760
780 disasm_line__init_ins(dl, map); 761 disasm_line__init_ins(dl, arch, map);
781 } 762 }
782 } 763 }
783 764
@@ -1087,6 +1068,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
1087 * The ops.raw part will be parsed further according to type of the instruction. 1068 * The ops.raw part will be parsed further according to type of the instruction.
1088 */ 1069 */
1089static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, 1070static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
1071 struct arch *arch,
1090 FILE *file, size_t privsize, 1072 FILE *file, size_t privsize,
1091 int *line_nr) 1073 int *line_nr)
1092{ 1074{
@@ -1149,7 +1131,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
1149 parsed_line = tmp2 + 1; 1131 parsed_line = tmp2 + 1;
1150 } 1132 }
1151 1133
1152 dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, map); 1134 dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map);
1153 free(line); 1135 free(line);
1154 (*line_nr)++; 1136 (*line_nr)++;
1155 1137
@@ -1280,10 +1262,23 @@ fallback:
1280 return 0; 1262 return 0;
1281} 1263}
1282 1264
1283int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) 1265static const char *annotate__norm_arch(const char *arch_name)
1266{
1267 struct utsname uts;
1268
1269 if (!arch_name) { /* Assume we are annotating locally. */
1270 if (uname(&uts) < 0)
1271 return NULL;
1272 arch_name = uts.machine;
1273 }
1274 return normalize_arch((char *)arch_name);
1275}
1276
1277int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize)
1284{ 1278{
1285 struct dso *dso = map->dso; 1279 struct dso *dso = map->dso;
1286 char command[PATH_MAX * 2]; 1280 char command[PATH_MAX * 2];
1281 struct arch *arch = NULL;
1287 FILE *file; 1282 FILE *file;
1288 char symfs_filename[PATH_MAX]; 1283 char symfs_filename[PATH_MAX];
1289 struct kcore_extract kce; 1284 struct kcore_extract kce;
@@ -1297,6 +1292,14 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize)
1297 if (err) 1292 if (err)
1298 return err; 1293 return err;
1299 1294
1295 arch_name = annotate__norm_arch(arch_name);
1296 if (!arch_name)
1297 return -1;
1298
1299 arch = arch__find(arch_name);
1300 if (arch == NULL)
1301 return -ENOTSUP;
1302
1300 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, 1303 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
1301 symfs_filename, sym->name, map->unmap_ip(map, sym->start), 1304 symfs_filename, sym->name, map->unmap_ip(map, sym->start),
1302 map->unmap_ip(map, sym->end)); 1305 map->unmap_ip(map, sym->end));
@@ -1395,7 +1398,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize)
1395 1398
1396 nline = 0; 1399 nline = 0;
1397 while (!feof(file)) { 1400 while (!feof(file)) {
1398 if (symbol__parse_objdump_line(sym, map, file, privsize, 1401 if (symbol__parse_objdump_line(sym, map, arch, file, privsize,
1399 &lineno) < 0) 1402 &lineno) < 0)
1400 break; 1403 break;
1401 nline++; 1404 nline++;
@@ -1793,7 +1796,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
1793 struct rb_root source_line = RB_ROOT; 1796 struct rb_root source_line = RB_ROOT;
1794 u64 len; 1797 u64 len;
1795 1798
1796 if (symbol__disassemble(sym, map, 0) < 0) 1799 if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0)
1797 return -1; 1800 return -1;
1798 1801
1799 len = symbol__size(sym); 1802 len = symbol__size(sym);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 5bbcec173b82..8e490b5c91bc 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -34,9 +34,11 @@ struct ins_operands {
34 }; 34 };
35}; 35};
36 36
37struct arch;
38
37struct ins_ops { 39struct ins_ops {
38 void (*free)(struct ins_operands *ops); 40 void (*free)(struct ins_operands *ops);
39 int (*parse)(struct ins_operands *ops, struct map *map); 41 int (*parse)(struct arch *arch, struct ins_operands *ops, struct map *map);
40 int (*scnprintf)(struct ins *ins, char *bf, size_t size, 42 int (*scnprintf)(struct ins *ins, char *bf, size_t size,
41 struct ins_operands *ops); 43 struct ins_operands *ops);
42}; 44};
@@ -156,7 +158,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
156int symbol__alloc_hist(struct symbol *sym); 158int symbol__alloc_hist(struct symbol *sym);
157void symbol__annotate_zero_histograms(struct symbol *sym); 159void symbol__annotate_zero_histograms(struct symbol *sym);
158 160
159int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize); 161int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize);
160 162
161enum symbol_disassemble_errno { 163enum symbol_disassemble_errno {
162 SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, 164 SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index e58a2fbf3b16..b2365a63db45 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1481,7 +1481,7 @@ retry_sample_id:
1481 1481
1482 group_fd = get_group_fd(evsel, cpu, thread); 1482 group_fd = get_group_fd(evsel, cpu, thread);
1483retry_open: 1483retry_open:
1484 pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n", 1484 pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
1485 pid, cpus->map[cpu], group_fd, flags); 1485 pid, cpus->map[cpu], group_fd, flags);
1486 1486
1487 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, 1487 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
@@ -1490,11 +1490,13 @@ retry_open:
1490 group_fd, flags); 1490 group_fd, flags);
1491 if (FD(evsel, cpu, thread) < 0) { 1491 if (FD(evsel, cpu, thread) < 0) {
1492 err = -errno; 1492 err = -errno;
1493 pr_debug2("sys_perf_event_open failed, error %d\n", 1493 pr_debug2("\nsys_perf_event_open failed, error %d\n",
1494 err); 1494 err);
1495 goto try_fallback; 1495 goto try_fallback;
1496 } 1496 }
1497 1497
1498 pr_debug2(" = %d\n", FD(evsel, cpu, thread));
1499
1498 if (evsel->bpf_fd >= 0) { 1500 if (evsel->bpf_fd >= 0) {
1499 int evt_fd = FD(evsel, cpu, thread); 1501 int evt_fd = FD(evsel, cpu, thread);
1500 int bpf_fd = evsel->bpf_fd; 1502 int bpf_fd = evsel->bpf_fd;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 8cd7cd227483..27fa3a343577 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -391,6 +391,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
391#define EVSEL__PRINT_ONELINE (1<<4) 391#define EVSEL__PRINT_ONELINE (1<<4)
392#define EVSEL__PRINT_SRCLINE (1<<5) 392#define EVSEL__PRINT_SRCLINE (1<<5)
393#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) 393#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6)
394#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7)
394 395
395struct callchain_cursor; 396struct callchain_cursor;
396 397
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 662a0a6182e7..53bb614feafb 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -108,7 +108,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
108 int print_oneline = print_opts & EVSEL__PRINT_ONELINE; 108 int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
109 int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; 109 int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
110 int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; 110 int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
111 int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW;
111 char s = print_oneline ? ' ' : '\t'; 112 char s = print_oneline ? ' ' : '\t';
113 bool first = true;
112 114
113 if (sample->callchain) { 115 if (sample->callchain) {
114 struct addr_location node_al; 116 struct addr_location node_al;
@@ -124,6 +126,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
124 126
125 printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); 127 printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
126 128
129 if (print_arrow && !first)
130 printed += fprintf(fp, " <-");
131
127 if (print_ip) 132 if (print_ip)
128 printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); 133 printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
129 134
@@ -137,7 +142,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
137 142
138 if (print_symoffset) { 143 if (print_symoffset) {
139 printed += __symbol__fprintf_symname_offs(node->sym, &node_al, 144 printed += __symbol__fprintf_symname_offs(node->sym, &node_al,
140 print_unknown_as_addr, fp); 145 print_unknown_as_addr,
146 true, fp);
141 } else { 147 } else {
142 printed += __symbol__fprintf_symname(node->sym, &node_al, 148 printed += __symbol__fprintf_symname(node->sym, &node_al,
143 print_unknown_as_addr, fp); 149 print_unknown_as_addr, fp);
@@ -157,6 +163,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
157 printed += fprintf(fp, "\n"); 163 printed += fprintf(fp, "\n");
158 164
159 callchain_cursor_advance(cursor); 165 callchain_cursor_advance(cursor);
166 first = false;
160 } 167 }
161 } 168 }
162 169
@@ -188,7 +195,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
188 printed += fprintf(fp, " "); 195 printed += fprintf(fp, " ");
189 if (print_symoffset) { 196 if (print_symoffset) {
190 printed += __symbol__fprintf_symname_offs(al->sym, al, 197 printed += __symbol__fprintf_symname_offs(al->sym, al,
191 print_unknown_as_addr, fp); 198 print_unknown_as_addr,
199 true, fp);
192 } else { 200 } else {
193 printed += __symbol__fprintf_symname(al->sym, al, 201 printed += __symbol__fprintf_symname(al->sym, al,
194 print_unknown_as_addr, fp); 202 print_unknown_as_addr, fp);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index e50773286ef6..1d4ab53c60ca 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -280,6 +280,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
280 u64 lock = data_src->mem_lock; 280 u64 lock = data_src->mem_lock;
281 int err = 0; 281 int err = 0;
282 282
283#define HITM_INC(__f) \
284do { \
285 stats->__f++; \
286 stats->tot_hitm++; \
287} while (0)
288
283#define P(a, b) PERF_MEM_##a##_##b 289#define P(a, b) PERF_MEM_##a##_##b
284 290
285 stats->nr_entries++; 291 stats->nr_entries++;
@@ -303,7 +309,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
303 if (lvl & P(LVL, L2 )) stats->ld_l2hit++; 309 if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
304 if (lvl & P(LVL, L3 )) { 310 if (lvl & P(LVL, L3 )) {
305 if (snoop & P(SNOOP, HITM)) 311 if (snoop & P(SNOOP, HITM))
306 stats->lcl_hitm++; 312 HITM_INC(lcl_hitm);
307 else 313 else
308 stats->ld_llchit++; 314 stats->ld_llchit++;
309 } 315 }
@@ -331,7 +337,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
331 if (snoop & P(SNOOP, HIT)) 337 if (snoop & P(SNOOP, HIT))
332 stats->rmt_hit++; 338 stats->rmt_hit++;
333 else if (snoop & P(SNOOP, HITM)) 339 else if (snoop & P(SNOOP, HITM))
334 stats->rmt_hitm++; 340 HITM_INC(rmt_hitm);
335 } 341 }
336 342
337 if ((lvl & P(LVL, MISS))) 343 if ((lvl & P(LVL, MISS)))
@@ -364,6 +370,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
364 } 370 }
365 371
366#undef P 372#undef P
373#undef HITM_INC
367 return err; 374 return err;
368} 375}
369 376
@@ -390,6 +397,7 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
390 stats->ld_llchit += add->ld_llchit; 397 stats->ld_llchit += add->ld_llchit;
391 stats->lcl_hitm += add->lcl_hitm; 398 stats->lcl_hitm += add->lcl_hitm;
392 stats->rmt_hitm += add->rmt_hitm; 399 stats->rmt_hitm += add->rmt_hitm;
400 stats->tot_hitm += add->tot_hitm;
393 stats->rmt_hit += add->rmt_hit; 401 stats->rmt_hit += add->rmt_hit;
394 stats->lcl_dram += add->lcl_dram; 402 stats->lcl_dram += add->lcl_dram;
395 stats->rmt_dram += add->rmt_dram; 403 stats->rmt_dram += add->rmt_dram;
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index faf80403b519..40f72ee4f42a 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -59,6 +59,7 @@ struct c2c_stats {
59 u32 ld_llchit; /* count of loads that hit LLC */ 59 u32 ld_llchit; /* count of loads that hit LLC */
60 u32 lcl_hitm; /* count of loads with local HITM */ 60 u32 lcl_hitm; /* count of loads with local HITM */
61 u32 rmt_hitm; /* count of loads with remote HITM */ 61 u32 rmt_hitm; /* count of loads with remote HITM */
62 u32 tot_hitm; /* count of loads with local and remote HITM */
62 u32 rmt_hit; /* count of loads with remote hit clean; */ 63 u32 rmt_hit; /* count of loads with remote hit clean; */
63 u32 lcl_dram; /* count of loads miss to local DRAM */ 64 u32 lcl_dram; /* count of loads miss to local DRAM */
64 u32 rmt_dram; /* count of loads miss to remote DRAM */ 65 u32 rmt_dram; /* count of loads miss to remote DRAM */
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 2d0a905c879a..dec7e2d44885 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -282,7 +282,8 @@ int symbol__annotation_init(void);
282struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); 282struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
283size_t __symbol__fprintf_symname_offs(const struct symbol *sym, 283size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
284 const struct addr_location *al, 284 const struct addr_location *al,
285 bool unknown_as_addr, FILE *fp); 285 bool unknown_as_addr,
286 bool print_offsets, FILE *fp);
286size_t symbol__fprintf_symname_offs(const struct symbol *sym, 287size_t symbol__fprintf_symname_offs(const struct symbol *sym,
287 const struct addr_location *al, FILE *fp); 288 const struct addr_location *al, FILE *fp);
288size_t __symbol__fprintf_symname(const struct symbol *sym, 289size_t __symbol__fprintf_symname(const struct symbol *sym,
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
index a680bdaa65dc..7c6b33e8e2d2 100644
--- a/tools/perf/util/symbol_fprintf.c
+++ b/tools/perf/util/symbol_fprintf.c
@@ -15,14 +15,15 @@ size_t symbol__fprintf(struct symbol *sym, FILE *fp)
15 15
16size_t __symbol__fprintf_symname_offs(const struct symbol *sym, 16size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
17 const struct addr_location *al, 17 const struct addr_location *al,
18 bool unknown_as_addr, FILE *fp) 18 bool unknown_as_addr,
19 bool print_offsets, FILE *fp)
19{ 20{
20 unsigned long offset; 21 unsigned long offset;
21 size_t length; 22 size_t length;
22 23
23 if (sym && sym->name) { 24 if (sym && sym->name) {
24 length = fprintf(fp, "%s", sym->name); 25 length = fprintf(fp, "%s", sym->name);
25 if (al) { 26 if (al && print_offsets) {
26 if (al->addr < sym->end) 27 if (al->addr < sym->end)
27 offset = al->addr - sym->start; 28 offset = al->addr - sym->start;
28 else 29 else
@@ -40,19 +41,19 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym,
40 const struct addr_location *al, 41 const struct addr_location *al,
41 FILE *fp) 42 FILE *fp)
42{ 43{
43 return __symbol__fprintf_symname_offs(sym, al, false, fp); 44 return __symbol__fprintf_symname_offs(sym, al, false, true, fp);
44} 45}
45 46
46size_t __symbol__fprintf_symname(const struct symbol *sym, 47size_t __symbol__fprintf_symname(const struct symbol *sym,
47 const struct addr_location *al, 48 const struct addr_location *al,
48 bool unknown_as_addr, FILE *fp) 49 bool unknown_as_addr, FILE *fp)
49{ 50{
50 return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp); 51 return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp);
51} 52}
52 53
53size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) 54size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
54{ 55{
55 return __symbol__fprintf_symname_offs(sym, NULL, false, fp); 56 return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp);
56} 57}
57 58
58size_t dso__fprintf_symbols_by_name(struct dso *dso, 59size_t dso__fprintf_symbols_by_name(struct dso *dso,