diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-11-23 23:09:31 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-11-23 23:09:31 -0500 |
commit | 47414424c53a70eceb0fc6e0a35a31a2b763d5b2 (patch) | |
tree | 07979aa784313ba03712df2b85a3b3f71f1733d6 /tools/perf | |
parent | 69e6cdd0cf16f645be39038e5ccc9379e3923d00 (diff) | |
parent | a407b0678bc1c39d70af5fdbe6421c164b69a8c0 (diff) |
Merge tag 'perf-core-for-mingo-20161123' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
New tool:
- 'perf sched timehist' provides an analysis of scheduling events.
Example usage:
perf sched record -- sleep 1
perf sched timehist
By default it shows the individual schedule events, including the wait
time (time between sched-out and next sched-in events for the task), the
task scheduling delay (time between wakeup and actually running) and run
time for the task:
time cpu task name wait time sch delay run time
[tid/pid] (msec) (msec) (msec)
-------- ------ ---------------- --------- --------- --------
1.874569 [0011] gcc[31949] 0.014 0.000 1.148
1.874591 [0010] gcc[31951] 0.000 0.000 0.024
1.874603 [0010] migration/10[59] 3.350 0.004 0.011
1.874604 [0011] <idle> 1.148 0.000 0.035
1.874723 [0005] <idle> 0.016 0.000 1.383
1.874746 [0005] gcc[31949] 0.153 0.078 0.022
...
Times are in msec.usec. (David Ahern, Namhyung Kim)
Improvements:
- Make 'perf c2c report' support -f/--force, to allow skipping the
ownership check for root users, for instance, just like the other
tools (Jiri Olsa)
- Allow sorting cachelines by total number of HITMs, in addition to
local and remote numbers (Jiri Olsa)
Fixes:
- Make sure errors aren't suppressed by the TUI reset at the end of
a 'perf c2c report' session (Jiri Olsa)
Infrastructure changes:
- Initial work on having the annotate code better support multiple
architectures, including the ability to cross-annotate, i.e. to
annotate perf.data files collected on an ARM system on an x86_64
workstation (Arnaldo Carvalho de Melo, Ravi Bangoria, Kim Phillips)
- Use USECS_PER_SEC instead of hard coded number in libtraceevent (Steven Rostedt)
- Add retrieval of preempt count and latency flags in libtraceevent (Steven Rostedt)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/Documentation/perf-c2c.txt | 8 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-sched.txt | 66 | ||||
-rw-r--r-- | tools/perf/arch/arm/annotate/instructions.c | 90 | ||||
-rw-r--r-- | tools/perf/arch/x86/annotate/instructions.c | 78 | ||||
-rw-r--r-- | tools/perf/builtin-c2c.c | 80 | ||||
-rw-r--r-- | tools/perf/builtin-sched.c | 914 | ||||
-rw-r--r-- | tools/perf/builtin-top.c | 2 | ||||
-rw-r--r-- | tools/perf/ui/browsers/annotate.c | 2 | ||||
-rw-r--r-- | tools/perf/ui/gtk/annotate.c | 2 | ||||
-rw-r--r-- | tools/perf/util/annotate.c | 251 | ||||
-rw-r--r-- | tools/perf/util/annotate.h | 6 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 6 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 1 | ||||
-rw-r--r-- | tools/perf/util/evsel_fprintf.c | 12 | ||||
-rw-r--r-- | tools/perf/util/mem-events.c | 12 | ||||
-rw-r--r-- | tools/perf/util/mem-events.h | 1 | ||||
-rw-r--r-- | tools/perf/util/symbol.h | 3 | ||||
-rw-r--r-- | tools/perf/util/symbol_fprintf.c | 11 |
18 files changed, 1370 insertions, 175 deletions
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 21810d711f5f..3f06730c7f47 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt | |||
@@ -100,6 +100,14 @@ REPORT OPTIONS | |||
100 | --show-all:: | 100 | --show-all:: |
101 | Show all captured HITM lines, with no regard to HITM % 0.0005 limit. | 101 | Show all captured HITM lines, with no regard to HITM % 0.0005 limit. |
102 | 102 | ||
103 | -f:: | ||
104 | --force:: | ||
105 | Don't do ownership validation. | ||
106 | |||
107 | -d:: | ||
108 | --display:: | ||
109 | Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default. | ||
110 | |||
103 | C2C RECORD | 111 | C2C RECORD |
104 | ---------- | 112 | ---------- |
105 | The perf c2c record command sets up options related to HITM cacheline analysis | 113 | The perf c2c record command sets up options related to HITM cacheline analysis |
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 1cc08cc47ac5..fb9e52d65fca 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt | |||
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies) | |||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf sched' {record|latency|map|replay|script} | 11 | 'perf sched' {record|latency|map|replay|script|timehist} |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | There are five variants of perf sched: | 15 | There are several variants of 'perf sched': |
16 | 16 | ||
17 | 'perf sched record <command>' to record the scheduling events | 17 | 'perf sched record <command>' to record the scheduling events |
18 | of an arbitrary workload. | 18 | of an arbitrary workload. |
@@ -36,6 +36,30 @@ There are five variants of perf sched: | |||
36 | are running on a CPU. A '*' denotes the CPU that had the event, and | 36 | are running on a CPU. A '*' denotes the CPU that had the event, and |
37 | a dot signals an idle CPU. | 37 | a dot signals an idle CPU. |
38 | 38 | ||
39 | 'perf sched timehist' provides an analysis of scheduling events. | ||
40 | |||
41 | Example usage: | ||
42 | perf sched record -- sleep 1 | ||
43 | perf sched timehist | ||
44 | |||
45 | By default it shows the individual schedule events, including the wait | ||
46 | time (time between sched-out and next sched-in events for the task), the | ||
47 | task scheduling delay (time between wakeup and actually running) and run | ||
48 | time for the task: | ||
49 | |||
50 | time cpu task name wait time sch delay run time | ||
51 | [tid/pid] (msec) (msec) (msec) | ||
52 | -------------- ------ -------------------- --------- --------- --------- | ||
53 | 79371.874569 [0011] gcc[31949] 0.014 0.000 1.148 | ||
54 | 79371.874591 [0010] gcc[31951] 0.000 0.000 0.024 | ||
55 | 79371.874603 [0010] migration/10[59] 3.350 0.004 0.011 | ||
56 | 79371.874604 [0011] <idle> 1.148 0.000 0.035 | ||
57 | 79371.874723 [0005] <idle> 0.016 0.000 1.383 | ||
58 | 79371.874746 [0005] gcc[31949] 0.153 0.078 0.022 | ||
59 | ... | ||
60 | |||
61 | Times are in msec.usec. | ||
62 | |||
39 | OPTIONS | 63 | OPTIONS |
40 | ------- | 64 | ------- |
41 | -i:: | 65 | -i:: |
@@ -66,6 +90,44 @@ OPTIONS for 'perf sched map' | |||
66 | --color-pids:: | 90 | --color-pids:: |
67 | Highlight the given pids. | 91 | Highlight the given pids. |
68 | 92 | ||
93 | OPTIONS for 'perf sched timehist' | ||
94 | --------------------------------- | ||
95 | -k:: | ||
96 | --vmlinux=<file>:: | ||
97 | vmlinux pathname | ||
98 | |||
99 | --kallsyms=<file>:: | ||
100 | kallsyms pathname | ||
101 | |||
102 | -g:: | ||
103 | --no-call-graph:: | ||
104 | Do not display call chains if present. | ||
105 | |||
106 | --max-stack:: | ||
107 | Maximum number of functions to display in backtrace, default 5. | ||
108 | |||
109 | -s:: | ||
110 | --summary:: | ||
111 | Show only a summary of scheduling by thread with min, max, and average | ||
112 | run times (in sec) and relative stddev. | ||
113 | |||
114 | -S:: | ||
115 | --with-summary:: | ||
116 | Show all scheduling events followed by a summary by thread with min, | ||
117 | max, and average run times (in sec) and relative stddev. | ||
118 | |||
119 | --symfs=<directory>:: | ||
120 | Look for files with symbols relative to this directory. | ||
121 | |||
122 | -V:: | ||
123 | --cpu-visual:: | ||
124 | Show visual aid for sched switches by CPU: 'i' marks idle time, | ||
125 | 's' are scheduler events. | ||
126 | |||
127 | -w:: | ||
128 | --wakeups:: | ||
129 | Show wakeup events. | ||
130 | |||
69 | SEE ALSO | 131 | SEE ALSO |
70 | -------- | 132 | -------- |
71 | linkperf:perf-record[1] | 133 | linkperf:perf-record[1] |
diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c new file mode 100644 index 000000000000..d67b8aa26274 --- /dev/null +++ b/tools/perf/arch/arm/annotate/instructions.c | |||
@@ -0,0 +1,90 @@ | |||
1 | static struct ins arm__instructions[] = { | ||
2 | { .name = "add", .ops = &mov_ops, }, | ||
3 | { .name = "addl", .ops = &mov_ops, }, | ||
4 | { .name = "addq", .ops = &mov_ops, }, | ||
5 | { .name = "addw", .ops = &mov_ops, }, | ||
6 | { .name = "and", .ops = &mov_ops, }, | ||
7 | { .name = "b", .ops = &jump_ops, }, // might also be a call | ||
8 | { .name = "bcc", .ops = &jump_ops, }, | ||
9 | { .name = "bcs", .ops = &jump_ops, }, | ||
10 | { .name = "beq", .ops = &jump_ops, }, | ||
11 | { .name = "bge", .ops = &jump_ops, }, | ||
12 | { .name = "bgt", .ops = &jump_ops, }, | ||
13 | { .name = "bhi", .ops = &jump_ops, }, | ||
14 | { .name = "bl", .ops = &call_ops, }, | ||
15 | { .name = "bls", .ops = &jump_ops, }, | ||
16 | { .name = "blt", .ops = &jump_ops, }, | ||
17 | { .name = "blx", .ops = &call_ops, }, | ||
18 | { .name = "bne", .ops = &jump_ops, }, | ||
19 | { .name = "bts", .ops = &mov_ops, }, | ||
20 | { .name = "call", .ops = &call_ops, }, | ||
21 | { .name = "callq", .ops = &call_ops, }, | ||
22 | { .name = "cmp", .ops = &mov_ops, }, | ||
23 | { .name = "cmpb", .ops = &mov_ops, }, | ||
24 | { .name = "cmpl", .ops = &mov_ops, }, | ||
25 | { .name = "cmpq", .ops = &mov_ops, }, | ||
26 | { .name = "cmpw", .ops = &mov_ops, }, | ||
27 | { .name = "cmpxch", .ops = &mov_ops, }, | ||
28 | { .name = "dec", .ops = &dec_ops, }, | ||
29 | { .name = "decl", .ops = &dec_ops, }, | ||
30 | { .name = "imul", .ops = &mov_ops, }, | ||
31 | { .name = "inc", .ops = &dec_ops, }, | ||
32 | { .name = "incl", .ops = &dec_ops, }, | ||
33 | { .name = "ja", .ops = &jump_ops, }, | ||
34 | { .name = "jae", .ops = &jump_ops, }, | ||
35 | { .name = "jb", .ops = &jump_ops, }, | ||
36 | { .name = "jbe", .ops = &jump_ops, }, | ||
37 | { .name = "jc", .ops = &jump_ops, }, | ||
38 | { .name = "jcxz", .ops = &jump_ops, }, | ||
39 | { .name = "je", .ops = &jump_ops, }, | ||
40 | { .name = "jecxz", .ops = &jump_ops, }, | ||
41 | { .name = "jg", .ops = &jump_ops, }, | ||
42 | { .name = "jge", .ops = &jump_ops, }, | ||
43 | { .name = "jl", .ops = &jump_ops, }, | ||
44 | { .name = "jle", .ops = &jump_ops, }, | ||
45 | { .name = "jmp", .ops = &jump_ops, }, | ||
46 | { .name = "jmpq", .ops = &jump_ops, }, | ||
47 | { .name = "jna", .ops = &jump_ops, }, | ||
48 | { .name = "jnae", .ops = &jump_ops, }, | ||
49 | { .name = "jnb", .ops = &jump_ops, }, | ||
50 | { .name = "jnbe", .ops = &jump_ops, }, | ||
51 | { .name = "jnc", .ops = &jump_ops, }, | ||
52 | { .name = "jne", .ops = &jump_ops, }, | ||
53 | { .name = "jng", .ops = &jump_ops, }, | ||
54 | { .name = "jnge", .ops = &jump_ops, }, | ||
55 | { .name = "jnl", .ops = &jump_ops, }, | ||
56 | { .name = "jnle", .ops = &jump_ops, }, | ||
57 | { .name = "jno", .ops = &jump_ops, }, | ||
58 | { .name = "jnp", .ops = &jump_ops, }, | ||
59 | { .name = "jns", .ops = &jump_ops, }, | ||
60 | { .name = "jnz", .ops = &jump_ops, }, | ||
61 | { .name = "jo", .ops = &jump_ops, }, | ||
62 | { .name = "jp", .ops = &jump_ops, }, | ||
63 | { .name = "jpe", .ops = &jump_ops, }, | ||
64 | { .name = "jpo", .ops = &jump_ops, }, | ||
65 | { .name = "jrcxz", .ops = &jump_ops, }, | ||
66 | { .name = "js", .ops = &jump_ops, }, | ||
67 | { .name = "jz", .ops = &jump_ops, }, | ||
68 | { .name = "lea", .ops = &mov_ops, }, | ||
69 | { .name = "lock", .ops = &lock_ops, }, | ||
70 | { .name = "mov", .ops = &mov_ops, }, | ||
71 | { .name = "movb", .ops = &mov_ops, }, | ||
72 | { .name = "movdqa", .ops = &mov_ops, }, | ||
73 | { .name = "movl", .ops = &mov_ops, }, | ||
74 | { .name = "movq", .ops = &mov_ops, }, | ||
75 | { .name = "movslq", .ops = &mov_ops, }, | ||
76 | { .name = "movzbl", .ops = &mov_ops, }, | ||
77 | { .name = "movzwl", .ops = &mov_ops, }, | ||
78 | { .name = "nop", .ops = &nop_ops, }, | ||
79 | { .name = "nopl", .ops = &nop_ops, }, | ||
80 | { .name = "nopw", .ops = &nop_ops, }, | ||
81 | { .name = "or", .ops = &mov_ops, }, | ||
82 | { .name = "orl", .ops = &mov_ops, }, | ||
83 | { .name = "test", .ops = &mov_ops, }, | ||
84 | { .name = "testb", .ops = &mov_ops, }, | ||
85 | { .name = "testl", .ops = &mov_ops, }, | ||
86 | { .name = "xadd", .ops = &mov_ops, }, | ||
87 | { .name = "xbeginl", .ops = &jump_ops, }, | ||
88 | { .name = "xbeginq", .ops = &jump_ops, }, | ||
89 | { .name = "retq", .ops = &ret_ops, }, | ||
90 | }; | ||
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c new file mode 100644 index 000000000000..c1625f256df3 --- /dev/null +++ b/tools/perf/arch/x86/annotate/instructions.c | |||
@@ -0,0 +1,78 @@ | |||
1 | static struct ins x86__instructions[] = { | ||
2 | { .name = "add", .ops = &mov_ops, }, | ||
3 | { .name = "addl", .ops = &mov_ops, }, | ||
4 | { .name = "addq", .ops = &mov_ops, }, | ||
5 | { .name = "addw", .ops = &mov_ops, }, | ||
6 | { .name = "and", .ops = &mov_ops, }, | ||
7 | { .name = "bts", .ops = &mov_ops, }, | ||
8 | { .name = "call", .ops = &call_ops, }, | ||
9 | { .name = "callq", .ops = &call_ops, }, | ||
10 | { .name = "cmp", .ops = &mov_ops, }, | ||
11 | { .name = "cmpb", .ops = &mov_ops, }, | ||
12 | { .name = "cmpl", .ops = &mov_ops, }, | ||
13 | { .name = "cmpq", .ops = &mov_ops, }, | ||
14 | { .name = "cmpw", .ops = &mov_ops, }, | ||
15 | { .name = "cmpxch", .ops = &mov_ops, }, | ||
16 | { .name = "dec", .ops = &dec_ops, }, | ||
17 | { .name = "decl", .ops = &dec_ops, }, | ||
18 | { .name = "imul", .ops = &mov_ops, }, | ||
19 | { .name = "inc", .ops = &dec_ops, }, | ||
20 | { .name = "incl", .ops = &dec_ops, }, | ||
21 | { .name = "ja", .ops = &jump_ops, }, | ||
22 | { .name = "jae", .ops = &jump_ops, }, | ||
23 | { .name = "jb", .ops = &jump_ops, }, | ||
24 | { .name = "jbe", .ops = &jump_ops, }, | ||
25 | { .name = "jc", .ops = &jump_ops, }, | ||
26 | { .name = "jcxz", .ops = &jump_ops, }, | ||
27 | { .name = "je", .ops = &jump_ops, }, | ||
28 | { .name = "jecxz", .ops = &jump_ops, }, | ||
29 | { .name = "jg", .ops = &jump_ops, }, | ||
30 | { .name = "jge", .ops = &jump_ops, }, | ||
31 | { .name = "jl", .ops = &jump_ops, }, | ||
32 | { .name = "jle", .ops = &jump_ops, }, | ||
33 | { .name = "jmp", .ops = &jump_ops, }, | ||
34 | { .name = "jmpq", .ops = &jump_ops, }, | ||
35 | { .name = "jna", .ops = &jump_ops, }, | ||
36 | { .name = "jnae", .ops = &jump_ops, }, | ||
37 | { .name = "jnb", .ops = &jump_ops, }, | ||
38 | { .name = "jnbe", .ops = &jump_ops, }, | ||
39 | { .name = "jnc", .ops = &jump_ops, }, | ||
40 | { .name = "jne", .ops = &jump_ops, }, | ||
41 | { .name = "jng", .ops = &jump_ops, }, | ||
42 | { .name = "jnge", .ops = &jump_ops, }, | ||
43 | { .name = "jnl", .ops = &jump_ops, }, | ||
44 | { .name = "jnle", .ops = &jump_ops, }, | ||
45 | { .name = "jno", .ops = &jump_ops, }, | ||
46 | { .name = "jnp", .ops = &jump_ops, }, | ||
47 | { .name = "jns", .ops = &jump_ops, }, | ||
48 | { .name = "jnz", .ops = &jump_ops, }, | ||
49 | { .name = "jo", .ops = &jump_ops, }, | ||
50 | { .name = "jp", .ops = &jump_ops, }, | ||
51 | { .name = "jpe", .ops = &jump_ops, }, | ||
52 | { .name = "jpo", .ops = &jump_ops, }, | ||
53 | { .name = "jrcxz", .ops = &jump_ops, }, | ||
54 | { .name = "js", .ops = &jump_ops, }, | ||
55 | { .name = "jz", .ops = &jump_ops, }, | ||
56 | { .name = "lea", .ops = &mov_ops, }, | ||
57 | { .name = "lock", .ops = &lock_ops, }, | ||
58 | { .name = "mov", .ops = &mov_ops, }, | ||
59 | { .name = "movb", .ops = &mov_ops, }, | ||
60 | { .name = "movdqa", .ops = &mov_ops, }, | ||
61 | { .name = "movl", .ops = &mov_ops, }, | ||
62 | { .name = "movq", .ops = &mov_ops, }, | ||
63 | { .name = "movslq", .ops = &mov_ops, }, | ||
64 | { .name = "movzbl", .ops = &mov_ops, }, | ||
65 | { .name = "movzwl", .ops = &mov_ops, }, | ||
66 | { .name = "nop", .ops = &nop_ops, }, | ||
67 | { .name = "nopl", .ops = &nop_ops, }, | ||
68 | { .name = "nopw", .ops = &nop_ops, }, | ||
69 | { .name = "or", .ops = &mov_ops, }, | ||
70 | { .name = "orl", .ops = &mov_ops, }, | ||
71 | { .name = "test", .ops = &mov_ops, }, | ||
72 | { .name = "testb", .ops = &mov_ops, }, | ||
73 | { .name = "testl", .ops = &mov_ops, }, | ||
74 | { .name = "xadd", .ops = &mov_ops, }, | ||
75 | { .name = "xbeginl", .ops = &jump_ops, }, | ||
76 | { .name = "xbeginq", .ops = &jump_ops, }, | ||
77 | { .name = "retq", .ops = &ret_ops, }, | ||
78 | }; | ||
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c6d0dda594d9..4b419631753d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c | |||
@@ -91,6 +91,19 @@ struct perf_c2c { | |||
91 | enum { | 91 | enum { |
92 | DISPLAY_LCL, | 92 | DISPLAY_LCL, |
93 | DISPLAY_RMT, | 93 | DISPLAY_RMT, |
94 | DISPLAY_TOT, | ||
95 | DISPLAY_MAX, | ||
96 | }; | ||
97 | |||
98 | static const char *display_str[DISPLAY_MAX] = { | ||
99 | [DISPLAY_LCL] = "Local", | ||
100 | [DISPLAY_RMT] = "Remote", | ||
101 | [DISPLAY_TOT] = "Total", | ||
102 | }; | ||
103 | |||
104 | static const struct option c2c_options[] = { | ||
105 | OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), | ||
106 | OPT_END() | ||
94 | }; | 107 | }; |
95 | 108 | ||
96 | static struct perf_c2c c2c; | 109 | static struct perf_c2c c2c; |
@@ -745,6 +758,10 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he) | |||
745 | case DISPLAY_LCL: | 758 | case DISPLAY_LCL: |
746 | st = stats->lcl_hitm; | 759 | st = stats->lcl_hitm; |
747 | tot = total->lcl_hitm; | 760 | tot = total->lcl_hitm; |
761 | break; | ||
762 | case DISPLAY_TOT: | ||
763 | st = stats->tot_hitm; | ||
764 | tot = total->tot_hitm; | ||
748 | default: | 765 | default: |
749 | break; | 766 | break; |
750 | } | 767 | } |
@@ -1044,6 +1061,9 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, | |||
1044 | break; | 1061 | break; |
1045 | case DISPLAY_LCL: | 1062 | case DISPLAY_LCL: |
1046 | DISPLAY_HITM(lcl_hitm); | 1063 | DISPLAY_HITM(lcl_hitm); |
1064 | break; | ||
1065 | case DISPLAY_TOT: | ||
1066 | DISPLAY_HITM(tot_hitm); | ||
1047 | default: | 1067 | default: |
1048 | break; | 1068 | break; |
1049 | } | 1069 | } |
@@ -1351,6 +1371,7 @@ static struct c2c_dimension dim_tot_loads = { | |||
1351 | static struct c2c_header percent_hitm_header[] = { | 1371 | static struct c2c_header percent_hitm_header[] = { |
1352 | [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"), | 1372 | [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"), |
1353 | [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"), | 1373 | [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"), |
1374 | [DISPLAY_TOT] = HEADER_BOTH("Tot", "Hitm"), | ||
1354 | }; | 1375 | }; |
1355 | 1376 | ||
1356 | static struct c2c_dimension dim_percent_hitm = { | 1377 | static struct c2c_dimension dim_percent_hitm = { |
@@ -1794,6 +1815,9 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats) | |||
1794 | break; | 1815 | break; |
1795 | case DISPLAY_RMT: | 1816 | case DISPLAY_RMT: |
1796 | FILTER_HITM(rmt_hitm); | 1817 | FILTER_HITM(rmt_hitm); |
1818 | break; | ||
1819 | case DISPLAY_TOT: | ||
1820 | FILTER_HITM(tot_hitm); | ||
1797 | default: | 1821 | default: |
1798 | break; | 1822 | break; |
1799 | }; | 1823 | }; |
@@ -1809,8 +1833,9 @@ static inline int valid_hitm_or_store(struct hist_entry *he) | |||
1809 | bool has_hitm; | 1833 | bool has_hitm; |
1810 | 1834 | ||
1811 | c2c_he = container_of(he, struct c2c_hist_entry, he); | 1835 | c2c_he = container_of(he, struct c2c_hist_entry, he); |
1812 | has_hitm = c2c.display == DISPLAY_LCL ? | 1836 | has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm : |
1813 | c2c_he->stats.lcl_hitm : c2c_he->stats.rmt_hitm; | 1837 | c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm : |
1838 | c2c_he->stats.rmt_hitm; | ||
1814 | return has_hitm || c2c_he->stats.store; | 1839 | return has_hitm || c2c_he->stats.store; |
1815 | } | 1840 | } |
1816 | 1841 | ||
@@ -2095,7 +2120,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session) | |||
2095 | first = false; | 2120 | first = false; |
2096 | } | 2121 | } |
2097 | fprintf(out, " Cachelines sort on : %s HITMs\n", | 2122 | fprintf(out, " Cachelines sort on : %s HITMs\n", |
2098 | c2c.display == DISPLAY_LCL ? "Local" : "Remote"); | 2123 | display_str[c2c.display]); |
2099 | fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort); | 2124 | fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort); |
2100 | } | 2125 | } |
2101 | 2126 | ||
@@ -2250,7 +2275,7 @@ static int perf_c2c_browser__title(struct hist_browser *browser, | |||
2250 | "Shared Data Cache Line Table " | 2275 | "Shared Data Cache Line Table " |
2251 | "(%lu entries, sorted on %s HITMs)", | 2276 | "(%lu entries, sorted on %s HITMs)", |
2252 | browser->nr_non_filtered_entries, | 2277 | browser->nr_non_filtered_entries, |
2253 | c2c.display == DISPLAY_LCL ? "local" : "remote"); | 2278 | display_str[c2c.display]); |
2254 | return 0; | 2279 | return 0; |
2255 | } | 2280 | } |
2256 | 2281 | ||
@@ -2387,9 +2412,11 @@ static int setup_callchain(struct perf_evlist *evlist) | |||
2387 | 2412 | ||
2388 | static int setup_display(const char *str) | 2413 | static int setup_display(const char *str) |
2389 | { | 2414 | { |
2390 | const char *display = str ?: "rmt"; | 2415 | const char *display = str ?: "tot"; |
2391 | 2416 | ||
2392 | if (!strcmp(display, "rmt")) | 2417 | if (!strcmp(display, "tot")) |
2418 | c2c.display = DISPLAY_TOT; | ||
2419 | else if (!strcmp(display, "rmt")) | ||
2393 | c2c.display = DISPLAY_RMT; | 2420 | c2c.display = DISPLAY_RMT; |
2394 | else if (!strcmp(display, "lcl")) | 2421 | else if (!strcmp(display, "lcl")) |
2395 | c2c.display = DISPLAY_LCL; | 2422 | c2c.display = DISPLAY_LCL; |
@@ -2474,6 +2501,8 @@ static int setup_coalesce(const char *coalesce, bool no_source) | |||
2474 | return -1; | 2501 | return -1; |
2475 | 2502 | ||
2476 | if (asprintf(&c2c.cl_resort, "offset,%s", | 2503 | if (asprintf(&c2c.cl_resort, "offset,%s", |
2504 | c2c.display == DISPLAY_TOT ? | ||
2505 | "tot_hitm" : | ||
2477 | c2c.display == DISPLAY_RMT ? | 2506 | c2c.display == DISPLAY_RMT ? |
2478 | "rmt_hitm,lcl_hitm" : | 2507 | "rmt_hitm,lcl_hitm" : |
2479 | "lcl_hitm,rmt_hitm") < 0) | 2508 | "lcl_hitm,rmt_hitm") < 0) |
@@ -2496,11 +2525,9 @@ static int perf_c2c__report(int argc, const char **argv) | |||
2496 | const char *display = NULL; | 2525 | const char *display = NULL; |
2497 | const char *coalesce = NULL; | 2526 | const char *coalesce = NULL; |
2498 | bool no_source = false; | 2527 | bool no_source = false; |
2499 | const struct option c2c_options[] = { | 2528 | const struct option options[] = { |
2500 | OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, | 2529 | OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, |
2501 | "file", "vmlinux pathname"), | 2530 | "file", "vmlinux pathname"), |
2502 | OPT_INCR('v', "verbose", &verbose, | ||
2503 | "be more verbose (show counter open errors, etc)"), | ||
2504 | OPT_STRING('i', "input", &input_name, "file", | 2531 | OPT_STRING('i', "input", &input_name, "file", |
2505 | "the input file to process"), | 2532 | "the input file to process"), |
2506 | OPT_INCR('N', "node-info", &c2c.node_info, | 2533 | OPT_INCR('N', "node-info", &c2c.node_info, |
@@ -2520,32 +2547,28 @@ static int perf_c2c__report(int argc, const char **argv) | |||
2520 | "print_type,threshold[,print_limit],order,sort_key[,branch],value", | 2547 | "print_type,threshold[,print_limit],order,sort_key[,branch],value", |
2521 | callchain_help, &parse_callchain_opt, | 2548 | callchain_help, &parse_callchain_opt, |
2522 | callchain_default_opt), | 2549 | callchain_default_opt), |
2523 | OPT_STRING('d', "display", &display, NULL, "lcl,rmt"), | 2550 | OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"), |
2524 | OPT_STRING('c', "coalesce", &coalesce, "coalesce fields", | 2551 | OPT_STRING('c', "coalesce", &coalesce, "coalesce fields", |
2525 | "coalesce fields: pid,tid,iaddr,dso"), | 2552 | "coalesce fields: pid,tid,iaddr,dso"), |
2553 | OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), | ||
2554 | OPT_PARENT(c2c_options), | ||
2526 | OPT_END() | 2555 | OPT_END() |
2527 | }; | 2556 | }; |
2528 | int err = 0; | 2557 | int err = 0; |
2529 | 2558 | ||
2530 | argc = parse_options(argc, argv, c2c_options, report_c2c_usage, | 2559 | argc = parse_options(argc, argv, options, report_c2c_usage, |
2531 | PARSE_OPT_STOP_AT_NON_OPTION); | 2560 | PARSE_OPT_STOP_AT_NON_OPTION); |
2532 | if (argc) | 2561 | if (argc) |
2533 | usage_with_options(report_c2c_usage, c2c_options); | 2562 | usage_with_options(report_c2c_usage, options); |
2534 | 2563 | ||
2535 | if (c2c.stats_only) | 2564 | if (c2c.stats_only) |
2536 | c2c.use_stdio = true; | 2565 | c2c.use_stdio = true; |
2537 | 2566 | ||
2538 | if (c2c.use_stdio) | ||
2539 | use_browser = 0; | ||
2540 | else | ||
2541 | use_browser = 1; | ||
2542 | |||
2543 | setup_browser(false); | ||
2544 | |||
2545 | if (!input_name || !strlen(input_name)) | 2567 | if (!input_name || !strlen(input_name)) |
2546 | input_name = "perf.data"; | 2568 | input_name = "perf.data"; |
2547 | 2569 | ||
2548 | file.path = input_name; | 2570 | file.path = input_name; |
2571 | file.force = symbol_conf.force; | ||
2549 | 2572 | ||
2550 | err = setup_display(display); | 2573 | err = setup_display(display); |
2551 | if (err) | 2574 | if (err) |
@@ -2568,6 +2591,7 @@ static int perf_c2c__report(int argc, const char **argv) | |||
2568 | pr_debug("No memory for session\n"); | 2591 | pr_debug("No memory for session\n"); |
2569 | goto out; | 2592 | goto out; |
2570 | } | 2593 | } |
2594 | |||
2571 | err = setup_nodes(session); | 2595 | err = setup_nodes(session); |
2572 | if (err) { | 2596 | if (err) { |
2573 | pr_err("Failed setup nodes\n"); | 2597 | pr_err("Failed setup nodes\n"); |
@@ -2587,6 +2611,13 @@ static int perf_c2c__report(int argc, const char **argv) | |||
2587 | goto out_session; | 2611 | goto out_session; |
2588 | } | 2612 | } |
2589 | 2613 | ||
2614 | if (c2c.use_stdio) | ||
2615 | use_browser = 0; | ||
2616 | else | ||
2617 | use_browser = 1; | ||
2618 | |||
2619 | setup_browser(false); | ||
2620 | |||
2590 | err = perf_session__process_events(session); | 2621 | err = perf_session__process_events(session); |
2591 | if (err) { | 2622 | if (err) { |
2592 | pr_err("failed to process sample\n"); | 2623 | pr_err("failed to process sample\n"); |
@@ -2605,6 +2636,7 @@ static int perf_c2c__report(int argc, const char **argv) | |||
2605 | "tot_loads," | 2636 | "tot_loads," |
2606 | "ld_fbhit,ld_l1hit,ld_l2hit," | 2637 | "ld_fbhit,ld_l1hit,ld_l2hit," |
2607 | "ld_lclhit,ld_rmthit", | 2638 | "ld_lclhit,ld_rmthit", |
2639 | c2c.display == DISPLAY_TOT ? "tot_hitm" : | ||
2608 | c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm" | 2640 | c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm" |
2609 | ); | 2641 | ); |
2610 | 2642 | ||
@@ -2655,11 +2687,10 @@ static int perf_c2c__record(int argc, const char **argv) | |||
2655 | OPT_CALLBACK('e', "event", &event_set, "event", | 2687 | OPT_CALLBACK('e', "event", &event_set, "event", |
2656 | "event selector. Use 'perf mem record -e list' to list available events", | 2688 | "event selector. Use 'perf mem record -e list' to list available events", |
2657 | parse_record_events), | 2689 | parse_record_events), |
2658 | OPT_INCR('v', "verbose", &verbose, | ||
2659 | "be more verbose (show counter open errors, etc)"), | ||
2660 | OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"), | 2690 | OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"), |
2661 | OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"), | 2691 | OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"), |
2662 | OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"), | 2692 | OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"), |
2693 | OPT_PARENT(c2c_options), | ||
2663 | OPT_END() | 2694 | OPT_END() |
2664 | }; | 2695 | }; |
2665 | 2696 | ||
@@ -2731,11 +2762,6 @@ static int perf_c2c__record(int argc, const char **argv) | |||
2731 | 2762 | ||
2732 | int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) | 2763 | int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) |
2733 | { | 2764 | { |
2734 | const struct option c2c_options[] = { | ||
2735 | OPT_INCR('v', "verbose", &verbose, "be more verbose"), | ||
2736 | OPT_END() | ||
2737 | }; | ||
2738 | |||
2739 | argc = parse_options(argc, argv, c2c_options, c2c_usage, | 2765 | argc = parse_options(argc, argv, c2c_options, c2c_usage, |
2740 | PARSE_OPT_STOP_AT_NON_OPTION); | 2766 | PARSE_OPT_STOP_AT_NON_OPTION); |
2741 | 2767 | ||
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index fb3441211e4b..829468defa07 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c | |||
@@ -13,12 +13,15 @@ | |||
13 | #include "util/cloexec.h" | 13 | #include "util/cloexec.h" |
14 | #include "util/thread_map.h" | 14 | #include "util/thread_map.h" |
15 | #include "util/color.h" | 15 | #include "util/color.h" |
16 | #include "util/stat.h" | ||
17 | #include "util/callchain.h" | ||
16 | 18 | ||
17 | #include <subcmd/parse-options.h> | 19 | #include <subcmd/parse-options.h> |
18 | #include "util/trace-event.h" | 20 | #include "util/trace-event.h" |
19 | 21 | ||
20 | #include "util/debug.h" | 22 | #include "util/debug.h" |
21 | 23 | ||
24 | #include <linux/log2.h> | ||
22 | #include <sys/prctl.h> | 25 | #include <sys/prctl.h> |
23 | #include <sys/resource.h> | 26 | #include <sys/resource.h> |
24 | 27 | ||
@@ -192,8 +195,40 @@ struct perf_sched { | |||
192 | bool force; | 195 | bool force; |
193 | bool skip_merge; | 196 | bool skip_merge; |
194 | struct perf_sched_map map; | 197 | struct perf_sched_map map; |
198 | |||
199 | /* options for timehist command */ | ||
200 | bool summary; | ||
201 | bool summary_only; | ||
202 | bool show_callchain; | ||
203 | unsigned int max_stack; | ||
204 | bool show_cpu_visual; | ||
205 | bool show_wakeups; | ||
206 | u64 skipped_samples; | ||
207 | }; | ||
208 | |||
209 | /* per thread run time data */ | ||
210 | struct thread_runtime { | ||
211 | u64 last_time; /* time of previous sched in/out event */ | ||
212 | u64 dt_run; /* run time */ | ||
213 | u64 dt_wait; /* time between CPU access (off cpu) */ | ||
214 | u64 dt_delay; /* time between wakeup and sched-in */ | ||
215 | u64 ready_to_run; /* time of wakeup */ | ||
216 | |||
217 | struct stats run_stats; | ||
218 | u64 total_run_time; | ||
195 | }; | 219 | }; |
196 | 220 | ||
221 | /* per event run time data */ | ||
222 | struct evsel_runtime { | ||
223 | u64 *last_time; /* time this event was last seen per cpu */ | ||
224 | u32 ncpu; /* highest cpu slot allocated */ | ||
225 | }; | ||
226 | |||
227 | /* track idle times per cpu */ | ||
228 | static struct thread **idle_threads; | ||
229 | static int idle_max_cpu; | ||
230 | static char idle_comm[] = "<idle>"; | ||
231 | |||
197 | static u64 get_nsecs(void) | 232 | static u64 get_nsecs(void) |
198 | { | 233 | { |
199 | struct timespec ts; | 234 | struct timespec ts; |
@@ -1654,6 +1689,837 @@ out_delete: | |||
1654 | return rc; | 1689 | return rc; |
1655 | } | 1690 | } |
1656 | 1691 | ||
1692 | /* | ||
1693 | * scheduling times are printed as msec.usec | ||
1694 | */ | ||
1695 | static inline void print_sched_time(unsigned long long nsecs, int width) | ||
1696 | { | ||
1697 | unsigned long msecs; | ||
1698 | unsigned long usecs; | ||
1699 | |||
1700 | msecs = nsecs / NSEC_PER_MSEC; | ||
1701 | nsecs -= msecs * NSEC_PER_MSEC; | ||
1702 | usecs = nsecs / NSEC_PER_USEC; | ||
1703 | printf("%*lu.%03lu ", width, msecs, usecs); | ||
1704 | } | ||
1705 | |||
1706 | /* | ||
1707 | * returns runtime data for event, allocating memory for it the | ||
1708 | * first time it is used. | ||
1709 | */ | ||
1710 | static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel) | ||
1711 | { | ||
1712 | struct evsel_runtime *r = evsel->priv; | ||
1713 | |||
1714 | if (r == NULL) { | ||
1715 | r = zalloc(sizeof(struct evsel_runtime)); | ||
1716 | evsel->priv = r; | ||
1717 | } | ||
1718 | |||
1719 | return r; | ||
1720 | } | ||
1721 | |||
1722 | /* | ||
1723 | * save last time event was seen per cpu | ||
1724 | */ | ||
1725 | static void perf_evsel__save_time(struct perf_evsel *evsel, | ||
1726 | u64 timestamp, u32 cpu) | ||
1727 | { | ||
1728 | struct evsel_runtime *r = perf_evsel__get_runtime(evsel); | ||
1729 | |||
1730 | if (r == NULL) | ||
1731 | return; | ||
1732 | |||
1733 | if ((cpu >= r->ncpu) || (r->last_time == NULL)) { | ||
1734 | int i, n = __roundup_pow_of_two(cpu+1); | ||
1735 | void *p = r->last_time; | ||
1736 | |||
1737 | p = realloc(r->last_time, n * sizeof(u64)); | ||
1738 | if (!p) | ||
1739 | return; | ||
1740 | |||
1741 | r->last_time = p; | ||
1742 | for (i = r->ncpu; i < n; ++i) | ||
1743 | r->last_time[i] = (u64) 0; | ||
1744 | |||
1745 | r->ncpu = n; | ||
1746 | } | ||
1747 | |||
1748 | r->last_time[cpu] = timestamp; | ||
1749 | } | ||
1750 | |||
1751 | /* returns last time this event was seen on the given cpu */ | ||
1752 | static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) | ||
1753 | { | ||
1754 | struct evsel_runtime *r = perf_evsel__get_runtime(evsel); | ||
1755 | |||
1756 | if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) | ||
1757 | return 0; | ||
1758 | |||
1759 | return r->last_time[cpu]; | ||
1760 | } | ||
1761 | |||
1762 | static int comm_width = 20; | ||
1763 | |||
1764 | static char *timehist_get_commstr(struct thread *thread) | ||
1765 | { | ||
1766 | static char str[32]; | ||
1767 | const char *comm = thread__comm_str(thread); | ||
1768 | pid_t tid = thread->tid; | ||
1769 | pid_t pid = thread->pid_; | ||
1770 | int n; | ||
1771 | |||
1772 | if (pid == 0) | ||
1773 | n = scnprintf(str, sizeof(str), "%s", comm); | ||
1774 | |||
1775 | else if (tid != pid) | ||
1776 | n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); | ||
1777 | |||
1778 | else | ||
1779 | n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); | ||
1780 | |||
1781 | if (n > comm_width) | ||
1782 | comm_width = n; | ||
1783 | |||
1784 | return str; | ||
1785 | } | ||
1786 | |||
1787 | static void timehist_header(struct perf_sched *sched) | ||
1788 | { | ||
1789 | u32 ncpus = sched->max_cpu + 1; | ||
1790 | u32 i, j; | ||
1791 | |||
1792 | printf("%15s %6s ", "time", "cpu"); | ||
1793 | |||
1794 | if (sched->show_cpu_visual) { | ||
1795 | printf(" "); | ||
1796 | for (i = 0, j = 0; i < ncpus; ++i) { | ||
1797 | printf("%x", j++); | ||
1798 | if (j > 15) | ||
1799 | j = 0; | ||
1800 | } | ||
1801 | printf(" "); | ||
1802 | } | ||
1803 | |||
1804 | printf(" %-20s %9s %9s %9s", | ||
1805 | "task name", "wait time", "sch delay", "run time"); | ||
1806 | |||
1807 | printf("\n"); | ||
1808 | |||
1809 | /* | ||
1810 | * units row | ||
1811 | */ | ||
1812 | printf("%15s %-6s ", "", ""); | ||
1813 | |||
1814 | if (sched->show_cpu_visual) | ||
1815 | printf(" %*s ", ncpus, ""); | ||
1816 | |||
1817 | printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)"); | ||
1818 | |||
1819 | /* | ||
1820 | * separator | ||
1821 | */ | ||
1822 | printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); | ||
1823 | |||
1824 | if (sched->show_cpu_visual) | ||
1825 | printf(" %.*s ", ncpus, graph_dotted_line); | ||
1826 | |||
1827 | printf(" %.20s %.9s %.9s %.9s", | ||
1828 | graph_dotted_line, graph_dotted_line, graph_dotted_line, | ||
1829 | graph_dotted_line); | ||
1830 | |||
1831 | printf("\n"); | ||
1832 | } | ||
1833 | |||
1834 | static void timehist_print_sample(struct perf_sched *sched, | ||
1835 | struct perf_sample *sample, | ||
1836 | struct addr_location *al, | ||
1837 | struct thread *thread) | ||
1838 | { | ||
1839 | struct thread_runtime *tr = thread__priv(thread); | ||
1840 | u32 max_cpus = sched->max_cpu + 1; | ||
1841 | char tstr[64]; | ||
1842 | |||
1843 | timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); | ||
1844 | printf("%15s [%04d] ", tstr, sample->cpu); | ||
1845 | |||
1846 | if (sched->show_cpu_visual) { | ||
1847 | u32 i; | ||
1848 | char c; | ||
1849 | |||
1850 | printf(" "); | ||
1851 | for (i = 0; i < max_cpus; ++i) { | ||
1852 | /* flag idle times with 'i'; others are sched events */ | ||
1853 | if (i == sample->cpu) | ||
1854 | c = (thread->tid == 0) ? 'i' : 's'; | ||
1855 | else | ||
1856 | c = ' '; | ||
1857 | printf("%c", c); | ||
1858 | } | ||
1859 | printf(" "); | ||
1860 | } | ||
1861 | |||
1862 | printf(" %-*s ", comm_width, timehist_get_commstr(thread)); | ||
1863 | |||
1864 | print_sched_time(tr->dt_wait, 6); | ||
1865 | print_sched_time(tr->dt_delay, 6); | ||
1866 | print_sched_time(tr->dt_run, 6); | ||
1867 | |||
1868 | if (sched->show_wakeups) | ||
1869 | printf(" %-*s", comm_width, ""); | ||
1870 | |||
1871 | if (thread->tid == 0) | ||
1872 | goto out; | ||
1873 | |||
1874 | if (sched->show_callchain) | ||
1875 | printf(" "); | ||
1876 | |||
1877 | sample__fprintf_sym(sample, al, 0, | ||
1878 | EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | | ||
1879 | EVSEL__PRINT_CALLCHAIN_ARROW, | ||
1880 | &callchain_cursor, stdout); | ||
1881 | |||
1882 | out: | ||
1883 | printf("\n"); | ||
1884 | } | ||
1885 | |||
1886 | /* | ||
1887 | * Explanation of delta-time stats: | ||
1888 | * | ||
1889 | * t = time of current schedule out event | ||
1890 | * tprev = time of previous sched out event | ||
1891 | * also time of schedule-in event for current task | ||
1892 | * last_time = time of last sched change event for current task | ||
1893 | * (i.e, time process was last scheduled out) | ||
1894 | * ready_to_run = time of wakeup for current task | ||
1895 | * | ||
1896 | * -----|------------|------------|------------|------ | ||
1897 | * last ready tprev t | ||
1898 | * time to run | ||
1899 | * | ||
1900 | * |-------- dt_wait --------| | ||
1901 | * |- dt_delay -|-- dt_run --| | ||
1902 | * | ||
1903 | * dt_run = run time of current task | ||
1904 | * dt_wait = time between last schedule out event for task and tprev | ||
1905 | * represents time spent off the cpu | ||
1906 | * dt_delay = time between wakeup and schedule-in of task | ||
1907 | */ | ||
1908 | |||
1909 | static void timehist_update_runtime_stats(struct thread_runtime *r, | ||
1910 | u64 t, u64 tprev) | ||
1911 | { | ||
1912 | r->dt_delay = 0; | ||
1913 | r->dt_wait = 0; | ||
1914 | r->dt_run = 0; | ||
1915 | if (tprev) { | ||
1916 | r->dt_run = t - tprev; | ||
1917 | if (r->ready_to_run) { | ||
1918 | if (r->ready_to_run > tprev) | ||
1919 | pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); | ||
1920 | else | ||
1921 | r->dt_delay = tprev - r->ready_to_run; | ||
1922 | } | ||
1923 | |||
1924 | if (r->last_time > tprev) | ||
1925 | pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); | ||
1926 | else if (r->last_time) | ||
1927 | r->dt_wait = tprev - r->last_time; | ||
1928 | } | ||
1929 | |||
1930 | update_stats(&r->run_stats, r->dt_run); | ||
1931 | r->total_run_time += r->dt_run; | ||
1932 | } | ||
1933 | |||
1934 | static bool is_idle_sample(struct perf_sched *sched, | ||
1935 | struct perf_sample *sample, | ||
1936 | struct perf_evsel *evsel, | ||
1937 | struct machine *machine) | ||
1938 | { | ||
1939 | struct thread *thread; | ||
1940 | struct callchain_cursor *cursor = &callchain_cursor; | ||
1941 | |||
1942 | /* pid 0 == swapper == idle task */ | ||
1943 | if (sample->pid == 0) | ||
1944 | return true; | ||
1945 | |||
1946 | if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) { | ||
1947 | if (perf_evsel__intval(evsel, sample, "prev_pid") == 0) | ||
1948 | return true; | ||
1949 | } | ||
1950 | |||
1951 | /* want main thread for process - has maps */ | ||
1952 | thread = machine__findnew_thread(machine, sample->pid, sample->pid); | ||
1953 | if (thread == NULL) { | ||
1954 | pr_debug("Failed to get thread for pid %d.\n", sample->pid); | ||
1955 | return false; | ||
1956 | } | ||
1957 | |||
1958 | if (!symbol_conf.use_callchain || sample->callchain == NULL) | ||
1959 | return false; | ||
1960 | |||
1961 | if (thread__resolve_callchain(thread, cursor, evsel, sample, | ||
1962 | NULL, NULL, sched->max_stack) != 0) { | ||
1963 | if (verbose) | ||
1964 | error("Failed to resolve callchain. Skipping\n"); | ||
1965 | |||
1966 | return false; | ||
1967 | } | ||
1968 | callchain_cursor_commit(cursor); | ||
1969 | return false; | ||
1970 | } | ||
1971 | |||
1972 | /* | ||
1973 | * Track idle stats per cpu by maintaining a local thread | ||
1974 | * struct for the idle task on each cpu. | ||
1975 | */ | ||
1976 | static int init_idle_threads(int ncpu) | ||
1977 | { | ||
1978 | int i; | ||
1979 | |||
1980 | idle_threads = zalloc(ncpu * sizeof(struct thread *)); | ||
1981 | if (!idle_threads) | ||
1982 | return -ENOMEM; | ||
1983 | |||
1984 | idle_max_cpu = ncpu - 1; | ||
1985 | |||
1986 | /* allocate the actual thread struct if needed */ | ||
1987 | for (i = 0; i < ncpu; ++i) { | ||
1988 | idle_threads[i] = thread__new(0, 0); | ||
1989 | if (idle_threads[i] == NULL) | ||
1990 | return -ENOMEM; | ||
1991 | |||
1992 | thread__set_comm(idle_threads[i], idle_comm, 0); | ||
1993 | } | ||
1994 | |||
1995 | return 0; | ||
1996 | } | ||
1997 | |||
1998 | static void free_idle_threads(void) | ||
1999 | { | ||
2000 | int i; | ||
2001 | |||
2002 | if (idle_threads == NULL) | ||
2003 | return; | ||
2004 | |||
2005 | for (i = 0; i <= idle_max_cpu; ++i) { | ||
2006 | if ((idle_threads[i])) | ||
2007 | thread__delete(idle_threads[i]); | ||
2008 | } | ||
2009 | |||
2010 | free(idle_threads); | ||
2011 | } | ||
2012 | |||
2013 | static struct thread *get_idle_thread(int cpu) | ||
2014 | { | ||
2015 | /* | ||
2016 | * expand/allocate array of pointers to local thread | ||
2017 | * structs if needed | ||
2018 | */ | ||
2019 | if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { | ||
2020 | int i, j = __roundup_pow_of_two(cpu+1); | ||
2021 | void *p; | ||
2022 | |||
2023 | p = realloc(idle_threads, j * sizeof(struct thread *)); | ||
2024 | if (!p) | ||
2025 | return NULL; | ||
2026 | |||
2027 | idle_threads = (struct thread **) p; | ||
2028 | i = idle_max_cpu ? idle_max_cpu + 1 : 0; | ||
2029 | for (; i < j; ++i) | ||
2030 | idle_threads[i] = NULL; | ||
2031 | |||
2032 | idle_max_cpu = j; | ||
2033 | } | ||
2034 | |||
2035 | /* allocate a new thread struct if needed */ | ||
2036 | if (idle_threads[cpu] == NULL) { | ||
2037 | idle_threads[cpu] = thread__new(0, 0); | ||
2038 | |||
2039 | if (idle_threads[cpu]) { | ||
2040 | idle_threads[cpu]->tid = 0; | ||
2041 | thread__set_comm(idle_threads[cpu], idle_comm, 0); | ||
2042 | } | ||
2043 | } | ||
2044 | |||
2045 | return idle_threads[cpu]; | ||
2046 | } | ||
2047 | |||
2048 | /* | ||
2049 | * handle runtime stats saved per thread | ||
2050 | */ | ||
2051 | static struct thread_runtime *thread__init_runtime(struct thread *thread) | ||
2052 | { | ||
2053 | struct thread_runtime *r; | ||
2054 | |||
2055 | r = zalloc(sizeof(struct thread_runtime)); | ||
2056 | if (!r) | ||
2057 | return NULL; | ||
2058 | |||
2059 | init_stats(&r->run_stats); | ||
2060 | thread__set_priv(thread, r); | ||
2061 | |||
2062 | return r; | ||
2063 | } | ||
2064 | |||
/*
 * Return the runtime data attached to @thread, creating it on first
 * use.  Logs a debug message and returns NULL if it cannot be created.
 */
static struct thread_runtime *thread__get_runtime(struct thread *thread)
{
	struct thread_runtime *runtime = thread__priv(thread);

	if (runtime != NULL)
		return runtime;

	runtime = thread__init_runtime(thread);
	if (runtime == NULL)
		pr_debug("Failed to malloc memory for runtime data.\n");

	return runtime;
}
2078 | |||
2079 | static struct thread *timehist_get_thread(struct perf_sched *sched, | ||
2080 | struct perf_sample *sample, | ||
2081 | struct machine *machine, | ||
2082 | struct perf_evsel *evsel) | ||
2083 | { | ||
2084 | struct thread *thread; | ||
2085 | |||
2086 | if (is_idle_sample(sched, sample, evsel, machine)) { | ||
2087 | thread = get_idle_thread(sample->cpu); | ||
2088 | if (thread == NULL) | ||
2089 | pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); | ||
2090 | |||
2091 | } else { | ||
2092 | thread = machine__findnew_thread(machine, sample->pid, sample->tid); | ||
2093 | if (thread == NULL) { | ||
2094 | pr_debug("Failed to get thread for tid %d. skipping sample.\n", | ||
2095 | sample->tid); | ||
2096 | } | ||
2097 | } | ||
2098 | |||
2099 | return thread; | ||
2100 | } | ||
2101 | |||
2102 | static bool timehist_skip_sample(struct perf_sched *sched, | ||
2103 | struct thread *thread) | ||
2104 | { | ||
2105 | bool rc = false; | ||
2106 | |||
2107 | if (thread__is_filtered(thread)) { | ||
2108 | rc = true; | ||
2109 | sched->skipped_samples++; | ||
2110 | } | ||
2111 | |||
2112 | return rc; | ||
2113 | } | ||
2114 | |||
2115 | static void timehist_print_wakeup_event(struct perf_sched *sched, | ||
2116 | struct perf_sample *sample, | ||
2117 | struct machine *machine, | ||
2118 | struct thread *awakened) | ||
2119 | { | ||
2120 | struct thread *thread; | ||
2121 | char tstr[64]; | ||
2122 | |||
2123 | thread = machine__findnew_thread(machine, sample->pid, sample->tid); | ||
2124 | if (thread == NULL) | ||
2125 | return; | ||
2126 | |||
2127 | /* show wakeup unless both awakee and awaker are filtered */ | ||
2128 | if (timehist_skip_sample(sched, thread) && | ||
2129 | timehist_skip_sample(sched, awakened)) { | ||
2130 | return; | ||
2131 | } | ||
2132 | |||
2133 | timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); | ||
2134 | printf("%15s [%04d] ", tstr, sample->cpu); | ||
2135 | if (sched->show_cpu_visual) | ||
2136 | printf(" %*s ", sched->max_cpu + 1, ""); | ||
2137 | |||
2138 | printf(" %-*s ", comm_width, timehist_get_commstr(thread)); | ||
2139 | |||
2140 | /* dt spacer */ | ||
2141 | printf(" %9s %9s %9s ", "", "", ""); | ||
2142 | |||
2143 | printf("awakened: %s", timehist_get_commstr(awakened)); | ||
2144 | |||
2145 | printf("\n"); | ||
2146 | } | ||
2147 | |||
2148 | static int timehist_sched_wakeup_event(struct perf_tool *tool, | ||
2149 | union perf_event *event __maybe_unused, | ||
2150 | struct perf_evsel *evsel, | ||
2151 | struct perf_sample *sample, | ||
2152 | struct machine *machine) | ||
2153 | { | ||
2154 | struct perf_sched *sched = container_of(tool, struct perf_sched, tool); | ||
2155 | struct thread *thread; | ||
2156 | struct thread_runtime *tr = NULL; | ||
2157 | /* want pid of awakened task not pid in sample */ | ||
2158 | const u32 pid = perf_evsel__intval(evsel, sample, "pid"); | ||
2159 | |||
2160 | thread = machine__findnew_thread(machine, 0, pid); | ||
2161 | if (thread == NULL) | ||
2162 | return -1; | ||
2163 | |||
2164 | tr = thread__get_runtime(thread); | ||
2165 | if (tr == NULL) | ||
2166 | return -1; | ||
2167 | |||
2168 | if (tr->ready_to_run == 0) | ||
2169 | tr->ready_to_run = sample->time; | ||
2170 | |||
2171 | /* show wakeups if requested */ | ||
2172 | if (sched->show_wakeups) | ||
2173 | timehist_print_wakeup_event(sched, sample, machine, thread); | ||
2174 | |||
2175 | return 0; | ||
2176 | } | ||
2177 | |||
2178 | static int timehist_sched_change_event(struct perf_tool *tool, | ||
2179 | union perf_event *event, | ||
2180 | struct perf_evsel *evsel, | ||
2181 | struct perf_sample *sample, | ||
2182 | struct machine *machine) | ||
2183 | { | ||
2184 | struct perf_sched *sched = container_of(tool, struct perf_sched, tool); | ||
2185 | struct addr_location al; | ||
2186 | struct thread *thread; | ||
2187 | struct thread_runtime *tr = NULL; | ||
2188 | u64 tprev; | ||
2189 | int rc = 0; | ||
2190 | |||
2191 | if (machine__resolve(machine, &al, sample) < 0) { | ||
2192 | pr_err("problem processing %d event. skipping it\n", | ||
2193 | event->header.type); | ||
2194 | rc = -1; | ||
2195 | goto out; | ||
2196 | } | ||
2197 | |||
2198 | thread = timehist_get_thread(sched, sample, machine, evsel); | ||
2199 | if (thread == NULL) { | ||
2200 | rc = -1; | ||
2201 | goto out; | ||
2202 | } | ||
2203 | |||
2204 | if (timehist_skip_sample(sched, thread)) | ||
2205 | goto out; | ||
2206 | |||
2207 | tr = thread__get_runtime(thread); | ||
2208 | if (tr == NULL) { | ||
2209 | rc = -1; | ||
2210 | goto out; | ||
2211 | } | ||
2212 | |||
2213 | tprev = perf_evsel__get_time(evsel, sample->cpu); | ||
2214 | |||
2215 | timehist_update_runtime_stats(tr, sample->time, tprev); | ||
2216 | if (!sched->summary_only) | ||
2217 | timehist_print_sample(sched, sample, &al, thread); | ||
2218 | |||
2219 | out: | ||
2220 | if (tr) { | ||
2221 | /* time of this sched_switch event becomes last time task seen */ | ||
2222 | tr->last_time = sample->time; | ||
2223 | |||
2224 | /* sched out event for task so reset ready to run time */ | ||
2225 | tr->ready_to_run = 0; | ||
2226 | } | ||
2227 | |||
2228 | perf_evsel__save_time(evsel, sample->time, sample->cpu); | ||
2229 | |||
2230 | return rc; | ||
2231 | } | ||
2232 | |||
2233 | static int timehist_sched_switch_event(struct perf_tool *tool, | ||
2234 | union perf_event *event, | ||
2235 | struct perf_evsel *evsel, | ||
2236 | struct perf_sample *sample, | ||
2237 | struct machine *machine __maybe_unused) | ||
2238 | { | ||
2239 | return timehist_sched_change_event(tool, event, evsel, sample, machine); | ||
2240 | } | ||
2241 | |||
2242 | static int process_lost(struct perf_tool *tool __maybe_unused, | ||
2243 | union perf_event *event, | ||
2244 | struct perf_sample *sample, | ||
2245 | struct machine *machine __maybe_unused) | ||
2246 | { | ||
2247 | char tstr[64]; | ||
2248 | |||
2249 | timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); | ||
2250 | printf("%15s ", tstr); | ||
2251 | printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu); | ||
2252 | |||
2253 | return 0; | ||
2254 | } | ||
2255 | |||
2256 | |||
2257 | static void print_thread_runtime(struct thread *t, | ||
2258 | struct thread_runtime *r) | ||
2259 | { | ||
2260 | double mean = avg_stats(&r->run_stats); | ||
2261 | float stddev; | ||
2262 | |||
2263 | printf("%*s %5d %9" PRIu64 " ", | ||
2264 | comm_width, timehist_get_commstr(t), t->ppid, | ||
2265 | (u64) r->run_stats.n); | ||
2266 | |||
2267 | print_sched_time(r->total_run_time, 8); | ||
2268 | stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean); | ||
2269 | print_sched_time(r->run_stats.min, 6); | ||
2270 | printf(" "); | ||
2271 | print_sched_time((u64) mean, 6); | ||
2272 | printf(" "); | ||
2273 | print_sched_time(r->run_stats.max, 6); | ||
2274 | printf(" "); | ||
2275 | printf("%5.2f", stddev); | ||
2276 | printf("\n"); | ||
2277 | } | ||
2278 | |||
2279 | struct total_run_stats { | ||
2280 | u64 sched_count; | ||
2281 | u64 task_count; | ||
2282 | u64 total_run_time; | ||
2283 | }; | ||
2284 | |||
2285 | static int __show_thread_runtime(struct thread *t, void *priv) | ||
2286 | { | ||
2287 | struct total_run_stats *stats = priv; | ||
2288 | struct thread_runtime *r; | ||
2289 | |||
2290 | if (thread__is_filtered(t)) | ||
2291 | return 0; | ||
2292 | |||
2293 | r = thread__priv(t); | ||
2294 | if (r && r->run_stats.n) { | ||
2295 | stats->task_count++; | ||
2296 | stats->sched_count += r->run_stats.n; | ||
2297 | stats->total_run_time += r->total_run_time; | ||
2298 | print_thread_runtime(t, r); | ||
2299 | } | ||
2300 | |||
2301 | return 0; | ||
2302 | } | ||
2303 | |||
2304 | static int show_thread_runtime(struct thread *t, void *priv) | ||
2305 | { | ||
2306 | if (t->dead) | ||
2307 | return 0; | ||
2308 | |||
2309 | return __show_thread_runtime(t, priv); | ||
2310 | } | ||
2311 | |||
2312 | static int show_deadthread_runtime(struct thread *t, void *priv) | ||
2313 | { | ||
2314 | if (!t->dead) | ||
2315 | return 0; | ||
2316 | |||
2317 | return __show_thread_runtime(t, priv); | ||
2318 | } | ||
2319 | |||
2320 | static void timehist_print_summary(struct perf_sched *sched, | ||
2321 | struct perf_session *session) | ||
2322 | { | ||
2323 | struct machine *m = &session->machines.host; | ||
2324 | struct total_run_stats totals; | ||
2325 | u64 task_count; | ||
2326 | struct thread *t; | ||
2327 | struct thread_runtime *r; | ||
2328 | int i; | ||
2329 | |||
2330 | memset(&totals, 0, sizeof(totals)); | ||
2331 | |||
2332 | if (comm_width < 30) | ||
2333 | comm_width = 30; | ||
2334 | |||
2335 | printf("\nRuntime summary\n"); | ||
2336 | printf("%*s parent sched-in ", comm_width, "comm"); | ||
2337 | printf(" run-time min-run avg-run max-run stddev\n"); | ||
2338 | printf("%*s (count) ", comm_width, ""); | ||
2339 | printf(" (msec) (msec) (msec) (msec) %%\n"); | ||
2340 | printf("%.105s\n", graph_dotted_line); | ||
2341 | |||
2342 | machine__for_each_thread(m, show_thread_runtime, &totals); | ||
2343 | task_count = totals.task_count; | ||
2344 | if (!task_count) | ||
2345 | printf("<no still running tasks>\n"); | ||
2346 | |||
2347 | printf("\nTerminated tasks:\n"); | ||
2348 | machine__for_each_thread(m, show_deadthread_runtime, &totals); | ||
2349 | if (task_count == totals.task_count) | ||
2350 | printf("<no terminated tasks>\n"); | ||
2351 | |||
2352 | /* CPU idle stats not tracked when samples were skipped */ | ||
2353 | if (sched->skipped_samples) | ||
2354 | return; | ||
2355 | |||
2356 | printf("\nIdle stats:\n"); | ||
2357 | for (i = 0; i <= idle_max_cpu; ++i) { | ||
2358 | t = idle_threads[i]; | ||
2359 | if (!t) | ||
2360 | continue; | ||
2361 | |||
2362 | r = thread__priv(t); | ||
2363 | if (r && r->run_stats.n) { | ||
2364 | totals.sched_count += r->run_stats.n; | ||
2365 | printf(" CPU %2d idle for ", i); | ||
2366 | print_sched_time(r->total_run_time, 6); | ||
2367 | printf(" msec\n"); | ||
2368 | } else | ||
2369 | printf(" CPU %2d idle entire time window\n", i); | ||
2370 | } | ||
2371 | |||
2372 | printf("\n" | ||
2373 | " Total number of unique tasks: %" PRIu64 "\n" | ||
2374 | "Total number of context switches: %" PRIu64 "\n" | ||
2375 | " Total run time (msec): ", | ||
2376 | totals.task_count, totals.sched_count); | ||
2377 | |||
2378 | print_sched_time(totals.total_run_time, 2); | ||
2379 | printf("\n"); | ||
2380 | } | ||
2381 | |||
2382 | typedef int (*sched_handler)(struct perf_tool *tool, | ||
2383 | union perf_event *event, | ||
2384 | struct perf_evsel *evsel, | ||
2385 | struct perf_sample *sample, | ||
2386 | struct machine *machine); | ||
2387 | |||
2388 | static int perf_timehist__process_sample(struct perf_tool *tool, | ||
2389 | union perf_event *event, | ||
2390 | struct perf_sample *sample, | ||
2391 | struct perf_evsel *evsel, | ||
2392 | struct machine *machine) | ||
2393 | { | ||
2394 | struct perf_sched *sched = container_of(tool, struct perf_sched, tool); | ||
2395 | int err = 0; | ||
2396 | int this_cpu = sample->cpu; | ||
2397 | |||
2398 | if (this_cpu > sched->max_cpu) | ||
2399 | sched->max_cpu = this_cpu; | ||
2400 | |||
2401 | if (evsel->handler != NULL) { | ||
2402 | sched_handler f = evsel->handler; | ||
2403 | |||
2404 | err = f(tool, event, evsel, sample, machine); | ||
2405 | } | ||
2406 | |||
2407 | return err; | ||
2408 | } | ||
2409 | |||
2410 | static int timehist_check_attr(struct perf_sched *sched, | ||
2411 | struct perf_evlist *evlist) | ||
2412 | { | ||
2413 | struct perf_evsel *evsel; | ||
2414 | struct evsel_runtime *er; | ||
2415 | |||
2416 | list_for_each_entry(evsel, &evlist->entries, node) { | ||
2417 | er = perf_evsel__get_runtime(evsel); | ||
2418 | if (er == NULL) { | ||
2419 | pr_err("Failed to allocate memory for evsel runtime data\n"); | ||
2420 | return -1; | ||
2421 | } | ||
2422 | |||
2423 | if (sched->show_callchain && | ||
2424 | !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) { | ||
2425 | pr_info("Samples do not have callchains.\n"); | ||
2426 | sched->show_callchain = 0; | ||
2427 | symbol_conf.use_callchain = 0; | ||
2428 | } | ||
2429 | } | ||
2430 | |||
2431 | return 0; | ||
2432 | } | ||
2433 | |||
2434 | static int perf_sched__timehist(struct perf_sched *sched) | ||
2435 | { | ||
2436 | const struct perf_evsel_str_handler handlers[] = { | ||
2437 | { "sched:sched_switch", timehist_sched_switch_event, }, | ||
2438 | { "sched:sched_wakeup", timehist_sched_wakeup_event, }, | ||
2439 | { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, | ||
2440 | }; | ||
2441 | struct perf_data_file file = { | ||
2442 | .path = input_name, | ||
2443 | .mode = PERF_DATA_MODE_READ, | ||
2444 | }; | ||
2445 | |||
2446 | struct perf_session *session; | ||
2447 | struct perf_evlist *evlist; | ||
2448 | int err = -1; | ||
2449 | |||
2450 | /* | ||
2451 | * event handlers for timehist option | ||
2452 | */ | ||
2453 | sched->tool.sample = perf_timehist__process_sample; | ||
2454 | sched->tool.mmap = perf_event__process_mmap; | ||
2455 | sched->tool.comm = perf_event__process_comm; | ||
2456 | sched->tool.exit = perf_event__process_exit; | ||
2457 | sched->tool.fork = perf_event__process_fork; | ||
2458 | sched->tool.lost = process_lost; | ||
2459 | sched->tool.attr = perf_event__process_attr; | ||
2460 | sched->tool.tracing_data = perf_event__process_tracing_data; | ||
2461 | sched->tool.build_id = perf_event__process_build_id; | ||
2462 | |||
2463 | sched->tool.ordered_events = true; | ||
2464 | sched->tool.ordering_requires_timestamps = true; | ||
2465 | |||
2466 | symbol_conf.use_callchain = sched->show_callchain; | ||
2467 | |||
2468 | session = perf_session__new(&file, false, &sched->tool); | ||
2469 | if (session == NULL) | ||
2470 | return -ENOMEM; | ||
2471 | |||
2472 | evlist = session->evlist; | ||
2473 | |||
2474 | symbol__init(&session->header.env); | ||
2475 | |||
2476 | if (timehist_check_attr(sched, evlist) != 0) | ||
2477 | goto out; | ||
2478 | |||
2479 | setup_pager(); | ||
2480 | |||
2481 | /* setup per-evsel handlers */ | ||
2482 | if (perf_session__set_tracepoints_handlers(session, handlers)) | ||
2483 | goto out; | ||
2484 | |||
2485 | if (!perf_session__has_traces(session, "record -R")) | ||
2486 | goto out; | ||
2487 | |||
2488 | /* pre-allocate struct for per-CPU idle stats */ | ||
2489 | sched->max_cpu = session->header.env.nr_cpus_online; | ||
2490 | if (sched->max_cpu == 0) | ||
2491 | sched->max_cpu = 4; | ||
2492 | if (init_idle_threads(sched->max_cpu)) | ||
2493 | goto out; | ||
2494 | |||
2495 | /* summary_only implies summary option, but don't overwrite summary if set */ | ||
2496 | if (sched->summary_only) | ||
2497 | sched->summary = sched->summary_only; | ||
2498 | |||
2499 | if (!sched->summary_only) | ||
2500 | timehist_header(sched); | ||
2501 | |||
2502 | err = perf_session__process_events(session); | ||
2503 | if (err) { | ||
2504 | pr_err("Failed to process events, error %d", err); | ||
2505 | goto out; | ||
2506 | } | ||
2507 | |||
2508 | sched->nr_events = evlist->stats.nr_events[0]; | ||
2509 | sched->nr_lost_events = evlist->stats.total_lost; | ||
2510 | sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST]; | ||
2511 | |||
2512 | if (sched->summary) | ||
2513 | timehist_print_summary(sched, session); | ||
2514 | |||
2515 | out: | ||
2516 | free_idle_threads(); | ||
2517 | perf_session__delete(session); | ||
2518 | |||
2519 | return err; | ||
2520 | } | ||
2521 | |||
2522 | |||
1657 | static void print_bad_events(struct perf_sched *sched) | 2523 | static void print_bad_events(struct perf_sched *sched) |
1658 | { | 2524 | { |
1659 | if (sched->nr_unordered_timestamps && sched->nr_timestamps) { | 2525 | if (sched->nr_unordered_timestamps && sched->nr_timestamps) { |
@@ -1957,6 +2823,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1957 | .next_shortname1 = 'A', | 2823 | .next_shortname1 = 'A', |
1958 | .next_shortname2 = '0', | 2824 | .next_shortname2 = '0', |
1959 | .skip_merge = 0, | 2825 | .skip_merge = 0, |
2826 | .show_callchain = 1, | ||
2827 | .max_stack = 5, | ||
1960 | }; | 2828 | }; |
1961 | const struct option sched_options[] = { | 2829 | const struct option sched_options[] = { |
1962 | OPT_STRING('i', "input", &input_name, "file", | 2830 | OPT_STRING('i', "input", &input_name, "file", |
@@ -1970,8 +2838,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1970 | const struct option latency_options[] = { | 2838 | const struct option latency_options[] = { |
1971 | OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", | 2839 | OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", |
1972 | "sort by key(s): runtime, switch, avg, max"), | 2840 | "sort by key(s): runtime, switch, avg, max"), |
1973 | OPT_INCR('v', "verbose", &verbose, | ||
1974 | "be more verbose (show symbol address, etc)"), | ||
1975 | OPT_INTEGER('C', "CPU", &sched.profile_cpu, | 2841 | OPT_INTEGER('C', "CPU", &sched.profile_cpu, |
1976 | "CPU to profile on"), | 2842 | "CPU to profile on"), |
1977 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 2843 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
@@ -1983,8 +2849,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1983 | const struct option replay_options[] = { | 2849 | const struct option replay_options[] = { |
1984 | OPT_UINTEGER('r', "repeat", &sched.replay_repeat, | 2850 | OPT_UINTEGER('r', "repeat", &sched.replay_repeat, |
1985 | "repeat the workload replay N times (-1: infinite)"), | 2851 | "repeat the workload replay N times (-1: infinite)"), |
1986 | OPT_INCR('v', "verbose", &verbose, | ||
1987 | "be more verbose (show symbol address, etc)"), | ||
1988 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 2852 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
1989 | "dump raw trace in ASCII"), | 2853 | "dump raw trace in ASCII"), |
1990 | OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), | 2854 | OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), |
@@ -2001,6 +2865,26 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
2001 | "display given CPUs in map"), | 2865 | "display given CPUs in map"), |
2002 | OPT_PARENT(sched_options) | 2866 | OPT_PARENT(sched_options) |
2003 | }; | 2867 | }; |
2868 | const struct option timehist_options[] = { | ||
2869 | OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, | ||
2870 | "file", "vmlinux pathname"), | ||
2871 | OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, | ||
2872 | "file", "kallsyms pathname"), | ||
2873 | OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, | ||
2874 | "Display call chains if present (default on)"), | ||
2875 | OPT_UINTEGER(0, "max-stack", &sched.max_stack, | ||
2876 | "Maximum number of functions to display backtrace."), | ||
2877 | OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", | ||
2878 | "Look for files with symbols relative to this directory"), | ||
2879 | OPT_BOOLEAN('s', "summary", &sched.summary_only, | ||
2880 | "Show only syscall summary with statistics"), | ||
2881 | OPT_BOOLEAN('S', "with-summary", &sched.summary, | ||
2882 | "Show all syscalls and summary with statistics"), | ||
2883 | OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), | ||
2884 | OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), | ||
2885 | OPT_PARENT(sched_options) | ||
2886 | }; | ||
2887 | |||
2004 | const char * const latency_usage[] = { | 2888 | const char * const latency_usage[] = { |
2005 | "perf sched latency [<options>]", | 2889 | "perf sched latency [<options>]", |
2006 | NULL | 2890 | NULL |
@@ -2013,8 +2897,13 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
2013 | "perf sched map [<options>]", | 2897 | "perf sched map [<options>]", |
2014 | NULL | 2898 | NULL |
2015 | }; | 2899 | }; |
2900 | const char * const timehist_usage[] = { | ||
2901 | "perf sched timehist [<options>]", | ||
2902 | NULL | ||
2903 | }; | ||
2016 | const char *const sched_subcommands[] = { "record", "latency", "map", | 2904 | const char *const sched_subcommands[] = { "record", "latency", "map", |
2017 | "replay", "script", NULL }; | 2905 | "replay", "script", |
2906 | "timehist", NULL }; | ||
2018 | const char *sched_usage[] = { | 2907 | const char *sched_usage[] = { |
2019 | NULL, | 2908 | NULL, |
2020 | NULL | 2909 | NULL |
@@ -2077,6 +2966,21 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
2077 | usage_with_options(replay_usage, replay_options); | 2966 | usage_with_options(replay_usage, replay_options); |
2078 | } | 2967 | } |
2079 | return perf_sched__replay(&sched); | 2968 | return perf_sched__replay(&sched); |
2969 | } else if (!strcmp(argv[0], "timehist")) { | ||
2970 | if (argc) { | ||
2971 | argc = parse_options(argc, argv, timehist_options, | ||
2972 | timehist_usage, 0); | ||
2973 | if (argc) | ||
2974 | usage_with_options(timehist_usage, timehist_options); | ||
2975 | } | ||
2976 | if (sched.show_wakeups && sched.summary_only) { | ||
2977 | pr_err(" Error: -s and -w are mutually exclusive.\n"); | ||
2978 | parse_options_usage(timehist_usage, timehist_options, "s", true); | ||
2979 | parse_options_usage(NULL, timehist_options, "w", true); | ||
2980 | return -EINVAL; | ||
2981 | } | ||
2982 | |||
2983 | return perf_sched__timehist(&sched); | ||
2080 | } else { | 2984 | } else { |
2081 | usage_with_options(sched_usage, sched_options); | 2985 | usage_with_options(sched_usage, sched_options); |
2082 | } | 2986 | } |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fe3af9535e85..3df4178ba378 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -130,7 +130,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) | |||
130 | return err; | 130 | return err; |
131 | } | 131 | } |
132 | 132 | ||
133 | err = symbol__disassemble(sym, map, 0); | 133 | err = symbol__disassemble(sym, map, NULL, 0); |
134 | if (err == 0) { | 134 | if (err == 0) { |
135 | out_assign: | 135 | out_assign: |
136 | top->sym_filter_entry = he; | 136 | top->sym_filter_entry = he; |
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 4c18271c71c9..e6e9f7d80dbd 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c | |||
@@ -1050,7 +1050,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, | |||
1050 | (nr_pcnt - 1); | 1050 | (nr_pcnt - 1); |
1051 | } | 1051 | } |
1052 | 1052 | ||
1053 | err = symbol__disassemble(sym, map, sizeof_bdl); | 1053 | err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl); |
1054 | if (err) { | 1054 | if (err) { |
1055 | char msg[BUFSIZ]; | 1055 | char msg[BUFSIZ]; |
1056 | symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); | 1056 | symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); |
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 42d319927762..8c9308ac30b7 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c | |||
@@ -167,7 +167,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, | |||
167 | if (map->dso->annotate_warned) | 167 | if (map->dso->annotate_warned) |
168 | return -1; | 168 | return -1; |
169 | 169 | ||
170 | err = symbol__disassemble(sym, map, 0); | 170 | err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0); |
171 | if (err) { | 171 | if (err) { |
172 | char msg[BUFSIZ]; | 172 | char msg[BUFSIZ]; |
173 | symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); | 173 | symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); |
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index aeb5a441bd74..095d90a9077f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c | |||
@@ -18,17 +18,61 @@ | |||
18 | #include "annotate.h" | 18 | #include "annotate.h" |
19 | #include "evsel.h" | 19 | #include "evsel.h" |
20 | #include "block-range.h" | 20 | #include "block-range.h" |
21 | #include "arch/common.h" | ||
21 | #include <regex.h> | 22 | #include <regex.h> |
22 | #include <pthread.h> | 23 | #include <pthread.h> |
23 | #include <linux/bitops.h> | 24 | #include <linux/bitops.h> |
25 | #include <sys/utsname.h> | ||
24 | 26 | ||
25 | const char *disassembler_style; | 27 | const char *disassembler_style; |
26 | const char *objdump_path; | 28 | const char *objdump_path; |
27 | static regex_t file_lineno; | 29 | static regex_t file_lineno; |
28 | 30 | ||
29 | static struct ins *ins__find(const char *name); | 31 | static struct ins *ins__find(struct arch *arch, const char *name); |
30 | static int disasm_line__parse(char *line, char **namep, char **rawp); | 32 | static int disasm_line__parse(char *line, char **namep, char **rawp); |
31 | 33 | ||
34 | struct arch { | ||
35 | const char *name; | ||
36 | struct ins *instructions; | ||
37 | size_t nr_instructions; | ||
38 | bool sorted_instructions; | ||
39 | struct { | ||
40 | char comment_char; | ||
41 | char skip_functions_char; | ||
42 | } objdump; | ||
43 | }; | ||
44 | |||
45 | static struct ins_ops call_ops; | ||
46 | static struct ins_ops dec_ops; | ||
47 | static struct ins_ops jump_ops; | ||
48 | static struct ins_ops mov_ops; | ||
49 | static struct ins_ops nop_ops; | ||
50 | static struct ins_ops lock_ops; | ||
51 | static struct ins_ops ret_ops; | ||
52 | |||
53 | #include "arch/arm/annotate/instructions.c" | ||
54 | #include "arch/x86/annotate/instructions.c" | ||
55 | |||
56 | static struct arch architectures[] = { | ||
57 | { | ||
58 | .name = "arm", | ||
59 | .instructions = arm__instructions, | ||
60 | .nr_instructions = ARRAY_SIZE(arm__instructions), | ||
61 | .objdump = { | ||
62 | .comment_char = ';', | ||
63 | .skip_functions_char = '+', | ||
64 | }, | ||
65 | }, | ||
66 | { | ||
67 | .name = "x86", | ||
68 | .instructions = x86__instructions, | ||
69 | .nr_instructions = ARRAY_SIZE(x86__instructions), | ||
70 | .objdump = { | ||
71 | .comment_char = '#', | ||
72 | }, | ||
73 | }, | ||
74 | }; | ||
75 | |||
32 | static void ins__delete(struct ins_operands *ops) | 76 | static void ins__delete(struct ins_operands *ops) |
33 | { | 77 | { |
34 | if (ops == NULL) | 78 | if (ops == NULL) |
@@ -54,7 +98,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size, | |||
54 | return ins__raw_scnprintf(ins, bf, size, ops); | 98 | return ins__raw_scnprintf(ins, bf, size, ops); |
55 | } | 99 | } |
56 | 100 | ||
57 | static int call__parse(struct ins_operands *ops, struct map *map) | 101 | static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map) |
58 | { | 102 | { |
59 | char *endptr, *tok, *name; | 103 | char *endptr, *tok, *name; |
60 | 104 | ||
@@ -66,10 +110,9 @@ static int call__parse(struct ins_operands *ops, struct map *map) | |||
66 | 110 | ||
67 | name++; | 111 | name++; |
68 | 112 | ||
69 | #ifdef __arm__ | 113 | if (arch->objdump.skip_functions_char && |
70 | if (strchr(name, '+')) | 114 | strchr(name, arch->objdump.skip_functions_char)) |
71 | return -1; | 115 | return -1; |
72 | #endif | ||
73 | 116 | ||
74 | tok = strchr(name, '>'); | 117 | tok = strchr(name, '>'); |
75 | if (tok == NULL) | 118 | if (tok == NULL) |
@@ -118,7 +161,7 @@ bool ins__is_call(const struct ins *ins) | |||
118 | return ins->ops == &call_ops; | 161 | return ins->ops == &call_ops; |
119 | } | 162 | } |
120 | 163 | ||
121 | static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused) | 164 | static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) |
122 | { | 165 | { |
123 | const char *s = strchr(ops->raw, '+'); | 166 | const char *s = strchr(ops->raw, '+'); |
124 | 167 | ||
@@ -173,7 +216,7 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) | |||
173 | return 0; | 216 | return 0; |
174 | } | 217 | } |
175 | 218 | ||
176 | static int lock__parse(struct ins_operands *ops, struct map *map) | 219 | static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map *map) |
177 | { | 220 | { |
178 | char *name; | 221 | char *name; |
179 | 222 | ||
@@ -184,7 +227,7 @@ static int lock__parse(struct ins_operands *ops, struct map *map) | |||
184 | if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0) | 227 | if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0) |
185 | goto out_free_ops; | 228 | goto out_free_ops; |
186 | 229 | ||
187 | ops->locked.ins = ins__find(name); | 230 | ops->locked.ins = ins__find(arch, name); |
188 | free(name); | 231 | free(name); |
189 | 232 | ||
190 | if (ops->locked.ins == NULL) | 233 | if (ops->locked.ins == NULL) |
@@ -194,7 +237,7 @@ static int lock__parse(struct ins_operands *ops, struct map *map) | |||
194 | return 0; | 237 | return 0; |
195 | 238 | ||
196 | if (ops->locked.ins->ops->parse && | 239 | if (ops->locked.ins->ops->parse && |
197 | ops->locked.ins->ops->parse(ops->locked.ops, map) < 0) | 240 | ops->locked.ins->ops->parse(arch, ops->locked.ops, map) < 0) |
198 | goto out_free_ops; | 241 | goto out_free_ops; |
199 | 242 | ||
200 | return 0; | 243 | return 0; |
@@ -237,7 +280,7 @@ static struct ins_ops lock_ops = { | |||
237 | .scnprintf = lock__scnprintf, | 280 | .scnprintf = lock__scnprintf, |
238 | }; | 281 | }; |
239 | 282 | ||
240 | static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused) | 283 | static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *map __maybe_unused) |
241 | { | 284 | { |
242 | char *s = strchr(ops->raw, ','), *target, *comment, prev; | 285 | char *s = strchr(ops->raw, ','), *target, *comment, prev; |
243 | 286 | ||
@@ -252,11 +295,7 @@ static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused) | |||
252 | return -1; | 295 | return -1; |
253 | 296 | ||
254 | target = ++s; | 297 | target = ++s; |
255 | #ifdef __arm__ | 298 | comment = strchr(s, arch->objdump.comment_char); |
256 | comment = strchr(s, ';'); | ||
257 | #else | ||
258 | comment = strchr(s, '#'); | ||
259 | #endif | ||
260 | 299 | ||
261 | if (comment != NULL) | 300 | if (comment != NULL) |
262 | s = comment - 1; | 301 | s = comment - 1; |
@@ -304,7 +343,7 @@ static struct ins_ops mov_ops = { | |||
304 | .scnprintf = mov__scnprintf, | 343 | .scnprintf = mov__scnprintf, |
305 | }; | 344 | }; |
306 | 345 | ||
307 | static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused) | 346 | static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) |
308 | { | 347 | { |
309 | char *target, *comment, *s, prev; | 348 | char *target, *comment, *s, prev; |
310 | 349 | ||
@@ -364,99 +403,6 @@ bool ins__is_ret(const struct ins *ins) | |||
364 | return ins->ops == &ret_ops; | 403 | return ins->ops == &ret_ops; |
365 | } | 404 | } |
366 | 405 | ||
367 | static struct ins instructions[] = { | ||
368 | { .name = "add", .ops = &mov_ops, }, | ||
369 | { .name = "addl", .ops = &mov_ops, }, | ||
370 | { .name = "addq", .ops = &mov_ops, }, | ||
371 | { .name = "addw", .ops = &mov_ops, }, | ||
372 | { .name = "and", .ops = &mov_ops, }, | ||
373 | #ifdef __arm__ | ||
374 | { .name = "b", .ops = &jump_ops, }, // might also be a call | ||
375 | { .name = "bcc", .ops = &jump_ops, }, | ||
376 | { .name = "bcs", .ops = &jump_ops, }, | ||
377 | { .name = "beq", .ops = &jump_ops, }, | ||
378 | { .name = "bge", .ops = &jump_ops, }, | ||
379 | { .name = "bgt", .ops = &jump_ops, }, | ||
380 | { .name = "bhi", .ops = &jump_ops, }, | ||
381 | { .name = "bl", .ops = &call_ops, }, | ||
382 | { .name = "bls", .ops = &jump_ops, }, | ||
383 | { .name = "blt", .ops = &jump_ops, }, | ||
384 | { .name = "blx", .ops = &call_ops, }, | ||
385 | { .name = "bne", .ops = &jump_ops, }, | ||
386 | #endif | ||
387 | { .name = "bts", .ops = &mov_ops, }, | ||
388 | { .name = "call", .ops = &call_ops, }, | ||
389 | { .name = "callq", .ops = &call_ops, }, | ||
390 | { .name = "cmp", .ops = &mov_ops, }, | ||
391 | { .name = "cmpb", .ops = &mov_ops, }, | ||
392 | { .name = "cmpl", .ops = &mov_ops, }, | ||
393 | { .name = "cmpq", .ops = &mov_ops, }, | ||
394 | { .name = "cmpw", .ops = &mov_ops, }, | ||
395 | { .name = "cmpxch", .ops = &mov_ops, }, | ||
396 | { .name = "dec", .ops = &dec_ops, }, | ||
397 | { .name = "decl", .ops = &dec_ops, }, | ||
398 | { .name = "imul", .ops = &mov_ops, }, | ||
399 | { .name = "inc", .ops = &dec_ops, }, | ||
400 | { .name = "incl", .ops = &dec_ops, }, | ||
401 | { .name = "ja", .ops = &jump_ops, }, | ||
402 | { .name = "jae", .ops = &jump_ops, }, | ||
403 | { .name = "jb", .ops = &jump_ops, }, | ||
404 | { .name = "jbe", .ops = &jump_ops, }, | ||
405 | { .name = "jc", .ops = &jump_ops, }, | ||
406 | { .name = "jcxz", .ops = &jump_ops, }, | ||
407 | { .name = "je", .ops = &jump_ops, }, | ||
408 | { .name = "jecxz", .ops = &jump_ops, }, | ||
409 | { .name = "jg", .ops = &jump_ops, }, | ||
410 | { .name = "jge", .ops = &jump_ops, }, | ||
411 | { .name = "jl", .ops = &jump_ops, }, | ||
412 | { .name = "jle", .ops = &jump_ops, }, | ||
413 | { .name = "jmp", .ops = &jump_ops, }, | ||
414 | { .name = "jmpq", .ops = &jump_ops, }, | ||
415 | { .name = "jna", .ops = &jump_ops, }, | ||
416 | { .name = "jnae", .ops = &jump_ops, }, | ||
417 | { .name = "jnb", .ops = &jump_ops, }, | ||
418 | { .name = "jnbe", .ops = &jump_ops, }, | ||
419 | { .name = "jnc", .ops = &jump_ops, }, | ||
420 | { .name = "jne", .ops = &jump_ops, }, | ||
421 | { .name = "jng", .ops = &jump_ops, }, | ||
422 | { .name = "jnge", .ops = &jump_ops, }, | ||
423 | { .name = "jnl", .ops = &jump_ops, }, | ||
424 | { .name = "jnle", .ops = &jump_ops, }, | ||
425 | { .name = "jno", .ops = &jump_ops, }, | ||
426 | { .name = "jnp", .ops = &jump_ops, }, | ||
427 | { .name = "jns", .ops = &jump_ops, }, | ||
428 | { .name = "jnz", .ops = &jump_ops, }, | ||
429 | { .name = "jo", .ops = &jump_ops, }, | ||
430 | { .name = "jp", .ops = &jump_ops, }, | ||
431 | { .name = "jpe", .ops = &jump_ops, }, | ||
432 | { .name = "jpo", .ops = &jump_ops, }, | ||
433 | { .name = "jrcxz", .ops = &jump_ops, }, | ||
434 | { .name = "js", .ops = &jump_ops, }, | ||
435 | { .name = "jz", .ops = &jump_ops, }, | ||
436 | { .name = "lea", .ops = &mov_ops, }, | ||
437 | { .name = "lock", .ops = &lock_ops, }, | ||
438 | { .name = "mov", .ops = &mov_ops, }, | ||
439 | { .name = "movb", .ops = &mov_ops, }, | ||
440 | { .name = "movdqa",.ops = &mov_ops, }, | ||
441 | { .name = "movl", .ops = &mov_ops, }, | ||
442 | { .name = "movq", .ops = &mov_ops, }, | ||
443 | { .name = "movslq", .ops = &mov_ops, }, | ||
444 | { .name = "movzbl", .ops = &mov_ops, }, | ||
445 | { .name = "movzwl", .ops = &mov_ops, }, | ||
446 | { .name = "nop", .ops = &nop_ops, }, | ||
447 | { .name = "nopl", .ops = &nop_ops, }, | ||
448 | { .name = "nopw", .ops = &nop_ops, }, | ||
449 | { .name = "or", .ops = &mov_ops, }, | ||
450 | { .name = "orl", .ops = &mov_ops, }, | ||
451 | { .name = "test", .ops = &mov_ops, }, | ||
452 | { .name = "testb", .ops = &mov_ops, }, | ||
453 | { .name = "testl", .ops = &mov_ops, }, | ||
454 | { .name = "xadd", .ops = &mov_ops, }, | ||
455 | { .name = "xbeginl", .ops = &jump_ops, }, | ||
456 | { .name = "xbeginq", .ops = &jump_ops, }, | ||
457 | { .name = "retq", .ops = &ret_ops, }, | ||
458 | }; | ||
459 | |||
460 | static int ins__key_cmp(const void *name, const void *insp) | 406 | static int ins__key_cmp(const void *name, const void *insp) |
461 | { | 407 | { |
462 | const struct ins *ins = insp; | 408 | const struct ins *ins = insp; |
@@ -472,24 +418,58 @@ static int ins__cmp(const void *a, const void *b) | |||
472 | return strcmp(ia->name, ib->name); | 418 | return strcmp(ia->name, ib->name); |
473 | } | 419 | } |
474 | 420 | ||
475 | static void ins__sort(void) | 421 | static void ins__sort(struct arch *arch) |
476 | { | 422 | { |
477 | const int nmemb = ARRAY_SIZE(instructions); | 423 | const int nmemb = arch->nr_instructions; |
478 | 424 | ||
479 | qsort(instructions, nmemb, sizeof(struct ins), ins__cmp); | 425 | qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); |
480 | } | 426 | } |
481 | 427 | ||
482 | static struct ins *ins__find(const char *name) | 428 | static struct ins *ins__find(struct arch *arch, const char *name) |
483 | { | 429 | { |
484 | const int nmemb = ARRAY_SIZE(instructions); | 430 | const int nmemb = arch->nr_instructions; |
431 | |||
432 | if (!arch->sorted_instructions) { | ||
433 | ins__sort(arch); | ||
434 | arch->sorted_instructions = true; | ||
435 | } | ||
436 | |||
437 | return bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); | ||
438 | } | ||
439 | |||
440 | static int arch__key_cmp(const void *name, const void *archp) | ||
441 | { | ||
442 | const struct arch *arch = archp; | ||
443 | |||
444 | return strcmp(name, arch->name); | ||
445 | } | ||
446 | |||
447 | static int arch__cmp(const void *a, const void *b) | ||
448 | { | ||
449 | const struct arch *aa = a; | ||
450 | const struct arch *ab = b; | ||
451 | |||
452 | return strcmp(aa->name, ab->name); | ||
453 | } | ||
454 | |||
455 | static void arch__sort(void) | ||
456 | { | ||
457 | const int nmemb = ARRAY_SIZE(architectures); | ||
458 | |||
459 | qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); | ||
460 | } | ||
461 | |||
462 | static struct arch *arch__find(const char *name) | ||
463 | { | ||
464 | const int nmemb = ARRAY_SIZE(architectures); | ||
485 | static bool sorted; | 465 | static bool sorted; |
486 | 466 | ||
487 | if (!sorted) { | 467 | if (!sorted) { |
488 | ins__sort(); | 468 | arch__sort(); |
489 | sorted = true; | 469 | sorted = true; |
490 | } | 470 | } |
491 | 471 | ||
492 | return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp); | 472 | return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); |
493 | } | 473 | } |
494 | 474 | ||
495 | int symbol__alloc_hist(struct symbol *sym) | 475 | int symbol__alloc_hist(struct symbol *sym) |
@@ -709,9 +689,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip) | |||
709 | return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); | 689 | return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); |
710 | } | 690 | } |
711 | 691 | ||
712 | static void disasm_line__init_ins(struct disasm_line *dl, struct map *map) | 692 | static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map) |
713 | { | 693 | { |
714 | dl->ins = ins__find(dl->name); | 694 | dl->ins = ins__find(arch, dl->name); |
715 | 695 | ||
716 | if (dl->ins == NULL) | 696 | if (dl->ins == NULL) |
717 | return; | 697 | return; |
@@ -719,7 +699,7 @@ static void disasm_line__init_ins(struct disasm_line *dl, struct map *map) | |||
719 | if (!dl->ins->ops) | 699 | if (!dl->ins->ops) |
720 | return; | 700 | return; |
721 | 701 | ||
722 | if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops, map) < 0) | 702 | if (dl->ins->ops->parse && dl->ins->ops->parse(arch, &dl->ops, map) < 0) |
723 | dl->ins = NULL; | 703 | dl->ins = NULL; |
724 | } | 704 | } |
725 | 705 | ||
@@ -762,6 +742,7 @@ out_free_name: | |||
762 | 742 | ||
763 | static struct disasm_line *disasm_line__new(s64 offset, char *line, | 743 | static struct disasm_line *disasm_line__new(s64 offset, char *line, |
764 | size_t privsize, int line_nr, | 744 | size_t privsize, int line_nr, |
745 | struct arch *arch, | ||
765 | struct map *map) | 746 | struct map *map) |
766 | { | 747 | { |
767 | struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); | 748 | struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); |
@@ -777,7 +758,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, | |||
777 | if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0) | 758 | if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0) |
778 | goto out_free_line; | 759 | goto out_free_line; |
779 | 760 | ||
780 | disasm_line__init_ins(dl, map); | 761 | disasm_line__init_ins(dl, arch, map); |
781 | } | 762 | } |
782 | } | 763 | } |
783 | 764 | ||
@@ -1087,6 +1068,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st | |||
1087 | * The ops.raw part will be parsed further according to type of the instruction. | 1068 | * The ops.raw part will be parsed further according to type of the instruction. |
1088 | */ | 1069 | */ |
1089 | static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, | 1070 | static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, |
1071 | struct arch *arch, | ||
1090 | FILE *file, size_t privsize, | 1072 | FILE *file, size_t privsize, |
1091 | int *line_nr) | 1073 | int *line_nr) |
1092 | { | 1074 | { |
@@ -1149,7 +1131,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, | |||
1149 | parsed_line = tmp2 + 1; | 1131 | parsed_line = tmp2 + 1; |
1150 | } | 1132 | } |
1151 | 1133 | ||
1152 | dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, map); | 1134 | dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map); |
1153 | free(line); | 1135 | free(line); |
1154 | (*line_nr)++; | 1136 | (*line_nr)++; |
1155 | 1137 | ||
@@ -1280,10 +1262,23 @@ fallback: | |||
1280 | return 0; | 1262 | return 0; |
1281 | } | 1263 | } |
1282 | 1264 | ||
1283 | int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) | 1265 | static const char *annotate__norm_arch(const char *arch_name) |
1266 | { | ||
1267 | struct utsname uts; | ||
1268 | |||
1269 | if (!arch_name) { /* Assume we are annotating locally. */ | ||
1270 | if (uname(&uts) < 0) | ||
1271 | return NULL; | ||
1272 | arch_name = uts.machine; | ||
1273 | } | ||
1274 | return normalize_arch((char *)arch_name); | ||
1275 | } | ||
1276 | |||
1277 | int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize) | ||
1284 | { | 1278 | { |
1285 | struct dso *dso = map->dso; | 1279 | struct dso *dso = map->dso; |
1286 | char command[PATH_MAX * 2]; | 1280 | char command[PATH_MAX * 2]; |
1281 | struct arch *arch = NULL; | ||
1287 | FILE *file; | 1282 | FILE *file; |
1288 | char symfs_filename[PATH_MAX]; | 1283 | char symfs_filename[PATH_MAX]; |
1289 | struct kcore_extract kce; | 1284 | struct kcore_extract kce; |
@@ -1297,6 +1292,14 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) | |||
1297 | if (err) | 1292 | if (err) |
1298 | return err; | 1293 | return err; |
1299 | 1294 | ||
1295 | arch_name = annotate__norm_arch(arch_name); | ||
1296 | if (!arch_name) | ||
1297 | return -1; | ||
1298 | |||
1299 | arch = arch__find(arch_name); | ||
1300 | if (arch == NULL) | ||
1301 | return -ENOTSUP; | ||
1302 | |||
1300 | pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, | 1303 | pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, |
1301 | symfs_filename, sym->name, map->unmap_ip(map, sym->start), | 1304 | symfs_filename, sym->name, map->unmap_ip(map, sym->start), |
1302 | map->unmap_ip(map, sym->end)); | 1305 | map->unmap_ip(map, sym->end)); |
@@ -1395,7 +1398,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) | |||
1395 | 1398 | ||
1396 | nline = 0; | 1399 | nline = 0; |
1397 | while (!feof(file)) { | 1400 | while (!feof(file)) { |
1398 | if (symbol__parse_objdump_line(sym, map, file, privsize, | 1401 | if (symbol__parse_objdump_line(sym, map, arch, file, privsize, |
1399 | &lineno) < 0) | 1402 | &lineno) < 0) |
1400 | break; | 1403 | break; |
1401 | nline++; | 1404 | nline++; |
@@ -1793,7 +1796,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, | |||
1793 | struct rb_root source_line = RB_ROOT; | 1796 | struct rb_root source_line = RB_ROOT; |
1794 | u64 len; | 1797 | u64 len; |
1795 | 1798 | ||
1796 | if (symbol__disassemble(sym, map, 0) < 0) | 1799 | if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0) |
1797 | return -1; | 1800 | return -1; |
1798 | 1801 | ||
1799 | len = symbol__size(sym); | 1802 | len = symbol__size(sym); |
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 5bbcec173b82..8e490b5c91bc 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h | |||
@@ -34,9 +34,11 @@ struct ins_operands { | |||
34 | }; | 34 | }; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | struct arch; | ||
38 | |||
37 | struct ins_ops { | 39 | struct ins_ops { |
38 | void (*free)(struct ins_operands *ops); | 40 | void (*free)(struct ins_operands *ops); |
39 | int (*parse)(struct ins_operands *ops, struct map *map); | 41 | int (*parse)(struct arch *arch, struct ins_operands *ops, struct map *map); |
40 | int (*scnprintf)(struct ins *ins, char *bf, size_t size, | 42 | int (*scnprintf)(struct ins *ins, char *bf, size_t size, |
41 | struct ins_operands *ops); | 43 | struct ins_operands *ops); |
42 | }; | 44 | }; |
@@ -156,7 +158,7 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); | |||
156 | int symbol__alloc_hist(struct symbol *sym); | 158 | int symbol__alloc_hist(struct symbol *sym); |
157 | void symbol__annotate_zero_histograms(struct symbol *sym); | 159 | void symbol__annotate_zero_histograms(struct symbol *sym); |
158 | 160 | ||
159 | int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize); | 161 | int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize); |
160 | 162 | ||
161 | enum symbol_disassemble_errno { | 163 | enum symbol_disassemble_errno { |
162 | SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, | 164 | SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e58a2fbf3b16..b2365a63db45 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -1481,7 +1481,7 @@ retry_sample_id: | |||
1481 | 1481 | ||
1482 | group_fd = get_group_fd(evsel, cpu, thread); | 1482 | group_fd = get_group_fd(evsel, cpu, thread); |
1483 | retry_open: | 1483 | retry_open: |
1484 | pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n", | 1484 | pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", |
1485 | pid, cpus->map[cpu], group_fd, flags); | 1485 | pid, cpus->map[cpu], group_fd, flags); |
1486 | 1486 | ||
1487 | FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, | 1487 | FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, |
@@ -1490,11 +1490,13 @@ retry_open: | |||
1490 | group_fd, flags); | 1490 | group_fd, flags); |
1491 | if (FD(evsel, cpu, thread) < 0) { | 1491 | if (FD(evsel, cpu, thread) < 0) { |
1492 | err = -errno; | 1492 | err = -errno; |
1493 | pr_debug2("sys_perf_event_open failed, error %d\n", | 1493 | pr_debug2("\nsys_perf_event_open failed, error %d\n", |
1494 | err); | 1494 | err); |
1495 | goto try_fallback; | 1495 | goto try_fallback; |
1496 | } | 1496 | } |
1497 | 1497 | ||
1498 | pr_debug2(" = %d\n", FD(evsel, cpu, thread)); | ||
1499 | |||
1498 | if (evsel->bpf_fd >= 0) { | 1500 | if (evsel->bpf_fd >= 0) { |
1499 | int evt_fd = FD(evsel, cpu, thread); | 1501 | int evt_fd = FD(evsel, cpu, thread); |
1500 | int bpf_fd = evsel->bpf_fd; | 1502 | int bpf_fd = evsel->bpf_fd; |
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8cd7cd227483..27fa3a343577 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h | |||
@@ -391,6 +391,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, | |||
391 | #define EVSEL__PRINT_ONELINE (1<<4) | 391 | #define EVSEL__PRINT_ONELINE (1<<4) |
392 | #define EVSEL__PRINT_SRCLINE (1<<5) | 392 | #define EVSEL__PRINT_SRCLINE (1<<5) |
393 | #define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) | 393 | #define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) |
394 | #define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) | ||
394 | 395 | ||
395 | struct callchain_cursor; | 396 | struct callchain_cursor; |
396 | 397 | ||
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 662a0a6182e7..53bb614feafb 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c | |||
@@ -108,7 +108,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, | |||
108 | int print_oneline = print_opts & EVSEL__PRINT_ONELINE; | 108 | int print_oneline = print_opts & EVSEL__PRINT_ONELINE; |
109 | int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; | 109 | int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; |
110 | int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; | 110 | int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; |
111 | int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW; | ||
111 | char s = print_oneline ? ' ' : '\t'; | 112 | char s = print_oneline ? ' ' : '\t'; |
113 | bool first = true; | ||
112 | 114 | ||
113 | if (sample->callchain) { | 115 | if (sample->callchain) { |
114 | struct addr_location node_al; | 116 | struct addr_location node_al; |
@@ -124,6 +126,9 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, | |||
124 | 126 | ||
125 | printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); | 127 | printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); |
126 | 128 | ||
129 | if (print_arrow && !first) | ||
130 | printed += fprintf(fp, " <-"); | ||
131 | |||
127 | if (print_ip) | 132 | if (print_ip) |
128 | printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); | 133 | printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); |
129 | 134 | ||
@@ -137,7 +142,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, | |||
137 | 142 | ||
138 | if (print_symoffset) { | 143 | if (print_symoffset) { |
139 | printed += __symbol__fprintf_symname_offs(node->sym, &node_al, | 144 | printed += __symbol__fprintf_symname_offs(node->sym, &node_al, |
140 | print_unknown_as_addr, fp); | 145 | print_unknown_as_addr, |
146 | true, fp); | ||
141 | } else { | 147 | } else { |
142 | printed += __symbol__fprintf_symname(node->sym, &node_al, | 148 | printed += __symbol__fprintf_symname(node->sym, &node_al, |
143 | print_unknown_as_addr, fp); | 149 | print_unknown_as_addr, fp); |
@@ -157,6 +163,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, | |||
157 | printed += fprintf(fp, "\n"); | 163 | printed += fprintf(fp, "\n"); |
158 | 164 | ||
159 | callchain_cursor_advance(cursor); | 165 | callchain_cursor_advance(cursor); |
166 | first = false; | ||
160 | } | 167 | } |
161 | } | 168 | } |
162 | 169 | ||
@@ -188,7 +195,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, | |||
188 | printed += fprintf(fp, " "); | 195 | printed += fprintf(fp, " "); |
189 | if (print_symoffset) { | 196 | if (print_symoffset) { |
190 | printed += __symbol__fprintf_symname_offs(al->sym, al, | 197 | printed += __symbol__fprintf_symname_offs(al->sym, al, |
191 | print_unknown_as_addr, fp); | 198 | print_unknown_as_addr, |
199 | true, fp); | ||
192 | } else { | 200 | } else { |
193 | printed += __symbol__fprintf_symname(al->sym, al, | 201 | printed += __symbol__fprintf_symname(al->sym, al, |
194 | print_unknown_as_addr, fp); | 202 | print_unknown_as_addr, fp); |
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index e50773286ef6..1d4ab53c60ca 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c | |||
@@ -280,6 +280,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) | |||
280 | u64 lock = data_src->mem_lock; | 280 | u64 lock = data_src->mem_lock; |
281 | int err = 0; | 281 | int err = 0; |
282 | 282 | ||
283 | #define HITM_INC(__f) \ | ||
284 | do { \ | ||
285 | stats->__f++; \ | ||
286 | stats->tot_hitm++; \ | ||
287 | } while (0) | ||
288 | |||
283 | #define P(a, b) PERF_MEM_##a##_##b | 289 | #define P(a, b) PERF_MEM_##a##_##b |
284 | 290 | ||
285 | stats->nr_entries++; | 291 | stats->nr_entries++; |
@@ -303,7 +309,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) | |||
303 | if (lvl & P(LVL, L2 )) stats->ld_l2hit++; | 309 | if (lvl & P(LVL, L2 )) stats->ld_l2hit++; |
304 | if (lvl & P(LVL, L3 )) { | 310 | if (lvl & P(LVL, L3 )) { |
305 | if (snoop & P(SNOOP, HITM)) | 311 | if (snoop & P(SNOOP, HITM)) |
306 | stats->lcl_hitm++; | 312 | HITM_INC(lcl_hitm); |
307 | else | 313 | else |
308 | stats->ld_llchit++; | 314 | stats->ld_llchit++; |
309 | } | 315 | } |
@@ -331,7 +337,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) | |||
331 | if (snoop & P(SNOOP, HIT)) | 337 | if (snoop & P(SNOOP, HIT)) |
332 | stats->rmt_hit++; | 338 | stats->rmt_hit++; |
333 | else if (snoop & P(SNOOP, HITM)) | 339 | else if (snoop & P(SNOOP, HITM)) |
334 | stats->rmt_hitm++; | 340 | HITM_INC(rmt_hitm); |
335 | } | 341 | } |
336 | 342 | ||
337 | if ((lvl & P(LVL, MISS))) | 343 | if ((lvl & P(LVL, MISS))) |
@@ -364,6 +370,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) | |||
364 | } | 370 | } |
365 | 371 | ||
366 | #undef P | 372 | #undef P |
373 | #undef HITM_INC | ||
367 | return err; | 374 | return err; |
368 | } | 375 | } |
369 | 376 | ||
@@ -390,6 +397,7 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) | |||
390 | stats->ld_llchit += add->ld_llchit; | 397 | stats->ld_llchit += add->ld_llchit; |
391 | stats->lcl_hitm += add->lcl_hitm; | 398 | stats->lcl_hitm += add->lcl_hitm; |
392 | stats->rmt_hitm += add->rmt_hitm; | 399 | stats->rmt_hitm += add->rmt_hitm; |
400 | stats->tot_hitm += add->tot_hitm; | ||
393 | stats->rmt_hit += add->rmt_hit; | 401 | stats->rmt_hit += add->rmt_hit; |
394 | stats->lcl_dram += add->lcl_dram; | 402 | stats->lcl_dram += add->lcl_dram; |
395 | stats->rmt_dram += add->rmt_dram; | 403 | stats->rmt_dram += add->rmt_dram; |
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index faf80403b519..40f72ee4f42a 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h | |||
@@ -59,6 +59,7 @@ struct c2c_stats { | |||
59 | u32 ld_llchit; /* count of loads that hit LLC */ | 59 | u32 ld_llchit; /* count of loads that hit LLC */ |
60 | u32 lcl_hitm; /* count of loads with local HITM */ | 60 | u32 lcl_hitm; /* count of loads with local HITM */ |
61 | u32 rmt_hitm; /* count of loads with remote HITM */ | 61 | u32 rmt_hitm; /* count of loads with remote HITM */ |
62 | u32 tot_hitm; /* count of loads with local and remote HITM */ | ||
62 | u32 rmt_hit; /* count of loads with remote hit clean; */ | 63 | u32 rmt_hit; /* count of loads with remote hit clean; */ |
63 | u32 lcl_dram; /* count of loads miss to local DRAM */ | 64 | u32 lcl_dram; /* count of loads miss to local DRAM */ |
64 | u32 rmt_dram; /* count of loads miss to remote DRAM */ | 65 | u32 rmt_dram; /* count of loads miss to remote DRAM */ |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2d0a905c879a..dec7e2d44885 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -282,7 +282,8 @@ int symbol__annotation_init(void); | |||
282 | struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); | 282 | struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); |
283 | size_t __symbol__fprintf_symname_offs(const struct symbol *sym, | 283 | size_t __symbol__fprintf_symname_offs(const struct symbol *sym, |
284 | const struct addr_location *al, | 284 | const struct addr_location *al, |
285 | bool unknown_as_addr, FILE *fp); | 285 | bool unknown_as_addr, |
286 | bool print_offsets, FILE *fp); | ||
286 | size_t symbol__fprintf_symname_offs(const struct symbol *sym, | 287 | size_t symbol__fprintf_symname_offs(const struct symbol *sym, |
287 | const struct addr_location *al, FILE *fp); | 288 | const struct addr_location *al, FILE *fp); |
288 | size_t __symbol__fprintf_symname(const struct symbol *sym, | 289 | size_t __symbol__fprintf_symname(const struct symbol *sym, |
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index a680bdaa65dc..7c6b33e8e2d2 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c | |||
@@ -15,14 +15,15 @@ size_t symbol__fprintf(struct symbol *sym, FILE *fp) | |||
15 | 15 | ||
16 | size_t __symbol__fprintf_symname_offs(const struct symbol *sym, | 16 | size_t __symbol__fprintf_symname_offs(const struct symbol *sym, |
17 | const struct addr_location *al, | 17 | const struct addr_location *al, |
18 | bool unknown_as_addr, FILE *fp) | 18 | bool unknown_as_addr, |
19 | bool print_offsets, FILE *fp) | ||
19 | { | 20 | { |
20 | unsigned long offset; | 21 | unsigned long offset; |
21 | size_t length; | 22 | size_t length; |
22 | 23 | ||
23 | if (sym && sym->name) { | 24 | if (sym && sym->name) { |
24 | length = fprintf(fp, "%s", sym->name); | 25 | length = fprintf(fp, "%s", sym->name); |
25 | if (al) { | 26 | if (al && print_offsets) { |
26 | if (al->addr < sym->end) | 27 | if (al->addr < sym->end) |
27 | offset = al->addr - sym->start; | 28 | offset = al->addr - sym->start; |
28 | else | 29 | else |
@@ -40,19 +41,19 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym, | |||
40 | const struct addr_location *al, | 41 | const struct addr_location *al, |
41 | FILE *fp) | 42 | FILE *fp) |
42 | { | 43 | { |
43 | return __symbol__fprintf_symname_offs(sym, al, false, fp); | 44 | return __symbol__fprintf_symname_offs(sym, al, false, true, fp); |
44 | } | 45 | } |
45 | 46 | ||
46 | size_t __symbol__fprintf_symname(const struct symbol *sym, | 47 | size_t __symbol__fprintf_symname(const struct symbol *sym, |
47 | const struct addr_location *al, | 48 | const struct addr_location *al, |
48 | bool unknown_as_addr, FILE *fp) | 49 | bool unknown_as_addr, FILE *fp) |
49 | { | 50 | { |
50 | return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp); | 51 | return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp); |
51 | } | 52 | } |
52 | 53 | ||
53 | size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) | 54 | size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) |
54 | { | 55 | { |
55 | return __symbol__fprintf_symname_offs(sym, NULL, false, fp); | 56 | return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp); |
56 | } | 57 | } |
57 | 58 | ||
58 | size_t dso__fprintf_symbols_by_name(struct dso *dso, | 59 | size_t dso__fprintf_symbols_by_name(struct dso *dso, |