about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-09-12 14:28:13 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-09-12 14:28:13 -0400
commit e6328a7abe7f8fcd32e9d3bcbd14ff2161bf71c9 (patch)
tree 12df1e085c05269a9289a5b7bcdad864993e29ad
parent 33f82bda010224e908e23e59150b4d36904affe9 (diff)
parent 770e96125515daf1c7bc179323f2e0d488dfe6ac (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf tooling updates from Ingo Molnar:
 "Perf tooling updates and fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf annotate browser: Help for cycling thru hottest instructions with TAB/shift+TAB
  perf stat: Only auto-merge events that are PMU aliases
  perf test: Add test case for PERF_SAMPLE_PHYS_ADDR
  perf script: Support physical address
  perf mem: Support physical address
  perf sort: Add sort option for physical address
  perf tools: Support new sample type for physical address
  perf vendor events powerpc: Remove duplicate events
  perf intel-pt: Fix syntax in documentation of config option
  perf test powerpc: Fix 'Object code reading' test
  perf trace: Support syscall name globbing
  perf syscalltbl: Support glob matching on syscall names
  perf report: Calculate the average cycles of iterations
-rw-r--r-- tools/include/uapi/linux/perf_event.h 4
-rw-r--r-- tools/perf/Documentation/intel-pt.txt 2
-rw-r--r-- tools/perf/Documentation/perf-mem.txt 4
-rw-r--r-- tools/perf/Documentation/perf-record.txt 5
-rw-r--r-- tools/perf/Documentation/perf-report.txt 1
-rw-r--r-- tools/perf/Documentation/perf-script.txt 2
-rw-r--r-- tools/perf/Documentation/perf-trace.txt 2
-rw-r--r-- tools/perf/builtin-mem.c 97
-rw-r--r-- tools/perf/builtin-record.c 2
-rw-r--r-- tools/perf/builtin-script.c 15
-rw-r--r-- tools/perf/builtin-stat.c 2
-rw-r--r-- tools/perf/builtin-trace.c 39
-rw-r--r-- tools/perf/perf.h 1
-rw-r--r-- tools/perf/pmu-events/arch/powerpc/power9/frontend.json 7
-rw-r--r-- tools/perf/pmu-events/arch/powerpc/power9/other.json 120
-rw-r--r-- tools/perf/pmu-events/arch/powerpc/power9/pipeline.json 7
-rw-r--r-- tools/perf/pmu-events/arch/powerpc/power9/pmc.json 7
-rw-r--r-- tools/perf/tests/code-reading.c 5
-rw-r--r-- tools/perf/tests/sample-parsing.c 6
-rw-r--r-- tools/perf/ui/browsers/annotate.c 3
-rw-r--r-- tools/perf/ui/browsers/hists.c 8
-rw-r--r-- tools/perf/ui/stdio/hist.c 10
-rw-r--r-- tools/perf/util/callchain.c 49
-rw-r--r-- tools/perf/util/callchain.h 9
-rw-r--r-- tools/perf/util/event.h 1
-rw-r--r-- tools/perf/util/evsel.c 19
-rw-r--r-- tools/perf/util/evsel.h 1
-rw-r--r-- tools/perf/util/hist.c 4
-rw-r--r-- tools/perf/util/hist.h 1
-rw-r--r-- tools/perf/util/machine.c 96
-rw-r--r-- tools/perf/util/parse-events.c 24
-rw-r--r-- tools/perf/util/session.c 3
-rw-r--r-- tools/perf/util/sort.c 42
-rw-r--r-- tools/perf/util/sort.h 1
-rw-r--r-- tools/perf/util/symbol.h 1
-rw-r--r-- tools/perf/util/syscalltbl.c 33
-rw-r--r-- tools/perf/util/syscalltbl.h 3
37 files changed, 368 insertions, 268 deletions
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 2a37ae925d85..140ae638cfd6 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
139 PERF_SAMPLE_IDENTIFIER = 1U << 16, 139 PERF_SAMPLE_IDENTIFIER = 1U << 16,
140 PERF_SAMPLE_TRANSACTION = 1U << 17, 140 PERF_SAMPLE_TRANSACTION = 1U << 17,
141 PERF_SAMPLE_REGS_INTR = 1U << 18, 141 PERF_SAMPLE_REGS_INTR = 1U << 18,
142 PERF_SAMPLE_PHYS_ADDR = 1U << 19,
142 143
143 PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */ 144 PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
144}; 145};
145 146
146/* 147/*
@@ -814,6 +815,7 @@ enum perf_event_type {
814 * { u64 transaction; } && PERF_SAMPLE_TRANSACTION 815 * { u64 transaction; } && PERF_SAMPLE_TRANSACTION
815 * { u64 abi; # enum perf_sample_regs_abi 816 * { u64 abi; # enum perf_sample_regs_abi
816 * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR 817 * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
818 * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
817 * }; 819 * };
818 */ 820 */
819 PERF_RECORD_SAMPLE = 9, 821 PERF_RECORD_SAMPLE = 9,
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index ab1b0825130a..76971d2e4164 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -873,7 +873,7 @@ amended to take the number of elements as a parameter.
873 873
874 $ cat ~/.perfconfig 874 $ cat ~/.perfconfig
875 [intel-pt] 875 [intel-pt]
876 mispred-all 876 mispred-all = on
877 877
878 $ perf record -e intel_pt//u ./sort 3000 878 $ perf record -e intel_pt//u ./sort 3000
879 Bubble sorting array of 3000 elements 879 Bubble sorting array of 3000 elements
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 73496320fca3..4be08a1e3f8d 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -59,6 +59,10 @@ OPTIONS
59--ldload:: 59--ldload::
60 Specify desired latency for loads event. 60 Specify desired latency for loads event.
61 61
62-p::
63--phys-data::
64 Record/Report sample physical addresses
65
62SEE ALSO 66SEE ALSO
63-------- 67--------
64linkperf:perf-record[1], linkperf:perf-report[1] 68linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 9bdea047c5db..e397453e5a46 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -249,7 +249,10 @@ OPTIONS
249 249
250-d:: 250-d::
251--data:: 251--data::
252 Record the sample addresses. 252 Record the sample virtual addresses.
253
254--phys-data::
255 Record the sample physical addresses.
253 256
254-T:: 257-T::
255--timestamp:: 258--timestamp::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 9fa84617181e..383a98d992ed 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -137,6 +137,7 @@ OPTIONS
137 - mem: type of memory access for the data at the time of the sample 137 - mem: type of memory access for the data at the time of the sample
138 - snoop: type of snoop (if any) for the data at the time of the sample 138 - snoop: type of snoop (if any) for the data at the time of the sample
139 - dcacheline: the cacheline the data address is on at the time of the sample 139 - dcacheline: the cacheline the data address is on at the time of the sample
140 - phys_daddr: physical address of data being executed on at the time of sample
140 141
141 And the default sort keys are changed to local_weight, mem, sym, dso, 142 And the default sort keys are changed to local_weight, mem, sym, dso,
142 symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. 143 symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 5ee8796be96e..18dfcfa38454 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
117 Comma separated list of fields to print. Options are: 117 Comma separated list of fields to print. Options are:
118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
119 srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, 119 srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
120 callindent, insn, insnlen, synth. 120 callindent, insn, insnlen, synth, phys_addr.
121 Field list can be prepended with the type, trace, sw or hw, 121 Field list can be prepended with the type, trace, sw or hw,
122 to indicate to which event type the field list applies. 122 to indicate to which event type the field list applies.
123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace 123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index c1e3288a2dfb..d53bea6bd571 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -37,7 +37,7 @@ OPTIONS
37--expr:: 37--expr::
38--event:: 38--event::
39 List of syscalls and other perf events (tracepoints, HW cache events, 39 List of syscalls and other perf events (tracepoints, HW cache events,
40 etc) to show. 40 etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc.
41 See 'perf list' for a complete list of events. 41 See 'perf list' for a complete list of events.
42 Prefixing with ! shows all syscalls but the ones specified. You may 42 Prefixing with ! shows all syscalls but the ones specified. You may
43 need to escape it. 43 need to escape it.
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index e001c0290793..0f15634ef82c 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -23,6 +23,7 @@ struct perf_mem {
23 bool hide_unresolved; 23 bool hide_unresolved;
24 bool dump_raw; 24 bool dump_raw;
25 bool force; 25 bool force;
26 bool phys_addr;
26 int operation; 27 int operation;
27 const char *cpu_list; 28 const char *cpu_list;
28 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 29 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -101,6 +102,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
101 102
102 rec_argv[i++] = "-d"; 103 rec_argv[i++] = "-d";
103 104
105 if (mem->phys_addr)
106 rec_argv[i++] = "--phys-data";
107
104 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { 108 for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
105 if (!perf_mem_events[j].record) 109 if (!perf_mem_events[j].record)
106 continue; 110 continue;
@@ -161,30 +165,60 @@ dump_raw_samples(struct perf_tool *tool,
161 if (al.map != NULL) 165 if (al.map != NULL)
162 al.map->dso->hit = 1; 166 al.map->dso->hit = 1;
163 167
164 if (symbol_conf.field_sep) { 168 if (mem->phys_addr) {
165 fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 169 if (symbol_conf.field_sep) {
166 "%s0x%"PRIx64"%s%s:%s\n"; 170 fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
171 "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
172 } else {
173 fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
174 "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
175 "%s%s:%s\n";
176 symbol_conf.field_sep = " ";
177 }
178
179 printf(fmt,
180 sample->pid,
181 symbol_conf.field_sep,
182 sample->tid,
183 symbol_conf.field_sep,
184 sample->ip,
185 symbol_conf.field_sep,
186 sample->addr,
187 symbol_conf.field_sep,
188 sample->phys_addr,
189 symbol_conf.field_sep,
190 sample->weight,
191 symbol_conf.field_sep,
192 sample->data_src,
193 symbol_conf.field_sep,
194 al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
195 al.sym ? al.sym->name : "???");
167 } else { 196 } else {
168 fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 197 if (symbol_conf.field_sep) {
169 "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; 198 fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
170 symbol_conf.field_sep = " "; 199 "%s0x%"PRIx64"%s%s:%s\n";
171 } 200 } else {
201 fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
202 "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
203 symbol_conf.field_sep = " ";
204 }
172 205
173 printf(fmt, 206 printf(fmt,
174 sample->pid, 207 sample->pid,
175 symbol_conf.field_sep, 208 symbol_conf.field_sep,
176 sample->tid, 209 sample->tid,
177 symbol_conf.field_sep, 210 symbol_conf.field_sep,
178 sample->ip, 211 sample->ip,
179 symbol_conf.field_sep, 212 symbol_conf.field_sep,
180 sample->addr, 213 sample->addr,
181 symbol_conf.field_sep, 214 symbol_conf.field_sep,
182 sample->weight, 215 sample->weight,
183 symbol_conf.field_sep, 216 symbol_conf.field_sep,
184 sample->data_src, 217 sample->data_src,
185 symbol_conf.field_sep, 218 symbol_conf.field_sep,
186 al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", 219 al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
187 al.sym ? al.sym->name : "???"); 220 al.sym ? al.sym->name : "???");
221 }
188out_put: 222out_put:
189 addr_location__put(&al); 223 addr_location__put(&al);
190 return 0; 224 return 0;
@@ -224,7 +258,10 @@ static int report_raw_events(struct perf_mem *mem)
224 if (ret < 0) 258 if (ret < 0)
225 goto out_delete; 259 goto out_delete;
226 260
227 printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); 261 if (mem->phys_addr)
262 printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
263 else
264 printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
228 265
229 ret = perf_session__process_events(session); 266 ret = perf_session__process_events(session);
230 267
@@ -254,9 +291,16 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
254 * there is no weight (cost) associated with stores, so don't print 291 * there is no weight (cost) associated with stores, so don't print
255 * the column 292 * the column
256 */ 293 */
257 if (!(mem->operation & MEM_OPERATION_LOAD)) 294 if (!(mem->operation & MEM_OPERATION_LOAD)) {
258 rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," 295 if (mem->phys_addr)
259 "dso_daddr,tlb,locked"; 296 rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
297 "dso_daddr,tlb,locked,phys_daddr";
298 else
299 rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
300 "dso_daddr,tlb,locked";
301 } else if (mem->phys_addr)
302 rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
303 "dso_daddr,snoop,tlb,locked,phys_daddr";
260 304
261 for (j = 1; j < argc; j++, i++) 305 for (j = 1; j < argc; j++, i++)
262 rep_argv[i] = argv[j]; 306 rep_argv[i] = argv[j];
@@ -373,6 +417,7 @@ int cmd_mem(int argc, const char **argv)
373 "separator for columns, no spaces will be added" 417 "separator for columns, no spaces will be added"
374 " between columns '.' is reserved."), 418 " between columns '.' is reserved."),
375 OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), 419 OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
420 OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
376 OPT_END() 421 OPT_END()
377 }; 422 };
378 const char *const mem_subcommands[] = { "record", "report", NULL }; 423 const char *const mem_subcommands[] = { "record", "report", NULL };
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 36d7117a7562..56f8142ff97f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1604,6 +1604,8 @@ static struct option __record_options[] = {
1604 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 1604 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1605 "per thread counts"), 1605 "per thread counts"),
1606 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 1606 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1607 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1608 "Record the sample physical addresses"),
1607 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 1609 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1608 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 1610 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1609 &record.opts.sample_time_set, 1611 &record.opts.sample_time_set,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 378f76cdf923..3d4c3b5e1868 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -87,6 +87,7 @@ enum perf_output_field {
87 PERF_OUTPUT_BRSTACKINSN = 1U << 23, 87 PERF_OUTPUT_BRSTACKINSN = 1U << 23,
88 PERF_OUTPUT_BRSTACKOFF = 1U << 24, 88 PERF_OUTPUT_BRSTACKOFF = 1U << 24,
89 PERF_OUTPUT_SYNTH = 1U << 25, 89 PERF_OUTPUT_SYNTH = 1U << 25,
90 PERF_OUTPUT_PHYS_ADDR = 1U << 26,
90}; 91};
91 92
92struct output_option { 93struct output_option {
@@ -119,6 +120,7 @@ struct output_option {
119 {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, 120 {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
120 {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, 121 {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
121 {.str = "synth", .field = PERF_OUTPUT_SYNTH}, 122 {.str = "synth", .field = PERF_OUTPUT_SYNTH},
123 {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
122}; 124};
123 125
124enum { 126enum {
@@ -175,7 +177,8 @@ static struct {
175 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | 177 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
176 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | 178 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
177 PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | 179 PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR |
178 PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT, 180 PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
181 PERF_OUTPUT_PHYS_ADDR,
179 182
180 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, 183 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
181 }, 184 },
@@ -382,6 +385,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
382 PERF_OUTPUT_IREGS)) 385 PERF_OUTPUT_IREGS))
383 return -EINVAL; 386 return -EINVAL;
384 387
388 if (PRINT_FIELD(PHYS_ADDR) &&
389 perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
390 PERF_OUTPUT_PHYS_ADDR))
391 return -EINVAL;
392
385 return 0; 393 return 0;
386} 394}
387 395
@@ -1446,6 +1454,9 @@ static void process_event(struct perf_script *script,
1446 if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) 1454 if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
1447 print_sample_bpf_output(sample); 1455 print_sample_bpf_output(sample);
1448 print_insn(sample, attr, thread, machine); 1456 print_insn(sample, attr, thread, machine);
1457
1458 if (PRINT_FIELD(PHYS_ADDR))
1459 printf("%16" PRIx64, sample->phys_addr);
1449 printf("\n"); 1460 printf("\n");
1450} 1461}
1451 1462
@@ -2729,7 +2740,7 @@ int cmd_script(int argc, const char **argv)
2729 "Valid types: hw,sw,trace,raw,synth. " 2740 "Valid types: hw,sw,trace,raw,synth. "
2730 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 2741 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
2731 "addr,symoff,period,iregs,brstack,brstacksym,flags," 2742 "addr,symoff,period,iregs,brstack,brstacksym,flags,"
2732 "bpf-output,callindent,insn,insnlen,brstackinsn,synth", 2743 "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
2733 parse_output_fields), 2744 parse_output_fields),
2734 OPT_BOOLEAN('a', "all-cpus", &system_wide, 2745 OPT_BOOLEAN('a', "all-cpus", &system_wide,
2735 "system-wide collection from all CPUs"), 2746 "system-wide collection from all CPUs"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 866da7aa54bf..85e992d9215b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1257,7 +1257,7 @@ static bool collect_data(struct perf_evsel *counter,
1257 if (counter->merged_stat) 1257 if (counter->merged_stat)
1258 return false; 1258 return false;
1259 cb(counter, data, true); 1259 cb(counter, data, true);
1260 if (!no_merge) 1260 if (!no_merge && counter->auto_merge_stats)
1261 collect_all_aliases(counter, cb, data); 1261 collect_all_aliases(counter, cb, data);
1262 return true; 1262 return true;
1263} 1263}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d59cdadf3a79..771ddab94bb0 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1261,6 +1261,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
1261static int trace__validate_ev_qualifier(struct trace *trace) 1261static int trace__validate_ev_qualifier(struct trace *trace)
1262{ 1262{
1263 int err = 0, i; 1263 int err = 0, i;
1264 size_t nr_allocated;
1264 struct str_node *pos; 1265 struct str_node *pos;
1265 1266
1266 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); 1267 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
@@ -1274,13 +1275,18 @@ static int trace__validate_ev_qualifier(struct trace *trace)
1274 goto out; 1275 goto out;
1275 } 1276 }
1276 1277
1278 nr_allocated = trace->ev_qualifier_ids.nr;
1277 i = 0; 1279 i = 0;
1278 1280
1279 strlist__for_each_entry(pos, trace->ev_qualifier) { 1281 strlist__for_each_entry(pos, trace->ev_qualifier) {
1280 const char *sc = pos->s; 1282 const char *sc = pos->s;
1281 int id = syscalltbl__id(trace->sctbl, sc); 1283 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
1282 1284
1283 if (id < 0) { 1285 if (id < 0) {
1286 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1287 if (id >= 0)
1288 goto matches;
1289
1284 if (err == 0) { 1290 if (err == 0) {
1285 fputs("Error:\tInvalid syscall ", trace->output); 1291 fputs("Error:\tInvalid syscall ", trace->output);
1286 err = -EINVAL; 1292 err = -EINVAL;
@@ -1290,13 +1296,37 @@ static int trace__validate_ev_qualifier(struct trace *trace)
1290 1296
1291 fputs(sc, trace->output); 1297 fputs(sc, trace->output);
1292 } 1298 }
1293 1299matches:
1294 trace->ev_qualifier_ids.entries[i++] = id; 1300 trace->ev_qualifier_ids.entries[i++] = id;
1301 if (match_next == -1)
1302 continue;
1303
1304 while (1) {
1305 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1306 if (id < 0)
1307 break;
1308 if (nr_allocated == trace->ev_qualifier_ids.nr) {
1309 void *entries;
1310
1311 nr_allocated += 8;
1312 entries = realloc(trace->ev_qualifier_ids.entries,
1313 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1314 if (entries == NULL) {
1315 err = -ENOMEM;
1316 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1317 goto out_free;
1318 }
1319 trace->ev_qualifier_ids.entries = entries;
1320 }
1321 trace->ev_qualifier_ids.nr++;
1322 trace->ev_qualifier_ids.entries[i++] = id;
1323 }
1295 } 1324 }
1296 1325
1297 if (err < 0) { 1326 if (err < 0) {
1298 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" 1327 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1299 "\nHint:\tand: 'man syscalls'\n", trace->output); 1328 "\nHint:\tand: 'man syscalls'\n", trace->output);
1329out_free:
1300 zfree(&trace->ev_qualifier_ids.entries); 1330 zfree(&trace->ev_qualifier_ids.entries);
1301 trace->ev_qualifier_ids.nr = 0; 1331 trace->ev_qualifier_ids.nr = 0;
1302 } 1332 }
@@ -2814,7 +2844,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
2814 struct trace *trace = (struct trace *)opt->value; 2844 struct trace *trace = (struct trace *)opt->value;
2815 const char *s = str; 2845 const char *s = str;
2816 char *sep = NULL, *lists[2] = { NULL, NULL, }; 2846 char *sep = NULL, *lists[2] = { NULL, NULL, };
2817 int len = strlen(str) + 1, err = -1, list; 2847 int len = strlen(str) + 1, err = -1, list, idx;
2818 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); 2848 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2819 char group_name[PATH_MAX]; 2849 char group_name[PATH_MAX];
2820 2850
@@ -2831,7 +2861,8 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
2831 *sep = '\0'; 2861 *sep = '\0';
2832 2862
2833 list = 0; 2863 list = 0;
2834 if (syscalltbl__id(trace->sctbl, s) >= 0) { 2864 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
2865 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
2835 list = 1; 2866 list = 1;
2836 } else { 2867 } else {
2837 path__join(group_name, sizeof(group_name), strace_groups_dir, s); 2868 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 2c010dd6a79d..dc442ba21bf6 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -43,6 +43,7 @@ struct record_opts {
43 bool no_samples; 43 bool no_samples;
44 bool raw_samples; 44 bool raw_samples;
45 bool sample_address; 45 bool sample_address;
46 bool sample_phys_addr;
46 bool sample_weight; 47 bool sample_weight;
47 bool sample_time; 48 bool sample_time;
48 bool sample_time_set; 49 bool sample_time_set;
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
index 7e62c46d7a20..c63a919eda98 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
@@ -80,11 +80,6 @@
80 "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." 80 "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
81 }, 81 },
82 {, 82 {,
83 "EventCode": "0x400F0",
84 "EventName": "PM_LD_MISS_L1",
85 "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
86 },
87 {,
88 "EventCode": "0x2E01A", 83 "EventCode": "0x2E01A",
89 "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT", 84 "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT",
90 "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete" 85 "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete"
@@ -374,4 +369,4 @@
374 "EventName": "PM_IPTEG_FROM_L31_ECO_MOD", 369 "EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
375 "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request" 370 "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request"
376 } 371 }
377] \ No newline at end of file 372]
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json
index 00f3d2a21f31..54cc3be00fc2 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/other.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json
@@ -605,11 +605,6 @@
605 "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)" 605 "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
606 }, 606 },
607 {, 607 {,
608 "EventCode": "0x3689E",
609 "EventName": "PM_L2_RTY_LD",
610 "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
611 },
612 {,
613 "EventCode": "0xE08C", 608 "EventCode": "0xE08C",
614 "EventName": "PM_LSU0_ERAT_HIT", 609 "EventName": "PM_LSU0_ERAT_HIT",
615 "BriefDescription": "Primary ERAT hit. There is no secondary ERAT" 610 "BriefDescription": "Primary ERAT hit. There is no secondary ERAT"
@@ -715,11 +710,6 @@
715 "BriefDescription": "Lifetime, sample of RD machine 0 valid" 710 "BriefDescription": "Lifetime, sample of RD machine 0 valid"
716 }, 711 },
717 {, 712 {,
718 "EventCode": "0x468B4",
719 "EventName": "PM_L3_RD0_BUSY",
720 "BriefDescription": "Lifetime, sample of RD machine 0 valid"
721 },
722 {,
723 "EventCode": "0x46080", 713 "EventCode": "0x46080",
724 "EventName": "PM_L2_DISP_ALL_L2MISS", 714 "EventName": "PM_L2_DISP_ALL_L2MISS",
725 "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" 715 "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)"
@@ -850,21 +840,11 @@
850 "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)" 840 "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
851 }, 841 },
852 {, 842 {,
853 "EventCode": "0x2608C",
854 "EventName": "PM_RC0_BUSY",
855 "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
856 },
857 {,
858 "EventCode": "0x36082", 843 "EventCode": "0x36082",
859 "EventName": "PM_L2_LD_DISP", 844 "EventName": "PM_L2_LD_DISP",
860 "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)." 845 "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)."
861 }, 846 },
862 {, 847 {,
863 "EventCode": "0x1609E",
864 "EventName": "PM_L2_LD_DISP",
865 "BriefDescription": "All successful D side load dispatches for this thread (L2 miss + L2 hits)"
866 },
867 {,
868 "EventCode": "0xF8B0", 848 "EventCode": "0xF8B0",
869 "EventName": "PM_L3_SW_PREF", 849 "EventName": "PM_L3_SW_PREF",
870 "BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest" 850 "BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest"
@@ -1040,11 +1020,6 @@
1040 "BriefDescription": "L3 castouts in Mepf state for this thread" 1020 "BriefDescription": "L3 castouts in Mepf state for this thread"
1041 }, 1021 },
1042 {, 1022 {,
1043 "EventCode": "0x168A0",
1044 "EventName": "PM_L3_CO_MEPF",
1045 "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request"
1046 },
1047 {,
1048 "EventCode": "0x460A2", 1023 "EventCode": "0x460A2",
1049 "EventName": "PM_L3_LAT_CI_HIT", 1024 "EventName": "PM_L3_LAT_CI_HIT",
1050 "BriefDescription": "L3 Lateral Castins Hit" 1025 "BriefDescription": "L3 Lateral Castins Hit"
@@ -1150,11 +1125,6 @@
1150 "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)" 1125 "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
1151 }, 1126 },
1152 {, 1127 {,
1153 "EventCode": "0x4689E",
1154 "EventName": "PM_L2_RTY_ST",
1155 "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
1156 },
1157 {,
1158 "EventCode": "0x24040", 1128 "EventCode": "0x24040",
1159 "EventName": "PM_INST_FROM_L2_MEPF", 1129 "EventName": "PM_INST_FROM_L2_MEPF",
1160 "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)" 1130 "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)"
@@ -1255,11 +1225,6 @@
1255 "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)" 1225 "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
1256 }, 1226 },
1257 {, 1227 {,
1258 "EventCode": "0x4608C",
1259 "EventName": "PM_CO0_BUSY",
1260 "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
1261 },
1262 {,
1263 "EventCode": "0x2C122", 1228 "EventCode": "0x2C122",
1264 "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC", 1229 "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
1265 "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load" 1230 "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load"
@@ -1395,11 +1360,6 @@
1395 "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request" 1360 "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request"
1396 }, 1361 },
1397 {, 1362 {,
1398 "EventCode": "0x40006",
1399 "EventName": "PM_ISLB_MISS",
1400 "BriefDescription": "Number of ISLB misses for this thread"
1401 },
1402 {,
1403 "EventCode": "0xD8A8", 1363 "EventCode": "0xD8A8",
1404 "EventName": "PM_ISLB_MISS", 1364 "EventName": "PM_ISLB_MISS",
1405 "BriefDescription": "Instruction SLB miss - Total of all segment sizes" 1365 "BriefDescription": "Instruction SLB miss - Total of all segment sizes"
@@ -1515,11 +1475,6 @@
1515 "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)." 1475 "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)."
1516 }, 1476 },
1517 {, 1477 {,
1518 "EventCode": "0x3609E",
1519 "EventName": "PM_L2_INST",
1520 "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
1521 },
1522 {,
1523 "EventCode": "0x3504C", 1478 "EventCode": "0x3504C",
1524 "EventName": "PM_IPTEG_FROM_DL4", 1479 "EventName": "PM_IPTEG_FROM_DL4",
1525 "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request" 1480 "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request"
@@ -1690,11 +1645,6 @@
1690 "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)" 1645 "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)"
1691 }, 1646 },
1692 {, 1647 {,
1693 "EventCode": "0x2609E",
1694 "EventName": "PM_L2_LD_HIT",
1695 "BriefDescription": "All successful D side load dispatches for this thread that were L2 hits for this thread"
1696 },
1697 {,
1698 "EventCode": "0x168AC", 1648 "EventCode": "0x168AC",
1699 "EventName": "PM_L3_CI_USAGE", 1649 "EventName": "PM_L3_CI_USAGE",
1700 "BriefDescription": "Rotating sample of 16 CI or CO actives" 1650 "BriefDescription": "Rotating sample of 16 CI or CO actives"
@@ -1795,21 +1745,11 @@
1795 "BriefDescription": "Rotating sample of 8 WI valid" 1745 "BriefDescription": "Rotating sample of 8 WI valid"
1796 }, 1746 },
1797 {, 1747 {,
1798 "EventCode": "0x260B6",
1799 "EventName": "PM_L3_WI0_BUSY",
1800 "BriefDescription": "Rotating sample of 8 WI valid (duplicate)"
1801 },
1802 {,
1803 "EventCode": "0x368AC", 1748 "EventCode": "0x368AC",
1804 "EventName": "PM_L3_CO0_BUSY", 1749 "EventName": "PM_L3_CO0_BUSY",
1805 "BriefDescription": "Lifetime, sample of CO machine 0 valid" 1750 "BriefDescription": "Lifetime, sample of CO machine 0 valid"
1806 }, 1751 },
1807 {, 1752 {,
1808 "EventCode": "0x468AC",
1809 "EventName": "PM_L3_CO0_BUSY",
1810 "BriefDescription": "Lifetime, sample of CO machine 0 valid"
1811 },
1812 {,
1813 "EventCode": "0x2E040", 1753 "EventCode": "0x2E040",
1814 "EventName": "PM_DPTEG_FROM_L2_MEPF", 1754 "EventName": "PM_DPTEG_FROM_L2_MEPF",
1815 "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" 1755 "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
@@ -1840,11 +1780,6 @@
1840 "BriefDescription": "L3 PF received retry port 0, every retry counted" 1780 "BriefDescription": "L3 PF received retry port 0, every retry counted"
1841 }, 1781 },
1842 {, 1782 {,
1843 "EventCode": "0x260AE",
1844 "EventName": "PM_L3_P0_PF_RTY",
1845 "BriefDescription": "L3 PF received retry port 0, every retry counted"
1846 },
1847 {,
1848 "EventCode": "0x268B2", 1783 "EventCode": "0x268B2",
1849 "EventName": "PM_L3_LOC_GUESS_WRONG", 1784 "EventName": "PM_L3_LOC_GUESS_WRONG",
1850 "BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low" 1785 "BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low"
@@ -1895,11 +1830,6 @@
1895 "BriefDescription": "Lifetime, sample of snooper machine 0 valid" 1830 "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
1896 }, 1831 },
1897 {, 1832 {,
1898 "EventCode": "0x460AC",
1899 "EventName": "PM_L3_SN0_BUSY",
1900 "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
1901 },
1902 {,
1903 "EventCode": "0x3005C", 1833 "EventCode": "0x3005C",
1904 "EventName": "PM_BFU_BUSY", 1834 "EventName": "PM_BFU_BUSY",
1905 "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity" 1835 "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity"
@@ -1935,11 +1865,6 @@
1935 "BriefDescription": "Lifetime, sample of PF machine 0 valid" 1865 "BriefDescription": "Lifetime, sample of PF machine 0 valid"
1936 }, 1866 },
1937 {, 1867 {,
1938 "EventCode": "0x460B4",
1939 "EventName": "PM_L3_PF0_BUSY",
1940 "BriefDescription": "Lifetime, sample of PF machine 0 valid"
1941 },
1942 {,
1943 "EventCode": "0xC0B0", 1868 "EventCode": "0xC0B0",
1944 "EventName": "PM_LSU_FLUSH_UE", 1869 "EventName": "PM_LSU_FLUSH_UE",
1945 "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time" 1870 "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time"
@@ -2085,11 +2010,6 @@
2085 "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" 2010 "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
2086 }, 2011 },
2087 {, 2012 {,
2088 "EventCode": "0x468AE",
2089 "EventName": "PM_L3_P1_CO_RTY",
2090 "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted"
2091 },
2092 {,
2093 "EventCode": "0xC0AC", 2013 "EventCode": "0xC0AC",
2094 "EventName": "PM_LSU_FLUSH_EMSH", 2014 "EventName": "PM_LSU_FLUSH_EMSH",
2095 "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address" 2015 "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address"
@@ -2195,11 +2115,6 @@
2195 "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)" 2115 "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
2196 }, 2116 },
2197 {, 2117 {,
2198 "EventCode": "0x46886",
2199 "EventName": "PM_L2_SN_M_WR_DONE",
2200 "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
2201 },
2202 {,
2203 "EventCode": "0x489C", 2118 "EventCode": "0x489C",
2204 "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", 2119 "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL",
2205 "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time" 2120 "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time"
@@ -2290,21 +2205,11 @@
2290 "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)" 2205 "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
2291 }, 2206 },
2292 {, 2207 {,
2293 "EventCode": "0x26090",
2294 "EventName": "PM_SN0_BUSY",
2295 "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
2296 },
2297 {,
2298 "EventCode": "0x360AE", 2208 "EventCode": "0x360AE",
2299 "EventName": "PM_L3_P0_CO_RTY", 2209 "EventName": "PM_L3_P0_CO_RTY",
2300 "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted" 2210 "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
2301 }, 2211 },
2302 {, 2212 {,
2303 "EventCode": "0x460AE",
2304 "EventName": "PM_L3_P0_CO_RTY",
2305 "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
2306 },
2307 {,
2308 "EventCode": "0x168A8", 2213 "EventCode": "0x168A8",
2309 "EventName": "PM_L3_WI_USAGE", 2214 "EventName": "PM_L3_WI_USAGE",
2310 "BriefDescription": "Lifetime, sample of Write Inject machine 0 valid" 2215 "BriefDescription": "Lifetime, sample of Write Inject machine 0 valid"
@@ -2340,26 +2245,11 @@
2340 "BriefDescription": "L3 PF received retry port 1, every retry counted" 2245 "BriefDescription": "L3 PF received retry port 1, every retry counted"
2341 }, 2246 },
2342 {, 2247 {,
2343 "EventCode": "0x268AE",
2344 "EventName": "PM_L3_P1_PF_RTY",
2345 "BriefDescription": "L3 PF received retry port 3, every retry counted"
2346 },
2347 {,
2348 "EventCode": "0x46082", 2248 "EventCode": "0x46082",
2349 "EventName": "PM_L2_ST_DISP", 2249 "EventName": "PM_L2_ST_DISP",
2350 "BriefDescription": "All successful D-side store dispatches for this thread " 2250 "BriefDescription": "All successful D-side store dispatches for this thread "
2351 }, 2251 },
2352 {, 2252 {,
2353 "EventCode": "0x1689E",
2354 "EventName": "PM_L2_ST_DISP",
2355 "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)"
2356 },
2357 {,
2358 "EventCode": "0x36880",
2359 "EventName": "PM_L2_INST_MISS",
2360 "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
2361 },
2362 {,
2363 "EventCode": "0x4609E", 2253 "EventCode": "0x4609E",
2364 "EventName": "PM_L2_INST_MISS", 2254 "EventName": "PM_L2_INST_MISS",
2365 "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" 2255 "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
@@ -2430,11 +2320,6 @@
2430 "BriefDescription": "# PPC Dispatched" 2320 "BriefDescription": "# PPC Dispatched"
2431 }, 2321 },
2432 {, 2322 {,
2433 "EventCode": "0x300F2",
2434 "EventName": "PM_INST_DISP",
2435 "BriefDescription": "# PPC Dispatched"
2436 },
2437 {,
2438 "EventCode": "0x4E05E", 2323 "EventCode": "0x4E05E",
2439 "EventName": "PM_TM_OUTER_TBEGIN_DISP", 2324 "EventName": "PM_TM_OUTER_TBEGIN_DISP",
2440 "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions" 2325 "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions"
@@ -2460,11 +2345,6 @@
2460 "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" 2345 "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
2461 }, 2346 },
2462 {, 2347 {,
2463 "EventCode": "0x2689E",
2464 "EventName": "PM_L2_ST_HIT",
2465 "BriefDescription": "All successful D-side store dispatches that were L2 hits for this thread"
2466 },
2467 {,
2468 "EventCode": "0x360A8", 2348 "EventCode": "0x360A8",
2469 "EventName": "PM_L3_CO", 2349 "EventName": "PM_L3_CO",
2470 "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))" 2350 "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))"
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
index 47a82568a8df..bc2db636dabf 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
@@ -420,11 +420,6 @@
420 "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch" 420 "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch"
421 }, 421 },
422 {, 422 {,
423 "EventCode": "0x10016",
424 "EventName": "PM_DSLB_MISS",
425 "BriefDescription": "Data SLB Miss - Total of all segment sizes"
426 },
427 {,
428 "EventCode": "0xD0A8", 423 "EventCode": "0xD0A8",
429 "EventName": "PM_DSLB_MISS", 424 "EventName": "PM_DSLB_MISS",
430 "BriefDescription": "Data SLB Miss - Total of all segment sizes" 425 "BriefDescription": "Data SLB Miss - Total of all segment sizes"
@@ -554,4 +549,4 @@
554 "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC", 549 "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
555 "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load" 550 "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load"
556 } 551 }
557] \ No newline at end of file 552]
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
index a2c95a99e168..3ef8a10aac86 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
@@ -5,11 +5,6 @@
5 "BriefDescription": "Branches that are not strongly biased" 5 "BriefDescription": "Branches that are not strongly biased"
6 }, 6 },
7 {, 7 {,
8 "EventCode": "0x40036",
9 "EventName": "PM_BR_2PATH",
10 "BriefDescription": "Branches that are not strongly biased"
11 },
12 {,
13 "EventCode": "0x40056", 8 "EventCode": "0x40056",
14 "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH", 9 "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
15 "BriefDescription": "Local memory above threshold for LSU medium" 10 "BriefDescription": "Local memory above threshold for LSU medium"
@@ -124,4 +119,4 @@
124 "EventName": "PM_1FLOP_CMPL", 119 "EventName": "PM_1FLOP_CMPL",
125 "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed" 120 "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed"
126 } 121 }
127] \ No newline at end of file 122]
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 761c5a448c56..466a462b26d1 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -237,6 +237,11 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
237 237
238 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); 238 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
239 if (!al.map || !al.map->dso) { 239 if (!al.map || !al.map->dso) {
240 if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
241 pr_debug("Hypervisor address can not be resolved - skipping\n");
242 return 0;
243 }
244
240 pr_debug("thread__find_addr_map failed\n"); 245 pr_debug("thread__find_addr_map failed\n");
241 return -1; 246 return -1;
242 } 247 }
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 6d028f42b3cf..c3858487159d 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -141,6 +141,9 @@ static bool samples_same(const struct perf_sample *s1,
141 } 141 }
142 } 142 }
143 143
144 if (type & PERF_SAMPLE_PHYS_ADDR)
145 COMP(phys_addr);
146
144 return true; 147 return true;
145} 148}
146 149
@@ -206,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
206 .mask = sample_regs, 209 .mask = sample_regs,
207 .regs = regs, 210 .regs = regs,
208 }, 211 },
212 .phys_addr = 113,
209 }; 213 };
210 struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; 214 struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
211 struct perf_sample sample_out; 215 struct perf_sample sample_out;
@@ -305,7 +309,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
305 * were added. Please actually update the test rather than just change 309 * were added. Please actually update the test rather than just change
306 * the condition below. 310 * the condition below.
307 */ 311 */
308 if (PERF_SAMPLE_MAX > PERF_SAMPLE_REGS_INTR << 1) { 312 if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) {
309 pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); 313 pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
310 return -1; 314 return -1;
311 } 315 }
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index ba0aee576a2b..786fecaf578e 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -829,7 +829,8 @@ static int annotate_browser__run(struct annotate_browser *browser,
829 "q/ESC/CTRL+C Exit\n\n" 829 "q/ESC/CTRL+C Exit\n\n"
830 "ENTER Go to target\n" 830 "ENTER Go to target\n"
831 "ESC Exit\n" 831 "ESC Exit\n"
832 "H Cycle thru hottest instructions\n" 832 "H Go to hottest instruction\n"
833 "TAB/shift+TAB Cycle thru hottest instructions\n"
833 "j Toggle showing jump to target arrows\n" 834 "j Toggle showing jump to target arrows\n"
834 "J Toggle showing number of jump sources on targets\n" 835 "J Toggle showing number of jump sources on targets\n"
835 "n Search next string\n" 836 "n Search next string\n"
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f4bc2462bc2c..13dfb0a0bdeb 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
931 browser->show_dso); 931 browser->show_dso);
932 932
933 if (symbol_conf.show_branchflag_count) { 933 if (symbol_conf.show_branchflag_count) {
934 if (need_percent) 934 callchain_list_counts__printf_value(chain, NULL,
935 callchain_list_counts__printf_value(node, chain, NULL, 935 buf, sizeof(buf));
936 buf, sizeof(buf));
937 else
938 callchain_list_counts__printf_value(NULL, chain, NULL,
939 buf, sizeof(buf));
940 936
941 if (asprintf(&alloc_str2, "%s%s", str, buf) < 0) 937 if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
942 str = "Not enough memory!"; 938 str = "Not enough memory!";
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5c95b8301c67..8bdb7a500181 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
124 str = callchain_list__sym_name(chain, bf, sizeof(bf), false); 124 str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
125 125
126 if (symbol_conf.show_branchflag_count) { 126 if (symbol_conf.show_branchflag_count) {
127 if (!period) 127 callchain_list_counts__printf_value(chain, NULL,
128 callchain_list_counts__printf_value(node, chain, NULL, 128 buf, sizeof(buf));
129 buf, sizeof(buf));
130 else
131 callchain_list_counts__printf_value(NULL, chain, NULL,
132 buf, sizeof(buf));
133 129
134 if (asprintf(&alloc_str, "%s%s", str, buf) < 0) 130 if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
135 str = "Not enough memory!"; 131 str = "Not enough memory!";
@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
313 309
314 if (symbol_conf.show_branchflag_count) 310 if (symbol_conf.show_branchflag_count)
315 ret += callchain_list_counts__printf_value( 311 ret += callchain_list_counts__printf_value(
316 NULL, chain, fp, NULL, 0); 312 chain, fp, NULL, 0);
317 ret += fprintf(fp, "\n"); 313 ret += fprintf(fp, "\n");
318 314
319 if (++entries_printed == callchain_param.print_limit) 315 if (++entries_printed == callchain_param.print_limit)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f320b0777e0d..510b513e0f01 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
588 call->cycles_count = 588 call->cycles_count =
589 cursor_node->branch_flags.cycles; 589 cursor_node->branch_flags.cycles;
590 call->iter_count = cursor_node->nr_loop_iter; 590 call->iter_count = cursor_node->nr_loop_iter;
591 call->samples_count = cursor_node->samples; 591 call->iter_cycles = cursor_node->iter_cycles;
592 } 592 }
593 } 593 }
594 594
@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
722 cnode->cycles_count += 722 cnode->cycles_count +=
723 node->branch_flags.cycles; 723 node->branch_flags.cycles;
724 cnode->iter_count += node->nr_loop_iter; 724 cnode->iter_count += node->nr_loop_iter;
725 cnode->samples_count += node->samples; 725 cnode->iter_cycles += node->iter_cycles;
726 } 726 }
727 } 727 }
728 728
@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
998int callchain_cursor_append(struct callchain_cursor *cursor, 998int callchain_cursor_append(struct callchain_cursor *cursor,
999 u64 ip, struct map *map, struct symbol *sym, 999 u64 ip, struct map *map, struct symbol *sym,
1000 bool branch, struct branch_flags *flags, 1000 bool branch, struct branch_flags *flags,
1001 int nr_loop_iter, int samples, u64 branch_from) 1001 int nr_loop_iter, u64 iter_cycles, u64 branch_from)
1002{ 1002{
1003 struct callchain_cursor_node *node = *cursor->last; 1003 struct callchain_cursor_node *node = *cursor->last;
1004 1004
@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
1016 node->sym = sym; 1016 node->sym = sym;
1017 node->branch = branch; 1017 node->branch = branch;
1018 node->nr_loop_iter = nr_loop_iter; 1018 node->nr_loop_iter = nr_loop_iter;
1019 node->samples = samples; 1019 node->iter_cycles = iter_cycles;
1020 1020
1021 if (flags) 1021 if (flags)
1022 memcpy(&node->branch_flags, flags, 1022 memcpy(&node->branch_flags, flags,
@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize,
1306static int branch_from_str(char *bf, int bfsize, 1306static int branch_from_str(char *bf, int bfsize,
1307 u64 branch_count, 1307 u64 branch_count,
1308 u64 cycles_count, u64 iter_count, 1308 u64 cycles_count, u64 iter_count,
1309 u64 samples_count) 1309 u64 iter_cycles)
1310{ 1310{
1311 int printed = 0, i = 0; 1311 int printed = 0, i = 0;
1312 u64 cycles; 1312 u64 cycles;
@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize,
1318 bf + printed, bfsize - printed); 1318 bf + printed, bfsize - printed);
1319 } 1319 }
1320 1320
1321 if (iter_count && samples_count) { 1321 if (iter_count) {
1322 printed += count_pri64_printf(i++, "iterations", 1322 printed += count_pri64_printf(i++, "iter",
1323 iter_count / samples_count, 1323 iter_count,
1324 bf + printed, bfsize - printed);
1325
1326 printed += count_pri64_printf(i++, "avg_cycles",
1327 iter_cycles / iter_count,
1324 bf + printed, bfsize - printed); 1328 bf + printed, bfsize - printed);
1325 } 1329 }
1326 1330
@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize,
1333static int counts_str_build(char *bf, int bfsize, 1337static int counts_str_build(char *bf, int bfsize,
1334 u64 branch_count, u64 predicted_count, 1338 u64 branch_count, u64 predicted_count,
1335 u64 abort_count, u64 cycles_count, 1339 u64 abort_count, u64 cycles_count,
1336 u64 iter_count, u64 samples_count, 1340 u64 iter_count, u64 iter_cycles,
1337 struct branch_type_stat *brtype_stat) 1341 struct branch_type_stat *brtype_stat)
1338{ 1342{
1339 int printed; 1343 int printed;
@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize,
1346 predicted_count, abort_count, brtype_stat); 1350 predicted_count, abort_count, brtype_stat);
1347 } else { 1351 } else {
1348 printed = branch_from_str(bf, bfsize, branch_count, 1352 printed = branch_from_str(bf, bfsize, branch_count,
1349 cycles_count, iter_count, samples_count); 1353 cycles_count, iter_count, iter_cycles);
1350 } 1354 }
1351 1355
1352 if (!printed) 1356 if (!printed)
@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize,
1358static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, 1362static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
1359 u64 branch_count, u64 predicted_count, 1363 u64 branch_count, u64 predicted_count,
1360 u64 abort_count, u64 cycles_count, 1364 u64 abort_count, u64 cycles_count,
1361 u64 iter_count, u64 samples_count, 1365 u64 iter_count, u64 iter_cycles,
1362 struct branch_type_stat *brtype_stat) 1366 struct branch_type_stat *brtype_stat)
1363{ 1367{
1364 char str[256]; 1368 char str[256];
1365 1369
1366 counts_str_build(str, sizeof(str), branch_count, 1370 counts_str_build(str, sizeof(str), branch_count,
1367 predicted_count, abort_count, cycles_count, 1371 predicted_count, abort_count, cycles_count,
1368 iter_count, samples_count, brtype_stat); 1372 iter_count, iter_cycles, brtype_stat);
1369 1373
1370 if (fp) 1374 if (fp)
1371 return fprintf(fp, "%s", str); 1375 return fprintf(fp, "%s", str);
@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
1373 return scnprintf(bf, bfsize, "%s", str); 1377 return scnprintf(bf, bfsize, "%s", str);
1374} 1378}
1375 1379
1376int callchain_list_counts__printf_value(struct callchain_node *node, 1380int callchain_list_counts__printf_value(struct callchain_list *clist,
1377 struct callchain_list *clist,
1378 FILE *fp, char *bf, int bfsize) 1381 FILE *fp, char *bf, int bfsize)
1379{ 1382{
1380 u64 branch_count, predicted_count; 1383 u64 branch_count, predicted_count;
1381 u64 abort_count, cycles_count; 1384 u64 abort_count, cycles_count;
1382 u64 iter_count = 0, samples_count = 0; 1385 u64 iter_count, iter_cycles;
1383 1386
1384 branch_count = clist->branch_count; 1387 branch_count = clist->branch_count;
1385 predicted_count = clist->predicted_count; 1388 predicted_count = clist->predicted_count;
1386 abort_count = clist->abort_count; 1389 abort_count = clist->abort_count;
1387 cycles_count = clist->cycles_count; 1390 cycles_count = clist->cycles_count;
1388 1391 iter_count = clist->iter_count;
1389 if (node) { 1392 iter_cycles = clist->iter_cycles;
1390 struct callchain_list *call;
1391
1392 list_for_each_entry(call, &node->val, list) {
1393 iter_count += call->iter_count;
1394 samples_count += call->samples_count;
1395 }
1396 }
1397 1393
1398 return callchain_counts_printf(fp, bf, bfsize, branch_count, 1394 return callchain_counts_printf(fp, bf, bfsize, branch_count,
1399 predicted_count, abort_count, 1395 predicted_count, abort_count,
1400 cycles_count, iter_count, samples_count, 1396 cycles_count, iter_count, iter_cycles,
1401 &clist->brtype_stat); 1397 &clist->brtype_stat);
1402} 1398}
1403 1399
@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
1523 1519
1524 rc = callchain_cursor_append(dst, node->ip, node->map, node->sym, 1520 rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
1525 node->branch, &node->branch_flags, 1521 node->branch, &node->branch_flags,
1526 node->nr_loop_iter, node->samples, 1522 node->nr_loop_iter,
1523 node->iter_cycles,
1527 node->branch_from); 1524 node->branch_from);
1528 if (rc) 1525 if (rc)
1529 break; 1526 break;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 97738201464a..1ed6fc61d0a5 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -119,7 +119,7 @@ struct callchain_list {
119 u64 abort_count; 119 u64 abort_count;
120 u64 cycles_count; 120 u64 cycles_count;
121 u64 iter_count; 121 u64 iter_count;
122 u64 samples_count; 122 u64 iter_cycles;
123 struct branch_type_stat brtype_stat; 123 struct branch_type_stat brtype_stat;
124 char *srcline; 124 char *srcline;
125 struct list_head list; 125 struct list_head list;
@@ -139,7 +139,7 @@ struct callchain_cursor_node {
139 struct branch_flags branch_flags; 139 struct branch_flags branch_flags;
140 u64 branch_from; 140 u64 branch_from;
141 int nr_loop_iter; 141 int nr_loop_iter;
142 int samples; 142 u64 iter_cycles;
143 struct callchain_cursor_node *next; 143 struct callchain_cursor_node *next;
144}; 144};
145 145
@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
201int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, 201int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
202 struct map *map, struct symbol *sym, 202 struct map *map, struct symbol *sym,
203 bool branch, struct branch_flags *flags, 203 bool branch, struct branch_flags *flags,
204 int nr_loop_iter, int samples, u64 branch_from); 204 int nr_loop_iter, u64 iter_cycles, u64 branch_from);
205 205
206/* Close a cursor writing session. Initialize for the reader */ 206/* Close a cursor writing session. Initialize for the reader */
207static inline void callchain_cursor_commit(struct callchain_cursor *cursor) 207static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
282int callchain_node__fprintf_value(struct callchain_node *node, 282int callchain_node__fprintf_value(struct callchain_node *node,
283 FILE *fp, u64 total); 283 FILE *fp, u64 total);
284 284
285int callchain_list_counts__printf_value(struct callchain_node *node, 285int callchain_list_counts__printf_value(struct callchain_list *clist,
286 struct callchain_list *clist,
287 FILE *fp, char *bf, int bfsize); 286 FILE *fp, char *bf, int bfsize);
288 287
289void free_callchain(struct callchain_root *root); 288void free_callchain(struct callchain_root *root);
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 423ac82605f3..ee7bcc898d35 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -200,6 +200,7 @@ struct perf_sample {
200 u32 cpu; 200 u32 cpu;
201 u32 raw_size; 201 u32 raw_size;
202 u64 data_src; 202 u64 data_src;
203 u64 phys_addr;
203 u32 flags; 204 u32 flags;
204 u16 insn_len; 205 u16 insn_len;
205 u8 cpumode; 206 u8 cpumode;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d9bd632ed7db..4bb89373eb52 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -955,6 +955,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
955 if (opts->sample_address) 955 if (opts->sample_address)
956 perf_evsel__set_sample_bit(evsel, DATA_SRC); 956 perf_evsel__set_sample_bit(evsel, DATA_SRC);
957 957
958 if (opts->sample_phys_addr)
959 perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
960
958 if (opts->no_buffering) { 961 if (opts->no_buffering) {
959 attr->watermark = 0; 962 attr->watermark = 0;
960 attr->wakeup_events = 1; 963 attr->wakeup_events = 1;
@@ -1464,7 +1467,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
1464 bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), 1467 bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
1465 bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), 1468 bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
1466 bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), 1469 bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
1467 bit_name(WEIGHT), 1470 bit_name(WEIGHT), bit_name(PHYS_ADDR),
1468 { .name = NULL, } 1471 { .name = NULL, }
1469 }; 1472 };
1470#undef bit_name 1473#undef bit_name
@@ -2206,6 +2209,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
2206 } 2209 }
2207 } 2210 }
2208 2211
2212 data->phys_addr = 0;
2213 if (type & PERF_SAMPLE_PHYS_ADDR) {
2214 data->phys_addr = *array;
2215 array++;
2216 }
2217
2209 return 0; 2218 return 0;
2210} 2219}
2211 2220
@@ -2311,6 +2320,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
2311 } 2320 }
2312 } 2321 }
2313 2322
2323 if (type & PERF_SAMPLE_PHYS_ADDR)
2324 result += sizeof(u64);
2325
2314 return result; 2326 return result;
2315} 2327}
2316 2328
@@ -2500,6 +2512,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
2500 } 2512 }
2501 } 2513 }
2502 2514
2515 if (type & PERF_SAMPLE_PHYS_ADDR) {
2516 *array = sample->phys_addr;
2517 array++;
2518 }
2519
2503 return 0; 2520 return 0;
2504} 2521}
2505 2522
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 351d3b2d8887..dd2c4b5112a5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -131,6 +131,7 @@ struct perf_evsel {
131 bool cmdline_group_boundary; 131 bool cmdline_group_boundary;
132 struct list_head config_terms; 132 struct list_head config_terms;
133 int bpf_fd; 133 int bpf_fd;
134 bool auto_merge_stats;
134 bool merged_stat; 135 bool merged_stat;
135 const char * metric_expr; 136 const char * metric_expr;
136 const char * metric_name; 137 const char * metric_name;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 9453b2e27015..e60d8d8ea4c2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -167,6 +167,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
167 symlen = unresolved_col_width + 4 + 2; 167 symlen = unresolved_col_width + 4 + 2;
168 hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); 168 hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
169 } 169 }
170
171 hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
172 unresolved_col_width + 4 + 2);
173
170 } else { 174 } else {
171 symlen = unresolved_col_width + 4 + 2; 175 symlen = unresolved_col_width + 4 + 2;
172 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); 176 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ee3670a388df..e60dda26a920 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -47,6 +47,7 @@ enum hist_column {
47 HISTC_GLOBAL_WEIGHT, 47 HISTC_GLOBAL_WEIGHT,
48 HISTC_MEM_DADDR_SYMBOL, 48 HISTC_MEM_DADDR_SYMBOL,
49 HISTC_MEM_DADDR_DSO, 49 HISTC_MEM_DADDR_DSO,
50 HISTC_MEM_PHYS_DADDR,
50 HISTC_MEM_LOCKED, 51 HISTC_MEM_LOCKED,
51 HISTC_MEM_TLB, 52 HISTC_MEM_TLB,
52 HISTC_MEM_LVL, 53 HISTC_MEM_LVL,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5c8eacaca4f4..df709363ef69 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1635,10 +1635,12 @@ static void ip__resolve_ams(struct thread *thread,
1635 ams->al_addr = al.addr; 1635 ams->al_addr = al.addr;
1636 ams->sym = al.sym; 1636 ams->sym = al.sym;
1637 ams->map = al.map; 1637 ams->map = al.map;
1638 ams->phys_addr = 0;
1638} 1639}
1639 1640
1640static void ip__resolve_data(struct thread *thread, 1641static void ip__resolve_data(struct thread *thread,
1641 u8 m, struct addr_map_symbol *ams, u64 addr) 1642 u8 m, struct addr_map_symbol *ams,
1643 u64 addr, u64 phys_addr)
1642{ 1644{
1643 struct addr_location al; 1645 struct addr_location al;
1644 1646
@@ -1658,6 +1660,7 @@ static void ip__resolve_data(struct thread *thread,
1658 ams->al_addr = al.addr; 1660 ams->al_addr = al.addr;
1659 ams->sym = al.sym; 1661 ams->sym = al.sym;
1660 ams->map = al.map; 1662 ams->map = al.map;
1663 ams->phys_addr = phys_addr;
1661} 1664}
1662 1665
1663struct mem_info *sample__resolve_mem(struct perf_sample *sample, 1666struct mem_info *sample__resolve_mem(struct perf_sample *sample,
@@ -1669,12 +1672,18 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
1669 return NULL; 1672 return NULL;
1670 1673
1671 ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); 1674 ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
1672 ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr); 1675 ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
1676 sample->addr, sample->phys_addr);
1673 mi->data_src.val = sample->data_src; 1677 mi->data_src.val = sample->data_src;
1674 1678
1675 return mi; 1679 return mi;
1676} 1680}
1677 1681
1682struct iterations {
1683 int nr_loop_iter;
1684 u64 cycles;
1685};
1686
1678static int add_callchain_ip(struct thread *thread, 1687static int add_callchain_ip(struct thread *thread,
1679 struct callchain_cursor *cursor, 1688 struct callchain_cursor *cursor,
1680 struct symbol **parent, 1689 struct symbol **parent,
@@ -1683,11 +1692,12 @@ static int add_callchain_ip(struct thread *thread,
1683 u64 ip, 1692 u64 ip,
1684 bool branch, 1693 bool branch,
1685 struct branch_flags *flags, 1694 struct branch_flags *flags,
1686 int nr_loop_iter, 1695 struct iterations *iter,
1687 int samples,
1688 u64 branch_from) 1696 u64 branch_from)
1689{ 1697{
1690 struct addr_location al; 1698 struct addr_location al;
1699 int nr_loop_iter = 0;
1700 u64 iter_cycles = 0;
1691 1701
1692 al.filtered = 0; 1702 al.filtered = 0;
1693 al.sym = NULL; 1703 al.sym = NULL;
@@ -1737,9 +1747,15 @@ static int add_callchain_ip(struct thread *thread,
1737 1747
1738 if (symbol_conf.hide_unresolved && al.sym == NULL) 1748 if (symbol_conf.hide_unresolved && al.sym == NULL)
1739 return 0; 1749 return 0;
1750
1751 if (iter) {
1752 nr_loop_iter = iter->nr_loop_iter;
1753 iter_cycles = iter->cycles;
1754 }
1755
1740 return callchain_cursor_append(cursor, al.addr, al.map, al.sym, 1756 return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
1741 branch, flags, nr_loop_iter, samples, 1757 branch, flags, nr_loop_iter,
1742 branch_from); 1758 iter_cycles, branch_from);
1743} 1759}
1744 1760
1745struct branch_info *sample__resolve_bstack(struct perf_sample *sample, 1761struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1760,6 +1776,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
1760 return bi; 1776 return bi;
1761} 1777}
1762 1778
1779static void save_iterations(struct iterations *iter,
1780 struct branch_entry *be, int nr)
1781{
1782 int i;
1783
1784 iter->nr_loop_iter = nr;
1785 iter->cycles = 0;
1786
1787 for (i = 0; i < nr; i++)
1788 iter->cycles += be[i].flags.cycles;
1789}
1790
1763#define CHASHSZ 127 1791#define CHASHSZ 127
1764#define CHASHBITS 7 1792#define CHASHBITS 7
1765#define NO_ENTRY 0xff 1793#define NO_ENTRY 0xff
@@ -1767,7 +1795,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
1767#define PERF_MAX_BRANCH_DEPTH 127 1795#define PERF_MAX_BRANCH_DEPTH 127
1768 1796
1769/* Remove loops. */ 1797/* Remove loops. */
1770static int remove_loops(struct branch_entry *l, int nr) 1798static int remove_loops(struct branch_entry *l, int nr,
1799 struct iterations *iter)
1771{ 1800{
1772 int i, j, off; 1801 int i, j, off;
1773 unsigned char chash[CHASHSZ]; 1802 unsigned char chash[CHASHSZ];
@@ -1792,8 +1821,18 @@ static int remove_loops(struct branch_entry *l, int nr)
1792 break; 1821 break;
1793 } 1822 }
1794 if (is_loop) { 1823 if (is_loop) {
1795 memmove(l + i, l + i + off, 1824 j = nr - (i + off);
1796 (nr - (i + off)) * sizeof(*l)); 1825 if (j > 0) {
1826 save_iterations(iter + i + off,
1827 l + i, off);
1828
1829 memmove(iter + i, iter + i + off,
1830 j * sizeof(*iter));
1831
1832 memmove(l + i, l + i + off,
1833 j * sizeof(*l));
1834 }
1835
1797 nr -= off; 1836 nr -= off;
1798 } 1837 }
1799 } 1838 }
@@ -1883,7 +1922,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
1883 1922
1884 err = add_callchain_ip(thread, cursor, parent, 1923 err = add_callchain_ip(thread, cursor, parent,
1885 root_al, &cpumode, ip, 1924 root_al, &cpumode, ip,
1886 branch, flags, 0, 0, 1925 branch, flags, NULL,
1887 branch_from); 1926 branch_from);
1888 if (err) 1927 if (err)
1889 return (err < 0) ? err : 0; 1928 return (err < 0) ? err : 0;
@@ -1909,7 +1948,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
1909 int i, j, err, nr_entries; 1948 int i, j, err, nr_entries;
1910 int skip_idx = -1; 1949 int skip_idx = -1;
1911 int first_call = 0; 1950 int first_call = 0;
1912 int nr_loop_iter;
1913 1951
1914 if (chain) 1952 if (chain)
1915 chain_nr = chain->nr; 1953 chain_nr = chain->nr;
@@ -1942,6 +1980,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
1942 if (branch && callchain_param.branch_callstack) { 1980 if (branch && callchain_param.branch_callstack) {
1943 int nr = min(max_stack, (int)branch->nr); 1981 int nr = min(max_stack, (int)branch->nr);
1944 struct branch_entry be[nr]; 1982 struct branch_entry be[nr];
1983 struct iterations iter[nr];
1945 1984
1946 if (branch->nr > PERF_MAX_BRANCH_DEPTH) { 1985 if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
1947 pr_warning("corrupted branch chain. skipping...\n"); 1986 pr_warning("corrupted branch chain. skipping...\n");
@@ -1972,38 +2011,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
1972 be[i] = branch->entries[branch->nr - i - 1]; 2011 be[i] = branch->entries[branch->nr - i - 1];
1973 } 2012 }
1974 2013
1975 nr_loop_iter = nr; 2014 memset(iter, 0, sizeof(struct iterations) * nr);
1976 nr = remove_loops(be, nr); 2015 nr = remove_loops(be, nr, iter);
1977
1978 /*
1979 * Get the number of iterations.
1980 * It's only approximation, but good enough in practice.
1981 */
1982 if (nr_loop_iter > nr)
1983 nr_loop_iter = nr_loop_iter - nr + 1;
1984 else
1985 nr_loop_iter = 0;
1986 2016
1987 for (i = 0; i < nr; i++) { 2017 for (i = 0; i < nr; i++) {
1988 if (i == nr - 1) 2018 err = add_callchain_ip(thread, cursor, parent,
1989 err = add_callchain_ip(thread, cursor, parent, 2019 root_al,
1990 root_al, 2020 NULL, be[i].to,
1991 NULL, be[i].to, 2021 true, &be[i].flags,
1992 true, &be[i].flags, 2022 NULL, be[i].from);
1993 nr_loop_iter, 1,
1994 be[i].from);
1995 else
1996 err = add_callchain_ip(thread, cursor, parent,
1997 root_al,
1998 NULL, be[i].to,
1999 true, &be[i].flags,
2000 0, 0, be[i].from);
2001 2023
2002 if (!err) 2024 if (!err)
2003 err = add_callchain_ip(thread, cursor, parent, root_al, 2025 err = add_callchain_ip(thread, cursor, parent, root_al,
2004 NULL, be[i].from, 2026 NULL, be[i].from,
2005 true, &be[i].flags, 2027 true, &be[i].flags,
2006 0, 0, 0); 2028 &iter[i], 0);
2007 if (err == -EINVAL) 2029 if (err == -EINVAL)
2008 break; 2030 break;
2009 if (err) 2031 if (err)
@@ -2037,7 +2059,7 @@ check_calls:
2037 2059
2038 err = add_callchain_ip(thread, cursor, parent, 2060 err = add_callchain_ip(thread, cursor, parent,
2039 root_al, &cpumode, ip, 2061 root_al, &cpumode, ip,
2040 false, NULL, 0, 0, 0); 2062 false, NULL, NULL, 0);
2041 2063
2042 if (err) 2064 if (err)
2043 return (err < 0) ? err : 0; 2065 return (err < 0) ? err : 0;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f44aeba51d1f..f6257fb4f08c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -310,7 +310,7 @@ static struct perf_evsel *
310__add_event(struct list_head *list, int *idx, 310__add_event(struct list_head *list, int *idx,
311 struct perf_event_attr *attr, 311 struct perf_event_attr *attr,
312 char *name, struct cpu_map *cpus, 312 char *name, struct cpu_map *cpus,
313 struct list_head *config_terms) 313 struct list_head *config_terms, bool auto_merge_stats)
314{ 314{
315 struct perf_evsel *evsel; 315 struct perf_evsel *evsel;
316 316
@@ -324,6 +324,7 @@ __add_event(struct list_head *list, int *idx,
324 evsel->cpus = cpu_map__get(cpus); 324 evsel->cpus = cpu_map__get(cpus);
325 evsel->own_cpus = cpu_map__get(cpus); 325 evsel->own_cpus = cpu_map__get(cpus);
326 evsel->system_wide = !!cpus; 326 evsel->system_wide = !!cpus;
327 evsel->auto_merge_stats = auto_merge_stats;
327 328
328 if (name) 329 if (name)
329 evsel->name = strdup(name); 330 evsel->name = strdup(name);
@@ -339,7 +340,7 @@ static int add_event(struct list_head *list, int *idx,
339 struct perf_event_attr *attr, char *name, 340 struct perf_event_attr *attr, char *name,
340 struct list_head *config_terms) 341 struct list_head *config_terms)
341{ 342{
342 return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM; 343 return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM;
343} 344}
344 345
345static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) 346static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@@ -1209,9 +1210,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
1209 get_config_name(head_config), &config_terms); 1210 get_config_name(head_config), &config_terms);
1210} 1211}
1211 1212
1212int parse_events_add_pmu(struct parse_events_state *parse_state, 1213static int __parse_events_add_pmu(struct parse_events_state *parse_state,
1213 struct list_head *list, char *name, 1214 struct list_head *list, char *name,
1214 struct list_head *head_config) 1215 struct list_head *head_config, bool auto_merge_stats)
1215{ 1216{
1216 struct perf_event_attr attr; 1217 struct perf_event_attr attr;
1217 struct perf_pmu_info info; 1218 struct perf_pmu_info info;
@@ -1232,7 +1233,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
1232 1233
1233 if (!head_config) { 1234 if (!head_config) {
1234 attr.type = pmu->type; 1235 attr.type = pmu->type;
1235 evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL); 1236 evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats);
1236 return evsel ? 0 : -ENOMEM; 1237 return evsel ? 0 : -ENOMEM;
1237 } 1238 }
1238 1239
@@ -1254,7 +1255,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
1254 1255
1255 evsel = __add_event(list, &parse_state->idx, &attr, 1256 evsel = __add_event(list, &parse_state->idx, &attr,
1256 get_config_name(head_config), pmu->cpus, 1257 get_config_name(head_config), pmu->cpus,
1257 &config_terms); 1258 &config_terms, auto_merge_stats);
1258 if (evsel) { 1259 if (evsel) {
1259 evsel->unit = info.unit; 1260 evsel->unit = info.unit;
1260 evsel->scale = info.scale; 1261 evsel->scale = info.scale;
@@ -1267,6 +1268,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
1267 return evsel ? 0 : -ENOMEM; 1268 return evsel ? 0 : -ENOMEM;
1268} 1269}
1269 1270
1271int parse_events_add_pmu(struct parse_events_state *parse_state,
1272 struct list_head *list, char *name,
1273 struct list_head *head_config)
1274{
1275 return __parse_events_add_pmu(parse_state, list, name, head_config, false);
1276}
1277
1270int parse_events_multi_pmu_add(struct parse_events_state *parse_state, 1278int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
1271 char *str, struct list_head **listp) 1279 char *str, struct list_head **listp)
1272{ 1280{
@@ -1296,8 +1304,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
1296 return -1; 1304 return -1;
1297 list_add_tail(&term->list, head); 1305 list_add_tail(&term->list, head);
1298 1306
1299 if (!parse_events_add_pmu(parse_state, list, 1307 if (!__parse_events_add_pmu(parse_state, list,
1300 pmu->name, head)) { 1308 pmu->name, head, true)) {
1301 pr_debug("%s -> %s/%s/\n", str, 1309 pr_debug("%s -> %s/%s/\n", str,
1302 pmu->name, alias->str); 1310 pmu->name, alias->str);
1303 ok++; 1311 ok++;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ac863691605f..a7ebd9fe8e40 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1120,6 +1120,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
1120 if (sample_type & PERF_SAMPLE_DATA_SRC) 1120 if (sample_type & PERF_SAMPLE_DATA_SRC)
1121 printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); 1121 printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
1122 1122
1123 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
1124 printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
1125
1123 if (sample_type & PERF_SAMPLE_TRANSACTION) 1126 if (sample_type & PERF_SAMPLE_TRANSACTION)
1124 printf("... transaction: %" PRIx64 "\n", sample->transaction); 1127 printf("... transaction: %" PRIx64 "\n", sample->transaction);
1125 1128
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 12359bd986db..eb3ab902a1c0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1316,6 +1316,47 @@ struct sort_entry sort_mem_dcacheline = {
1316}; 1316};
1317 1317
1318static int64_t 1318static int64_t
1319sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
1320{
1321 uint64_t l = 0, r = 0;
1322
1323 if (left->mem_info)
1324 l = left->mem_info->daddr.phys_addr;
1325 if (right->mem_info)
1326 r = right->mem_info->daddr.phys_addr;
1327
1328 return (int64_t)(r - l);
1329}
1330
1331static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
1332 size_t size, unsigned int width)
1333{
1334 uint64_t addr = 0;
1335 size_t ret = 0;
1336 size_t len = BITS_PER_LONG / 4;
1337
1338 addr = he->mem_info->daddr.phys_addr;
1339
1340 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);
1341
1342 ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, addr);
1343
1344 ret += repsep_snprintf(bf + ret, size - ret, "%-*s", width - ret, "");
1345
1346 if (ret > width)
1347 bf[width] = '\0';
1348
1349 return width;
1350}
1351
1352struct sort_entry sort_mem_phys_daddr = {
1353 .se_header = "Data Physical Address",
1354 .se_cmp = sort__phys_daddr_cmp,
1355 .se_snprintf = hist_entry__phys_daddr_snprintf,
1356 .se_width_idx = HISTC_MEM_PHYS_DADDR,
1357};
1358
1359static int64_t
1319sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) 1360sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
1320{ 1361{
1321 if (!left->branch_info || !right->branch_info) 1362 if (!left->branch_info || !right->branch_info)
@@ -1547,6 +1588,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
1547 DIM(SORT_MEM_LVL, "mem", sort_mem_lvl), 1588 DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
1548 DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), 1589 DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
1549 DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), 1590 DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
1591 DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
1550}; 1592};
1551 1593
1552#undef DIM 1594#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index b7c75597e18f..f36dc4980a6c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -245,6 +245,7 @@ enum sort_type {
245 SORT_MEM_SNOOP, 245 SORT_MEM_SNOOP,
246 SORT_MEM_DCACHELINE, 246 SORT_MEM_DCACHELINE,
247 SORT_MEM_IADDR_SYMBOL, 247 SORT_MEM_IADDR_SYMBOL,
248 SORT_MEM_PHYS_DADDR,
248}; 249};
249 250
250/* 251/*
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index d00a012cfdfb..2bd6a1f01a1c 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -186,6 +186,7 @@ struct addr_map_symbol {
186 struct symbol *sym; 186 struct symbol *sym;
187 u64 addr; 187 u64 addr;
188 u64 al_addr; 188 u64 al_addr;
189 u64 phys_addr;
189}; 190};
190 191
191struct branch_info { 192struct branch_info {
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index bbb4c1957578..19e5db90394c 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -19,6 +19,7 @@
19#ifdef HAVE_SYSCALL_TABLE 19#ifdef HAVE_SYSCALL_TABLE
20#include <linux/compiler.h> 20#include <linux/compiler.h>
21#include <string.h> 21#include <string.h>
22#include "string2.h"
22#include "util.h" 23#include "util.h"
23 24
24#if defined(__x86_64__) 25#if defined(__x86_64__)
@@ -105,6 +106,27 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
105 return sc ? sc->id : -1; 106 return sc ? sc->id : -1;
106} 107}
107 108
109int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
110{
111 int i;
112 struct syscall *syscalls = tbl->syscalls.entries;
113
114 for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
115 if (strglobmatch(syscalls[i].name, syscall_glob)) {
116 *idx = i;
117 return syscalls[i].id;
118 }
119 }
120
121 return -1;
122}
123
124int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
125{
126 *idx = -1;
127 return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
128}
129
108#else /* HAVE_SYSCALL_TABLE */ 130#else /* HAVE_SYSCALL_TABLE */
109 131
110#include <libaudit.h> 132#include <libaudit.h>
@@ -131,4 +153,15 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
131{ 153{
132 return audit_name_to_syscall(name, tbl->audit_machine); 154 return audit_name_to_syscall(name, tbl->audit_machine);
133} 155}
156
157int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
158 const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
159{
160 return -1;
161}
162
163int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
164{
165 return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
166}
134#endif /* HAVE_SYSCALL_TABLE */ 167#endif /* HAVE_SYSCALL_TABLE */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index e2951510484f..e9fb8786da7c 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -17,4 +17,7 @@ void syscalltbl__delete(struct syscalltbl *tbl);
17const char *syscalltbl__name(const struct syscalltbl *tbl, int id); 17const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
18int syscalltbl__id(struct syscalltbl *tbl, const char *name); 18int syscalltbl__id(struct syscalltbl *tbl, const char *name);
19 19
20int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
21int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
22
20#endif /* __PERF_SYSCALLTBL_H */ 23#endif /* __PERF_SYSCALLTBL_H */