diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-12 14:28:13 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-12 14:28:13 -0400 |
commit | e6328a7abe7f8fcd32e9d3bcbd14ff2161bf71c9 (patch) | |
tree | 12df1e085c05269a9289a5b7bcdad864993e29ad | |
parent | 33f82bda010224e908e23e59150b4d36904affe9 (diff) | |
parent | 770e96125515daf1c7bc179323f2e0d488dfe6ac (diff) |
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf tooling updates from Ingo Molnar:
"Perf tooling updates and fixes"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf annotate browser: Help for cycling thru hottest instructions with TAB/shift+TAB
perf stat: Only auto-merge events that are PMU aliases
perf test: Add test case for PERF_SAMPLE_PHYS_ADDR
perf script: Support physical address
perf mem: Support physical address
perf sort: Add sort option for physical address
perf tools: Support new sample type for physical address
perf vendor events powerpc: Remove duplicate events
perf intel-pt: Fix syntax in documentation of config option
perf test powerpc: Fix 'Object code reading' test
perf trace: Support syscall name globbing
perf syscalltbl: Support glob matching on syscall names
perf report: Calculate the average cycles of iterations
37 files changed, 368 insertions, 268 deletions
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 2a37ae925d85..140ae638cfd6 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h | |||
@@ -139,8 +139,9 @@ enum perf_event_sample_format { | |||
139 | PERF_SAMPLE_IDENTIFIER = 1U << 16, | 139 | PERF_SAMPLE_IDENTIFIER = 1U << 16, |
140 | PERF_SAMPLE_TRANSACTION = 1U << 17, | 140 | PERF_SAMPLE_TRANSACTION = 1U << 17, |
141 | PERF_SAMPLE_REGS_INTR = 1U << 18, | 141 | PERF_SAMPLE_REGS_INTR = 1U << 18, |
142 | PERF_SAMPLE_PHYS_ADDR = 1U << 19, | ||
142 | 143 | ||
143 | PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */ | 144 | PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ |
144 | }; | 145 | }; |
145 | 146 | ||
146 | /* | 147 | /* |
@@ -814,6 +815,7 @@ enum perf_event_type { | |||
814 | * { u64 transaction; } && PERF_SAMPLE_TRANSACTION | 815 | * { u64 transaction; } && PERF_SAMPLE_TRANSACTION |
815 | * { u64 abi; # enum perf_sample_regs_abi | 816 | * { u64 abi; # enum perf_sample_regs_abi |
816 | * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR | 817 | * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR |
818 | * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR | ||
817 | * }; | 819 | * }; |
818 | */ | 820 | */ |
819 | PERF_RECORD_SAMPLE = 9, | 821 | PERF_RECORD_SAMPLE = 9, |
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index ab1b0825130a..76971d2e4164 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt | |||
@@ -873,7 +873,7 @@ amended to take the number of elements as a parameter. | |||
873 | 873 | ||
874 | $ cat ~/.perfconfig | 874 | $ cat ~/.perfconfig |
875 | [intel-pt] | 875 | [intel-pt] |
876 | mispred-all | 876 | mispred-all = on |
877 | 877 | ||
878 | $ perf record -e intel_pt//u ./sort 3000 | 878 | $ perf record -e intel_pt//u ./sort 3000 |
879 | Bubble sorting array of 3000 elements | 879 | Bubble sorting array of 3000 elements |
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 73496320fca3..4be08a1e3f8d 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt | |||
@@ -59,6 +59,10 @@ OPTIONS | |||
59 | --ldload:: | 59 | --ldload:: |
60 | Specify desired latency for loads event. | 60 | Specify desired latency for loads event. |
61 | 61 | ||
62 | -p:: | ||
63 | --phys-data:: | ||
64 | Record/Report sample physical addresses. | ||
65 | |||
62 | SEE ALSO | 66 | SEE ALSO |
63 | -------- | 67 | -------- |
64 | linkperf:perf-record[1], linkperf:perf-report[1] | 68 | linkperf:perf-record[1], linkperf:perf-report[1] |
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 9bdea047c5db..e397453e5a46 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
@@ -249,7 +249,10 @@ OPTIONS | |||
249 | 249 | ||
250 | -d:: | 250 | -d:: |
251 | --data:: | 251 | --data:: |
252 | Record the sample addresses. | 252 | Record the sample virtual addresses. |
253 | |||
254 | --phys-data:: | ||
255 | Record the sample physical addresses. | ||
253 | 256 | ||
254 | -T:: | 257 | -T:: |
255 | --timestamp:: | 258 | --timestamp:: |
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9fa84617181e..383a98d992ed 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -137,6 +137,7 @@ OPTIONS | |||
137 | - mem: type of memory access for the data at the time of the sample | 137 | - mem: type of memory access for the data at the time of the sample |
138 | - snoop: type of snoop (if any) for the data at the time of the sample | 138 | - snoop: type of snoop (if any) for the data at the time of the sample |
139 | - dcacheline: the cacheline the data address is on at the time of the sample | 139 | - dcacheline: the cacheline the data address is on at the time of the sample |
140 | - phys_daddr: physical address of the data being accessed at the time of the sample | ||
140 | 141 | ||
141 | And the default sort keys are changed to local_weight, mem, sym, dso, | 142 | And the default sort keys are changed to local_weight, mem, sym, dso, |
142 | symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. | 143 | symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. |
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 5ee8796be96e..18dfcfa38454 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt | |||
@@ -117,7 +117,7 @@ OPTIONS | |||
117 | Comma separated list of fields to print. Options are: | 117 | Comma separated list of fields to print. Options are: |
118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, | 118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, |
119 | srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, | 119 | srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, |
120 | callindent, insn, insnlen, synth. | 120 | callindent, insn, insnlen, synth, phys_addr. |
121 | Field list can be prepended with the type, trace, sw or hw, | 121 | Field list can be prepended with the type, trace, sw or hw, |
122 | to indicate to which event type the field list applies. | 122 | to indicate to which event type the field list applies. |
123 | e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace | 123 | e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace |
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index c1e3288a2dfb..d53bea6bd571 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt | |||
@@ -37,7 +37,7 @@ OPTIONS | |||
37 | --expr:: | 37 | --expr:: |
38 | --event:: | 38 | --event:: |
39 | List of syscalls and other perf events (tracepoints, HW cache events, | 39 | List of syscalls and other perf events (tracepoints, HW cache events, |
40 | etc) to show. | 40 | etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc. |
41 | See 'perf list' for a complete list of events. | 41 | See 'perf list' for a complete list of events. |
42 | Prefixing with ! shows all syscalls but the ones specified. You may | 42 | Prefixing with ! shows all syscalls but the ones specified. You may |
43 | need to escape it. | 43 | need to escape it. |
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index e001c0290793..0f15634ef82c 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c | |||
@@ -23,6 +23,7 @@ struct perf_mem { | |||
23 | bool hide_unresolved; | 23 | bool hide_unresolved; |
24 | bool dump_raw; | 24 | bool dump_raw; |
25 | bool force; | 25 | bool force; |
26 | bool phys_addr; | ||
26 | int operation; | 27 | int operation; |
27 | const char *cpu_list; | 28 | const char *cpu_list; |
28 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); | 29 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); |
@@ -101,6 +102,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) | |||
101 | 102 | ||
102 | rec_argv[i++] = "-d"; | 103 | rec_argv[i++] = "-d"; |
103 | 104 | ||
105 | if (mem->phys_addr) | ||
106 | rec_argv[i++] = "--phys-data"; | ||
107 | |||
104 | for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { | 108 | for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { |
105 | if (!perf_mem_events[j].record) | 109 | if (!perf_mem_events[j].record) |
106 | continue; | 110 | continue; |
@@ -161,30 +165,60 @@ dump_raw_samples(struct perf_tool *tool, | |||
161 | if (al.map != NULL) | 165 | if (al.map != NULL) |
162 | al.map->dso->hit = 1; | 166 | al.map->dso->hit = 1; |
163 | 167 | ||
164 | if (symbol_conf.field_sep) { | 168 | if (mem->phys_addr) { |
165 | fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 | 169 | if (symbol_conf.field_sep) { |
166 | "%s0x%"PRIx64"%s%s:%s\n"; | 170 | fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64 |
171 | "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; | ||
172 | } else { | ||
173 | fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 | ||
174 | "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64 | ||
175 | "%s%s:%s\n"; | ||
176 | symbol_conf.field_sep = " "; | ||
177 | } | ||
178 | |||
179 | printf(fmt, | ||
180 | sample->pid, | ||
181 | symbol_conf.field_sep, | ||
182 | sample->tid, | ||
183 | symbol_conf.field_sep, | ||
184 | sample->ip, | ||
185 | symbol_conf.field_sep, | ||
186 | sample->addr, | ||
187 | symbol_conf.field_sep, | ||
188 | sample->phys_addr, | ||
189 | symbol_conf.field_sep, | ||
190 | sample->weight, | ||
191 | symbol_conf.field_sep, | ||
192 | sample->data_src, | ||
193 | symbol_conf.field_sep, | ||
194 | al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", | ||
195 | al.sym ? al.sym->name : "???"); | ||
167 | } else { | 196 | } else { |
168 | fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 | 197 | if (symbol_conf.field_sep) { |
169 | "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; | 198 | fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 |
170 | symbol_conf.field_sep = " "; | 199 | "%s0x%"PRIx64"%s%s:%s\n"; |
171 | } | 200 | } else { |
201 | fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 | ||
202 | "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; | ||
203 | symbol_conf.field_sep = " "; | ||
204 | } | ||
172 | 205 | ||
173 | printf(fmt, | 206 | printf(fmt, |
174 | sample->pid, | 207 | sample->pid, |
175 | symbol_conf.field_sep, | 208 | symbol_conf.field_sep, |
176 | sample->tid, | 209 | sample->tid, |
177 | symbol_conf.field_sep, | 210 | symbol_conf.field_sep, |
178 | sample->ip, | 211 | sample->ip, |
179 | symbol_conf.field_sep, | 212 | symbol_conf.field_sep, |
180 | sample->addr, | 213 | sample->addr, |
181 | symbol_conf.field_sep, | 214 | symbol_conf.field_sep, |
182 | sample->weight, | 215 | sample->weight, |
183 | symbol_conf.field_sep, | 216 | symbol_conf.field_sep, |
184 | sample->data_src, | 217 | sample->data_src, |
185 | symbol_conf.field_sep, | 218 | symbol_conf.field_sep, |
186 | al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", | 219 | al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", |
187 | al.sym ? al.sym->name : "???"); | 220 | al.sym ? al.sym->name : "???"); |
221 | } | ||
188 | out_put: | 222 | out_put: |
189 | addr_location__put(&al); | 223 | addr_location__put(&al); |
190 | return 0; | 224 | return 0; |
@@ -224,7 +258,10 @@ static int report_raw_events(struct perf_mem *mem) | |||
224 | if (ret < 0) | 258 | if (ret < 0) |
225 | goto out_delete; | 259 | goto out_delete; |
226 | 260 | ||
227 | printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); | 261 | if (mem->phys_addr) |
262 | printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); | ||
263 | else | ||
264 | printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); | ||
228 | 265 | ||
229 | ret = perf_session__process_events(session); | 266 | ret = perf_session__process_events(session); |
230 | 267 | ||
@@ -254,9 +291,16 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) | |||
254 | * there is no weight (cost) associated with stores, so don't print | 291 | * there is no weight (cost) associated with stores, so don't print |
255 | * the column | 292 | * the column |
256 | */ | 293 | */ |
257 | if (!(mem->operation & MEM_OPERATION_LOAD)) | 294 | if (!(mem->operation & MEM_OPERATION_LOAD)) { |
258 | rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," | 295 | if (mem->phys_addr) |
259 | "dso_daddr,tlb,locked"; | 296 | rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," |
297 | "dso_daddr,tlb,locked,phys_daddr"; | ||
298 | else | ||
299 | rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," | ||
300 | "dso_daddr,tlb,locked"; | ||
301 | } else if (mem->phys_addr) | ||
302 | rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr," | ||
303 | "dso_daddr,snoop,tlb,locked,phys_daddr"; | ||
260 | 304 | ||
261 | for (j = 1; j < argc; j++, i++) | 305 | for (j = 1; j < argc; j++, i++) |
262 | rep_argv[i] = argv[j]; | 306 | rep_argv[i] = argv[j]; |
@@ -373,6 +417,7 @@ int cmd_mem(int argc, const char **argv) | |||
373 | "separator for columns, no spaces will be added" | 417 | "separator for columns, no spaces will be added" |
374 | " between columns '.' is reserved."), | 418 | " between columns '.' is reserved."), |
375 | OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), | 419 | OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), |
420 | OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"), | ||
376 | OPT_END() | 421 | OPT_END() |
377 | }; | 422 | }; |
378 | const char *const mem_subcommands[] = { "record", "report", NULL }; | 423 | const char *const mem_subcommands[] = { "record", "report", NULL }; |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 36d7117a7562..56f8142ff97f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -1604,6 +1604,8 @@ static struct option __record_options[] = { | |||
1604 | OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, | 1604 | OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, |
1605 | "per thread counts"), | 1605 | "per thread counts"), |
1606 | OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), | 1606 | OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), |
1607 | OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, | ||
1608 | "Record the sample physical addresses"), | ||
1607 | OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), | 1609 | OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), |
1608 | OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, | 1610 | OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, |
1609 | &record.opts.sample_time_set, | 1611 | &record.opts.sample_time_set, |
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 378f76cdf923..3d4c3b5e1868 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
@@ -87,6 +87,7 @@ enum perf_output_field { | |||
87 | PERF_OUTPUT_BRSTACKINSN = 1U << 23, | 87 | PERF_OUTPUT_BRSTACKINSN = 1U << 23, |
88 | PERF_OUTPUT_BRSTACKOFF = 1U << 24, | 88 | PERF_OUTPUT_BRSTACKOFF = 1U << 24, |
89 | PERF_OUTPUT_SYNTH = 1U << 25, | 89 | PERF_OUTPUT_SYNTH = 1U << 25, |
90 | PERF_OUTPUT_PHYS_ADDR = 1U << 26, | ||
90 | }; | 91 | }; |
91 | 92 | ||
92 | struct output_option { | 93 | struct output_option { |
@@ -119,6 +120,7 @@ struct output_option { | |||
119 | {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, | 120 | {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, |
120 | {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, | 121 | {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, |
121 | {.str = "synth", .field = PERF_OUTPUT_SYNTH}, | 122 | {.str = "synth", .field = PERF_OUTPUT_SYNTH}, |
123 | {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, | ||
122 | }; | 124 | }; |
123 | 125 | ||
124 | enum { | 126 | enum { |
@@ -175,7 +177,8 @@ static struct { | |||
175 | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | | 177 | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | |
176 | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | | 178 | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | |
177 | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | | 179 | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | |
178 | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT, | 180 | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | |
181 | PERF_OUTPUT_PHYS_ADDR, | ||
179 | 182 | ||
180 | .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, | 183 | .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, |
181 | }, | 184 | }, |
@@ -382,6 +385,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, | |||
382 | PERF_OUTPUT_IREGS)) | 385 | PERF_OUTPUT_IREGS)) |
383 | return -EINVAL; | 386 | return -EINVAL; |
384 | 387 | ||
388 | if (PRINT_FIELD(PHYS_ADDR) && | ||
389 | perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", | ||
390 | PERF_OUTPUT_PHYS_ADDR)) | ||
391 | return -EINVAL; | ||
392 | |||
385 | return 0; | 393 | return 0; |
386 | } | 394 | } |
387 | 395 | ||
@@ -1446,6 +1454,9 @@ static void process_event(struct perf_script *script, | |||
1446 | if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) | 1454 | if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) |
1447 | print_sample_bpf_output(sample); | 1455 | print_sample_bpf_output(sample); |
1448 | print_insn(sample, attr, thread, machine); | 1456 | print_insn(sample, attr, thread, machine); |
1457 | |||
1458 | if (PRINT_FIELD(PHYS_ADDR)) | ||
1459 | printf("%16" PRIx64, sample->phys_addr); | ||
1449 | printf("\n"); | 1460 | printf("\n"); |
1450 | } | 1461 | } |
1451 | 1462 | ||
@@ -2729,7 +2740,7 @@ int cmd_script(int argc, const char **argv) | |||
2729 | "Valid types: hw,sw,trace,raw,synth. " | 2740 | "Valid types: hw,sw,trace,raw,synth. " |
2730 | "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," | 2741 | "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," |
2731 | "addr,symoff,period,iregs,brstack,brstacksym,flags," | 2742 | "addr,symoff,period,iregs,brstack,brstacksym,flags," |
2732 | "bpf-output,callindent,insn,insnlen,brstackinsn,synth", | 2743 | "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr", |
2733 | parse_output_fields), | 2744 | parse_output_fields), |
2734 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 2745 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
2735 | "system-wide collection from all CPUs"), | 2746 | "system-wide collection from all CPUs"), |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 866da7aa54bf..85e992d9215b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -1257,7 +1257,7 @@ static bool collect_data(struct perf_evsel *counter, | |||
1257 | if (counter->merged_stat) | 1257 | if (counter->merged_stat) |
1258 | return false; | 1258 | return false; |
1259 | cb(counter, data, true); | 1259 | cb(counter, data, true); |
1260 | if (!no_merge) | 1260 | if (!no_merge && counter->auto_merge_stats) |
1261 | collect_all_aliases(counter, cb, data); | 1261 | collect_all_aliases(counter, cb, data); |
1262 | return true; | 1262 | return true; |
1263 | } | 1263 | } |
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d59cdadf3a79..771ddab94bb0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
@@ -1261,6 +1261,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) | |||
1261 | static int trace__validate_ev_qualifier(struct trace *trace) | 1261 | static int trace__validate_ev_qualifier(struct trace *trace) |
1262 | { | 1262 | { |
1263 | int err = 0, i; | 1263 | int err = 0, i; |
1264 | size_t nr_allocated; | ||
1264 | struct str_node *pos; | 1265 | struct str_node *pos; |
1265 | 1266 | ||
1266 | trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); | 1267 | trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); |
@@ -1274,13 +1275,18 @@ static int trace__validate_ev_qualifier(struct trace *trace) | |||
1274 | goto out; | 1275 | goto out; |
1275 | } | 1276 | } |
1276 | 1277 | ||
1278 | nr_allocated = trace->ev_qualifier_ids.nr; | ||
1277 | i = 0; | 1279 | i = 0; |
1278 | 1280 | ||
1279 | strlist__for_each_entry(pos, trace->ev_qualifier) { | 1281 | strlist__for_each_entry(pos, trace->ev_qualifier) { |
1280 | const char *sc = pos->s; | 1282 | const char *sc = pos->s; |
1281 | int id = syscalltbl__id(trace->sctbl, sc); | 1283 | int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; |
1282 | 1284 | ||
1283 | if (id < 0) { | 1285 | if (id < 0) { |
1286 | id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next); | ||
1287 | if (id >= 0) | ||
1288 | goto matches; | ||
1289 | |||
1284 | if (err == 0) { | 1290 | if (err == 0) { |
1285 | fputs("Error:\tInvalid syscall ", trace->output); | 1291 | fputs("Error:\tInvalid syscall ", trace->output); |
1286 | err = -EINVAL; | 1292 | err = -EINVAL; |
@@ -1290,13 +1296,37 @@ static int trace__validate_ev_qualifier(struct trace *trace) | |||
1290 | 1296 | ||
1291 | fputs(sc, trace->output); | 1297 | fputs(sc, trace->output); |
1292 | } | 1298 | } |
1293 | 1299 | matches: | |
1294 | trace->ev_qualifier_ids.entries[i++] = id; | 1300 | trace->ev_qualifier_ids.entries[i++] = id; |
1301 | if (match_next == -1) | ||
1302 | continue; | ||
1303 | |||
1304 | while (1) { | ||
1305 | id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); | ||
1306 | if (id < 0) | ||
1307 | break; | ||
1308 | if (nr_allocated == trace->ev_qualifier_ids.nr) { | ||
1309 | void *entries; | ||
1310 | |||
1311 | nr_allocated += 8; | ||
1312 | entries = realloc(trace->ev_qualifier_ids.entries, | ||
1313 | nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0])); | ||
1314 | if (entries == NULL) { | ||
1315 | err = -ENOMEM; | ||
1316 | fputs("\nError:\t Not enough memory for parsing\n", trace->output); | ||
1317 | goto out_free; | ||
1318 | } | ||
1319 | trace->ev_qualifier_ids.entries = entries; | ||
1320 | } | ||
1321 | trace->ev_qualifier_ids.nr++; | ||
1322 | trace->ev_qualifier_ids.entries[i++] = id; | ||
1323 | } | ||
1295 | } | 1324 | } |
1296 | 1325 | ||
1297 | if (err < 0) { | 1326 | if (err < 0) { |
1298 | fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" | 1327 | fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" |
1299 | "\nHint:\tand: 'man syscalls'\n", trace->output); | 1328 | "\nHint:\tand: 'man syscalls'\n", trace->output); |
1329 | out_free: | ||
1300 | zfree(&trace->ev_qualifier_ids.entries); | 1330 | zfree(&trace->ev_qualifier_ids.entries); |
1301 | trace->ev_qualifier_ids.nr = 0; | 1331 | trace->ev_qualifier_ids.nr = 0; |
1302 | } | 1332 | } |
@@ -2814,7 +2844,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str, | |||
2814 | struct trace *trace = (struct trace *)opt->value; | 2844 | struct trace *trace = (struct trace *)opt->value; |
2815 | const char *s = str; | 2845 | const char *s = str; |
2816 | char *sep = NULL, *lists[2] = { NULL, NULL, }; | 2846 | char *sep = NULL, *lists[2] = { NULL, NULL, }; |
2817 | int len = strlen(str) + 1, err = -1, list; | 2847 | int len = strlen(str) + 1, err = -1, list, idx; |
2818 | char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); | 2848 | char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); |
2819 | char group_name[PATH_MAX]; | 2849 | char group_name[PATH_MAX]; |
2820 | 2850 | ||
@@ -2831,7 +2861,8 @@ static int trace__parse_events_option(const struct option *opt, const char *str, | |||
2831 | *sep = '\0'; | 2861 | *sep = '\0'; |
2832 | 2862 | ||
2833 | list = 0; | 2863 | list = 0; |
2834 | if (syscalltbl__id(trace->sctbl, s) >= 0) { | 2864 | if (syscalltbl__id(trace->sctbl, s) >= 0 || |
2865 | syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { | ||
2835 | list = 1; | 2866 | list = 1; |
2836 | } else { | 2867 | } else { |
2837 | path__join(group_name, sizeof(group_name), strace_groups_dir, s); | 2868 | path__join(group_name, sizeof(group_name), strace_groups_dir, s); |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 2c010dd6a79d..dc442ba21bf6 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -43,6 +43,7 @@ struct record_opts { | |||
43 | bool no_samples; | 43 | bool no_samples; |
44 | bool raw_samples; | 44 | bool raw_samples; |
45 | bool sample_address; | 45 | bool sample_address; |
46 | bool sample_phys_addr; | ||
46 | bool sample_weight; | 47 | bool sample_weight; |
47 | bool sample_time; | 48 | bool sample_time; |
48 | bool sample_time_set; | 49 | bool sample_time_set; |
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json index 7e62c46d7a20..c63a919eda98 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json | |||
@@ -80,11 +80,6 @@ | |||
80 | "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." | 80 | "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." |
81 | }, | 81 | }, |
82 | {, | 82 | {, |
83 | "EventCode": "0x400F0", | ||
84 | "EventName": "PM_LD_MISS_L1", | ||
85 | "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." | ||
86 | }, | ||
87 | {, | ||
88 | "EventCode": "0x2E01A", | 83 | "EventCode": "0x2E01A", |
89 | "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT", | 84 | "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT", |
90 | "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete" | 85 | "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete" |
@@ -374,4 +369,4 @@ | |||
374 | "EventName": "PM_IPTEG_FROM_L31_ECO_MOD", | 369 | "EventName": "PM_IPTEG_FROM_L31_ECO_MOD", |
375 | "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request" | 370 | "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request" |
376 | } | 371 | } |
377 | ] \ No newline at end of file | 372 | ] |
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json index 00f3d2a21f31..54cc3be00fc2 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json | |||
@@ -605,11 +605,6 @@ | |||
605 | "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)" | 605 | "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)" |
606 | }, | 606 | }, |
607 | {, | 607 | {, |
608 | "EventCode": "0x3689E", | ||
609 | "EventName": "PM_L2_RTY_LD", | ||
610 | "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)" | ||
611 | }, | ||
612 | {, | ||
613 | "EventCode": "0xE08C", | 608 | "EventCode": "0xE08C", |
614 | "EventName": "PM_LSU0_ERAT_HIT", | 609 | "EventName": "PM_LSU0_ERAT_HIT", |
615 | "BriefDescription": "Primary ERAT hit. There is no secondary ERAT" | 610 | "BriefDescription": "Primary ERAT hit. There is no secondary ERAT" |
@@ -715,11 +710,6 @@ | |||
715 | "BriefDescription": "Lifetime, sample of RD machine 0 valid" | 710 | "BriefDescription": "Lifetime, sample of RD machine 0 valid" |
716 | }, | 711 | }, |
717 | {, | 712 | {, |
718 | "EventCode": "0x468B4", | ||
719 | "EventName": "PM_L3_RD0_BUSY", | ||
720 | "BriefDescription": "Lifetime, sample of RD machine 0 valid" | ||
721 | }, | ||
722 | {, | ||
723 | "EventCode": "0x46080", | 713 | "EventCode": "0x46080", |
724 | "EventName": "PM_L2_DISP_ALL_L2MISS", | 714 | "EventName": "PM_L2_DISP_ALL_L2MISS", |
725 | "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" | 715 | "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" |
@@ -850,21 +840,11 @@ | |||
850 | "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)" | 840 | "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)" |
851 | }, | 841 | }, |
852 | {, | 842 | {, |
853 | "EventCode": "0x2608C", | ||
854 | "EventName": "PM_RC0_BUSY", | ||
855 | "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)" | ||
856 | }, | ||
857 | {, | ||
858 | "EventCode": "0x36082", | 843 | "EventCode": "0x36082", |
859 | "EventName": "PM_L2_LD_DISP", | 844 | "EventName": "PM_L2_LD_DISP", |
860 | "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)." | 845 | "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)." |
861 | }, | 846 | }, |
862 | {, | 847 | {, |
863 | "EventCode": "0x1609E", | ||
864 | "EventName": "PM_L2_LD_DISP", | ||
865 | "BriefDescription": "All successful D side load dispatches for this thread (L2 miss + L2 hits)" | ||
866 | }, | ||
867 | {, | ||
868 | "EventCode": "0xF8B0", | 848 | "EventCode": "0xF8B0", |
869 | "EventName": "PM_L3_SW_PREF", | 849 | "EventName": "PM_L3_SW_PREF", |
870 | "BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest" | 850 | "BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest" |
@@ -1040,11 +1020,6 @@ | |||
1040 | "BriefDescription": "L3 castouts in Mepf state for this thread" | 1020 | "BriefDescription": "L3 castouts in Mepf state for this thread" |
1041 | }, | 1021 | }, |
1042 | {, | 1022 | {, |
1043 | "EventCode": "0x168A0", | ||
1044 | "EventName": "PM_L3_CO_MEPF", | ||
1045 | "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request" | ||
1046 | }, | ||
1047 | {, | ||
1048 | "EventCode": "0x460A2", | 1023 | "EventCode": "0x460A2", |
1049 | "EventName": "PM_L3_LAT_CI_HIT", | 1024 | "EventName": "PM_L3_LAT_CI_HIT", |
1050 | "BriefDescription": "L3 Lateral Castins Hit" | 1025 | "BriefDescription": "L3 Lateral Castins Hit" |
@@ -1150,11 +1125,6 @@ | |||
1150 | "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)" | 1125 | "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)" |
1151 | }, | 1126 | }, |
1152 | {, | 1127 | {, |
1153 | "EventCode": "0x4689E", | ||
1154 | "EventName": "PM_L2_RTY_ST", | ||
1155 | "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)" | ||
1156 | }, | ||
1157 | {, | ||
1158 | "EventCode": "0x24040", | 1128 | "EventCode": "0x24040", |
1159 | "EventName": "PM_INST_FROM_L2_MEPF", | 1129 | "EventName": "PM_INST_FROM_L2_MEPF", |
1160 | "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)" | 1130 | "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)" |
@@ -1255,11 +1225,6 @@ | |||
1255 | "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)" | 1225 | "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)" |
1256 | }, | 1226 | }, |
1257 | {, | 1227 | {, |
1258 | "EventCode": "0x4608C", | ||
1259 | "EventName": "PM_CO0_BUSY", | ||
1260 | "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)" | ||
1261 | }, | ||
1262 | {, | ||
1263 | "EventCode": "0x2C122", | 1228 | "EventCode": "0x2C122", |
1264 | "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC", | 1229 | "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC", |
1265 | "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load" | 1230 | "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load" |
@@ -1395,11 +1360,6 @@ | |||
1395 | "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request" | 1360 | "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request" |
1396 | }, | 1361 | }, |
1397 | {, | 1362 | {, |
1398 | "EventCode": "0x40006", | ||
1399 | "EventName": "PM_ISLB_MISS", | ||
1400 | "BriefDescription": "Number of ISLB misses for this thread" | ||
1401 | }, | ||
1402 | {, | ||
1403 | "EventCode": "0xD8A8", | 1363 | "EventCode": "0xD8A8", |
1404 | "EventName": "PM_ISLB_MISS", | 1364 | "EventName": "PM_ISLB_MISS", |
1405 | "BriefDescription": "Instruction SLB miss - Total of all segment sizes" | 1365 | "BriefDescription": "Instruction SLB miss - Total of all segment sizes" |
@@ -1515,11 +1475,6 @@ | |||
1515 | "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)." | 1475 | "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)." |
1516 | }, | 1476 | }, |
1517 | {, | 1477 | {, |
1518 | "EventCode": "0x3609E", | ||
1519 | "EventName": "PM_L2_INST", | ||
1520 | "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" | ||
1521 | }, | ||
1522 | {, | ||
1523 | "EventCode": "0x3504C", | 1478 | "EventCode": "0x3504C", |
1524 | "EventName": "PM_IPTEG_FROM_DL4", | 1479 | "EventName": "PM_IPTEG_FROM_DL4", |
1525 | "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request" | 1480 | "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request" |
@@ -1690,11 +1645,6 @@ | |||
1690 | "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)" | 1645 | "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)" |
1691 | }, | 1646 | }, |
1692 | {, | 1647 | {, |
1693 | "EventCode": "0x2609E", | ||
1694 | "EventName": "PM_L2_LD_HIT", | ||
1695 | "BriefDescription": "All successful D side load dispatches for this thread that were L2 hits for this thread" | ||
1696 | }, | ||
1697 | {, | ||
1698 | "EventCode": "0x168AC", | 1648 | "EventCode": "0x168AC", |
1699 | "EventName": "PM_L3_CI_USAGE", | 1649 | "EventName": "PM_L3_CI_USAGE", |
1700 | "BriefDescription": "Rotating sample of 16 CI or CO actives" | 1650 | "BriefDescription": "Rotating sample of 16 CI or CO actives" |
@@ -1795,21 +1745,11 @@ | |||
1795 | "BriefDescription": "Rotating sample of 8 WI valid" | 1745 | "BriefDescription": "Rotating sample of 8 WI valid" |
1796 | }, | 1746 | }, |
1797 | {, | 1747 | {, |
1798 | "EventCode": "0x260B6", | ||
1799 | "EventName": "PM_L3_WI0_BUSY", | ||
1800 | "BriefDescription": "Rotating sample of 8 WI valid (duplicate)" | ||
1801 | }, | ||
1802 | {, | ||
1803 | "EventCode": "0x368AC", | 1748 | "EventCode": "0x368AC", |
1804 | "EventName": "PM_L3_CO0_BUSY", | 1749 | "EventName": "PM_L3_CO0_BUSY", |
1805 | "BriefDescription": "Lifetime, sample of CO machine 0 valid" | 1750 | "BriefDescription": "Lifetime, sample of CO machine 0 valid" |
1806 | }, | 1751 | }, |
1807 | {, | 1752 | {, |
1808 | "EventCode": "0x468AC", | ||
1809 | "EventName": "PM_L3_CO0_BUSY", | ||
1810 | "BriefDescription": "Lifetime, sample of CO machine 0 valid" | ||
1811 | }, | ||
1812 | {, | ||
1813 | "EventCode": "0x2E040", | 1753 | "EventCode": "0x2E040", |
1814 | "EventName": "PM_DPTEG_FROM_L2_MEPF", | 1754 | "EventName": "PM_DPTEG_FROM_L2_MEPF", |
1815 | "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" | 1755 | "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" |
@@ -1840,11 +1780,6 @@ | |||
1840 | "BriefDescription": "L3 PF received retry port 0, every retry counted" | 1780 | "BriefDescription": "L3 PF received retry port 0, every retry counted" |
1841 | }, | 1781 | }, |
1842 | {, | 1782 | {, |
1843 | "EventCode": "0x260AE", | ||
1844 | "EventName": "PM_L3_P0_PF_RTY", | ||
1845 | "BriefDescription": "L3 PF received retry port 0, every retry counted" | ||
1846 | }, | ||
1847 | {, | ||
1848 | "EventCode": "0x268B2", | 1783 | "EventCode": "0x268B2", |
1849 | "EventName": "PM_L3_LOC_GUESS_WRONG", | 1784 | "EventName": "PM_L3_LOC_GUESS_WRONG", |
1850 | "BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low" | 1785 | "BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low" |
@@ -1895,11 +1830,6 @@ | |||
1895 | "BriefDescription": "Lifetime, sample of snooper machine 0 valid" | 1830 | "BriefDescription": "Lifetime, sample of snooper machine 0 valid" |
1896 | }, | 1831 | }, |
1897 | {, | 1832 | {, |
1898 | "EventCode": "0x460AC", | ||
1899 | "EventName": "PM_L3_SN0_BUSY", | ||
1900 | "BriefDescription": "Lifetime, sample of snooper machine 0 valid" | ||
1901 | }, | ||
1902 | {, | ||
1903 | "EventCode": "0x3005C", | 1833 | "EventCode": "0x3005C", |
1904 | "EventName": "PM_BFU_BUSY", | 1834 | "EventName": "PM_BFU_BUSY", |
1905 | "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity" | 1835 | "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity" |
@@ -1935,11 +1865,6 @@ | |||
1935 | "BriefDescription": "Lifetime, sample of PF machine 0 valid" | 1865 | "BriefDescription": "Lifetime, sample of PF machine 0 valid" |
1936 | }, | 1866 | }, |
1937 | {, | 1867 | {, |
1938 | "EventCode": "0x460B4", | ||
1939 | "EventName": "PM_L3_PF0_BUSY", | ||
1940 | "BriefDescription": "Lifetime, sample of PF machine 0 valid" | ||
1941 | }, | ||
1942 | {, | ||
1943 | "EventCode": "0xC0B0", | 1868 | "EventCode": "0xC0B0", |
1944 | "EventName": "PM_LSU_FLUSH_UE", | 1869 | "EventName": "PM_LSU_FLUSH_UE", |
1945 | "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time" | 1870 | "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time" |
@@ -2085,11 +2010,6 @@ | |||
2085 | "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" | 2010 | "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" |
2086 | }, | 2011 | }, |
2087 | {, | 2012 | {, |
2088 | "EventCode": "0x468AE", | ||
2089 | "EventName": "PM_L3_P1_CO_RTY", | ||
2090 | "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted" | ||
2091 | }, | ||
2092 | {, | ||
2093 | "EventCode": "0xC0AC", | 2013 | "EventCode": "0xC0AC", |
2094 | "EventName": "PM_LSU_FLUSH_EMSH", | 2014 | "EventName": "PM_LSU_FLUSH_EMSH", |
2095 | "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address" | 2015 | "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address" |
@@ -2195,11 +2115,6 @@ | |||
2195 | "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)" | 2115 | "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)" |
2196 | }, | 2116 | }, |
2197 | {, | 2117 | {, |
2198 | "EventCode": "0x46886", | ||
2199 | "EventName": "PM_L2_SN_M_WR_DONE", | ||
2200 | "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)" | ||
2201 | }, | ||
2202 | {, | ||
2203 | "EventCode": "0x489C", | 2118 | "EventCode": "0x489C", |
2204 | "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", | 2119 | "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", |
2205 | "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time" | 2120 | "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time" |
@@ -2290,21 +2205,11 @@ | |||
2290 | "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)" | 2205 | "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)" |
2291 | }, | 2206 | }, |
2292 | {, | 2207 | {, |
2293 | "EventCode": "0x26090", | ||
2294 | "EventName": "PM_SN0_BUSY", | ||
2295 | "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)" | ||
2296 | }, | ||
2297 | {, | ||
2298 | "EventCode": "0x360AE", | 2208 | "EventCode": "0x360AE", |
2299 | "EventName": "PM_L3_P0_CO_RTY", | 2209 | "EventName": "PM_L3_P0_CO_RTY", |
2300 | "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted" | 2210 | "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted" |
2301 | }, | 2211 | }, |
2302 | {, | 2212 | {, |
2303 | "EventCode": "0x460AE", | ||
2304 | "EventName": "PM_L3_P0_CO_RTY", | ||
2305 | "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted" | ||
2306 | }, | ||
2307 | {, | ||
2308 | "EventCode": "0x168A8", | 2213 | "EventCode": "0x168A8", |
2309 | "EventName": "PM_L3_WI_USAGE", | 2214 | "EventName": "PM_L3_WI_USAGE", |
2310 | "BriefDescription": "Lifetime, sample of Write Inject machine 0 valid" | 2215 | "BriefDescription": "Lifetime, sample of Write Inject machine 0 valid" |
@@ -2340,26 +2245,11 @@ | |||
2340 | "BriefDescription": "L3 PF received retry port 1, every retry counted" | 2245 | "BriefDescription": "L3 PF received retry port 1, every retry counted" |
2341 | }, | 2246 | }, |
2342 | {, | 2247 | {, |
2343 | "EventCode": "0x268AE", | ||
2344 | "EventName": "PM_L3_P1_PF_RTY", | ||
2345 | "BriefDescription": "L3 PF received retry port 3, every retry counted" | ||
2346 | }, | ||
2347 | {, | ||
2348 | "EventCode": "0x46082", | 2248 | "EventCode": "0x46082", |
2349 | "EventName": "PM_L2_ST_DISP", | 2249 | "EventName": "PM_L2_ST_DISP", |
2350 | "BriefDescription": "All successful D-side store dispatches for this thread " | 2250 | "BriefDescription": "All successful D-side store dispatches for this thread " |
2351 | }, | 2251 | }, |
2352 | {, | 2252 | {, |
2353 | "EventCode": "0x1689E", | ||
2354 | "EventName": "PM_L2_ST_DISP", | ||
2355 | "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)" | ||
2356 | }, | ||
2357 | {, | ||
2358 | "EventCode": "0x36880", | ||
2359 | "EventName": "PM_L2_INST_MISS", | ||
2360 | "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" | ||
2361 | }, | ||
2362 | {, | ||
2363 | "EventCode": "0x4609E", | 2253 | "EventCode": "0x4609E", |
2364 | "EventName": "PM_L2_INST_MISS", | 2254 | "EventName": "PM_L2_INST_MISS", |
2365 | "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" | 2255 | "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" |
@@ -2430,11 +2320,6 @@ | |||
2430 | "BriefDescription": "# PPC Dispatched" | 2320 | "BriefDescription": "# PPC Dispatched" |
2431 | }, | 2321 | }, |
2432 | {, | 2322 | {, |
2433 | "EventCode": "0x300F2", | ||
2434 | "EventName": "PM_INST_DISP", | ||
2435 | "BriefDescription": "# PPC Dispatched" | ||
2436 | }, | ||
2437 | {, | ||
2438 | "EventCode": "0x4E05E", | 2323 | "EventCode": "0x4E05E", |
2439 | "EventName": "PM_TM_OUTER_TBEGIN_DISP", | 2324 | "EventName": "PM_TM_OUTER_TBEGIN_DISP", |
2440 | "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions" | 2325 | "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions" |
@@ -2460,11 +2345,6 @@ | |||
2460 | "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" | 2345 | "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" |
2461 | }, | 2346 | }, |
2462 | {, | 2347 | {, |
2463 | "EventCode": "0x2689E", | ||
2464 | "EventName": "PM_L2_ST_HIT", | ||
2465 | "BriefDescription": "All successful D-side store dispatches that were L2 hits for this thread" | ||
2466 | }, | ||
2467 | {, | ||
2468 | "EventCode": "0x360A8", | 2348 | "EventCode": "0x360A8", |
2469 | "EventName": "PM_L3_CO", | 2349 | "EventName": "PM_L3_CO", |
2470 | "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))" | 2350 | "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))" |
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json index 47a82568a8df..bc2db636dabf 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json | |||
@@ -420,11 +420,6 @@ | |||
420 | "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch" | 420 | "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch" |
421 | }, | 421 | }, |
422 | {, | 422 | {, |
423 | "EventCode": "0x10016", | ||
424 | "EventName": "PM_DSLB_MISS", | ||
425 | "BriefDescription": "Data SLB Miss - Total of all segment sizes" | ||
426 | }, | ||
427 | {, | ||
428 | "EventCode": "0xD0A8", | 423 | "EventCode": "0xD0A8", |
429 | "EventName": "PM_DSLB_MISS", | 424 | "EventName": "PM_DSLB_MISS", |
430 | "BriefDescription": "Data SLB Miss - Total of all segment sizes" | 425 | "BriefDescription": "Data SLB Miss - Total of all segment sizes" |
@@ -554,4 +549,4 @@ | |||
554 | "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC", | 549 | "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC", |
555 | "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load" | 550 | "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load" |
556 | } | 551 | } |
557 | ] \ No newline at end of file | 552 | ] |
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json index a2c95a99e168..3ef8a10aac86 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json | |||
@@ -5,11 +5,6 @@ | |||
5 | "BriefDescription": "Branches that are not strongly biased" | 5 | "BriefDescription": "Branches that are not strongly biased" |
6 | }, | 6 | }, |
7 | {, | 7 | {, |
8 | "EventCode": "0x40036", | ||
9 | "EventName": "PM_BR_2PATH", | ||
10 | "BriefDescription": "Branches that are not strongly biased" | ||
11 | }, | ||
12 | {, | ||
13 | "EventCode": "0x40056", | 8 | "EventCode": "0x40056", |
14 | "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH", | 9 | "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH", |
15 | "BriefDescription": "Local memory above threshold for LSU medium" | 10 | "BriefDescription": "Local memory above threshold for LSU medium" |
@@ -124,4 +119,4 @@ | |||
124 | "EventName": "PM_1FLOP_CMPL", | 119 | "EventName": "PM_1FLOP_CMPL", |
125 | "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed" | 120 | "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed" |
126 | } | 121 | } |
127 | ] \ No newline at end of file | 122 | ] |
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 761c5a448c56..466a462b26d1 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c | |||
@@ -237,6 +237,11 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, | |||
237 | 237 | ||
238 | thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); | 238 | thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); |
239 | if (!al.map || !al.map->dso) { | 239 | if (!al.map || !al.map->dso) { |
240 | if (cpumode == PERF_RECORD_MISC_HYPERVISOR) { | ||
241 | pr_debug("Hypervisor address can not be resolved - skipping\n"); | ||
242 | return 0; | ||
243 | } | ||
244 | |||
240 | pr_debug("thread__find_addr_map failed\n"); | 245 | pr_debug("thread__find_addr_map failed\n"); |
241 | return -1; | 246 | return -1; |
242 | } | 247 | } |
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 6d028f42b3cf..c3858487159d 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c | |||
@@ -141,6 +141,9 @@ static bool samples_same(const struct perf_sample *s1, | |||
141 | } | 141 | } |
142 | } | 142 | } |
143 | 143 | ||
144 | if (type & PERF_SAMPLE_PHYS_ADDR) | ||
145 | COMP(phys_addr); | ||
146 | |||
144 | return true; | 147 | return true; |
145 | } | 148 | } |
146 | 149 | ||
@@ -206,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) | |||
206 | .mask = sample_regs, | 209 | .mask = sample_regs, |
207 | .regs = regs, | 210 | .regs = regs, |
208 | }, | 211 | }, |
212 | .phys_addr = 113, | ||
209 | }; | 213 | }; |
210 | struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; | 214 | struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; |
211 | struct perf_sample sample_out; | 215 | struct perf_sample sample_out; |
@@ -305,7 +309,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u | |||
305 | * were added. Please actually update the test rather than just change | 309 | * were added. Please actually update the test rather than just change |
306 | * the condition below. | 310 | * the condition below. |
307 | */ | 311 | */ |
308 | if (PERF_SAMPLE_MAX > PERF_SAMPLE_REGS_INTR << 1) { | 312 | if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) { |
309 | pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); | 313 | pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); |
310 | return -1; | 314 | return -1; |
311 | } | 315 | } |
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ba0aee576a2b..786fecaf578e 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c | |||
@@ -829,7 +829,8 @@ static int annotate_browser__run(struct annotate_browser *browser, | |||
829 | "q/ESC/CTRL+C Exit\n\n" | 829 | "q/ESC/CTRL+C Exit\n\n" |
830 | "ENTER Go to target\n" | 830 | "ENTER Go to target\n" |
831 | "ESC Exit\n" | 831 | "ESC Exit\n" |
832 | "H Cycle thru hottest instructions\n" | 832 | "H Go to hottest instruction\n" |
833 | "TAB/shift+TAB Cycle thru hottest instructions\n" | ||
833 | "j Toggle showing jump to target arrows\n" | 834 | "j Toggle showing jump to target arrows\n" |
834 | "J Toggle showing number of jump sources on targets\n" | 835 | "J Toggle showing number of jump sources on targets\n" |
835 | "n Search next string\n" | 836 | "n Search next string\n" |
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f4bc2462bc2c..13dfb0a0bdeb 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c | |||
@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, | |||
931 | browser->show_dso); | 931 | browser->show_dso); |
932 | 932 | ||
933 | if (symbol_conf.show_branchflag_count) { | 933 | if (symbol_conf.show_branchflag_count) { |
934 | if (need_percent) | 934 | callchain_list_counts__printf_value(chain, NULL, |
935 | callchain_list_counts__printf_value(node, chain, NULL, | 935 | buf, sizeof(buf)); |
936 | buf, sizeof(buf)); | ||
937 | else | ||
938 | callchain_list_counts__printf_value(NULL, chain, NULL, | ||
939 | buf, sizeof(buf)); | ||
940 | 936 | ||
941 | if (asprintf(&alloc_str2, "%s%s", str, buf) < 0) | 937 | if (asprintf(&alloc_str2, "%s%s", str, buf) < 0) |
942 | str = "Not enough memory!"; | 938 | str = "Not enough memory!"; |
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 5c95b8301c67..8bdb7a500181 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c | |||
@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, | |||
124 | str = callchain_list__sym_name(chain, bf, sizeof(bf), false); | 124 | str = callchain_list__sym_name(chain, bf, sizeof(bf), false); |
125 | 125 | ||
126 | if (symbol_conf.show_branchflag_count) { | 126 | if (symbol_conf.show_branchflag_count) { |
127 | if (!period) | 127 | callchain_list_counts__printf_value(chain, NULL, |
128 | callchain_list_counts__printf_value(node, chain, NULL, | 128 | buf, sizeof(buf)); |
129 | buf, sizeof(buf)); | ||
130 | else | ||
131 | callchain_list_counts__printf_value(NULL, chain, NULL, | ||
132 | buf, sizeof(buf)); | ||
133 | 129 | ||
134 | if (asprintf(&alloc_str, "%s%s", str, buf) < 0) | 130 | if (asprintf(&alloc_str, "%s%s", str, buf) < 0) |
135 | str = "Not enough memory!"; | 131 | str = "Not enough memory!"; |
@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, | |||
313 | 309 | ||
314 | if (symbol_conf.show_branchflag_count) | 310 | if (symbol_conf.show_branchflag_count) |
315 | ret += callchain_list_counts__printf_value( | 311 | ret += callchain_list_counts__printf_value( |
316 | NULL, chain, fp, NULL, 0); | 312 | chain, fp, NULL, 0); |
317 | ret += fprintf(fp, "\n"); | 313 | ret += fprintf(fp, "\n"); |
318 | 314 | ||
319 | if (++entries_printed == callchain_param.print_limit) | 315 | if (++entries_printed == callchain_param.print_limit) |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index f320b0777e0d..510b513e0f01 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
@@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) | |||
588 | call->cycles_count = | 588 | call->cycles_count = |
589 | cursor_node->branch_flags.cycles; | 589 | cursor_node->branch_flags.cycles; |
590 | call->iter_count = cursor_node->nr_loop_iter; | 590 | call->iter_count = cursor_node->nr_loop_iter; |
591 | call->samples_count = cursor_node->samples; | 591 | call->iter_cycles = cursor_node->iter_cycles; |
592 | } | 592 | } |
593 | } | 593 | } |
594 | 594 | ||
@@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node, | |||
722 | cnode->cycles_count += | 722 | cnode->cycles_count += |
723 | node->branch_flags.cycles; | 723 | node->branch_flags.cycles; |
724 | cnode->iter_count += node->nr_loop_iter; | 724 | cnode->iter_count += node->nr_loop_iter; |
725 | cnode->samples_count += node->samples; | 725 | cnode->iter_cycles += node->iter_cycles; |
726 | } | 726 | } |
727 | } | 727 | } |
728 | 728 | ||
@@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor, | |||
998 | int callchain_cursor_append(struct callchain_cursor *cursor, | 998 | int callchain_cursor_append(struct callchain_cursor *cursor, |
999 | u64 ip, struct map *map, struct symbol *sym, | 999 | u64 ip, struct map *map, struct symbol *sym, |
1000 | bool branch, struct branch_flags *flags, | 1000 | bool branch, struct branch_flags *flags, |
1001 | int nr_loop_iter, int samples, u64 branch_from) | 1001 | int nr_loop_iter, u64 iter_cycles, u64 branch_from) |
1002 | { | 1002 | { |
1003 | struct callchain_cursor_node *node = *cursor->last; | 1003 | struct callchain_cursor_node *node = *cursor->last; |
1004 | 1004 | ||
@@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, | |||
1016 | node->sym = sym; | 1016 | node->sym = sym; |
1017 | node->branch = branch; | 1017 | node->branch = branch; |
1018 | node->nr_loop_iter = nr_loop_iter; | 1018 | node->nr_loop_iter = nr_loop_iter; |
1019 | node->samples = samples; | 1019 | node->iter_cycles = iter_cycles; |
1020 | 1020 | ||
1021 | if (flags) | 1021 | if (flags) |
1022 | memcpy(&node->branch_flags, flags, | 1022 | memcpy(&node->branch_flags, flags, |
@@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize, | |||
1306 | static int branch_from_str(char *bf, int bfsize, | 1306 | static int branch_from_str(char *bf, int bfsize, |
1307 | u64 branch_count, | 1307 | u64 branch_count, |
1308 | u64 cycles_count, u64 iter_count, | 1308 | u64 cycles_count, u64 iter_count, |
1309 | u64 samples_count) | 1309 | u64 iter_cycles) |
1310 | { | 1310 | { |
1311 | int printed = 0, i = 0; | 1311 | int printed = 0, i = 0; |
1312 | u64 cycles; | 1312 | u64 cycles; |
@@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize, | |||
1318 | bf + printed, bfsize - printed); | 1318 | bf + printed, bfsize - printed); |
1319 | } | 1319 | } |
1320 | 1320 | ||
1321 | if (iter_count && samples_count) { | 1321 | if (iter_count) { |
1322 | printed += count_pri64_printf(i++, "iterations", | 1322 | printed += count_pri64_printf(i++, "iter", |
1323 | iter_count / samples_count, | 1323 | iter_count, |
1324 | bf + printed, bfsize - printed); | ||
1325 | |||
1326 | printed += count_pri64_printf(i++, "avg_cycles", | ||
1327 | iter_cycles / iter_count, | ||
1324 | bf + printed, bfsize - printed); | 1328 | bf + printed, bfsize - printed); |
1325 | } | 1329 | } |
1326 | 1330 | ||
@@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize, | |||
1333 | static int counts_str_build(char *bf, int bfsize, | 1337 | static int counts_str_build(char *bf, int bfsize, |
1334 | u64 branch_count, u64 predicted_count, | 1338 | u64 branch_count, u64 predicted_count, |
1335 | u64 abort_count, u64 cycles_count, | 1339 | u64 abort_count, u64 cycles_count, |
1336 | u64 iter_count, u64 samples_count, | 1340 | u64 iter_count, u64 iter_cycles, |
1337 | struct branch_type_stat *brtype_stat) | 1341 | struct branch_type_stat *brtype_stat) |
1338 | { | 1342 | { |
1339 | int printed; | 1343 | int printed; |
@@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize, | |||
1346 | predicted_count, abort_count, brtype_stat); | 1350 | predicted_count, abort_count, brtype_stat); |
1347 | } else { | 1351 | } else { |
1348 | printed = branch_from_str(bf, bfsize, branch_count, | 1352 | printed = branch_from_str(bf, bfsize, branch_count, |
1349 | cycles_count, iter_count, samples_count); | 1353 | cycles_count, iter_count, iter_cycles); |
1350 | } | 1354 | } |
1351 | 1355 | ||
1352 | if (!printed) | 1356 | if (!printed) |
@@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize, | |||
1358 | static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, | 1362 | static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, |
1359 | u64 branch_count, u64 predicted_count, | 1363 | u64 branch_count, u64 predicted_count, |
1360 | u64 abort_count, u64 cycles_count, | 1364 | u64 abort_count, u64 cycles_count, |
1361 | u64 iter_count, u64 samples_count, | 1365 | u64 iter_count, u64 iter_cycles, |
1362 | struct branch_type_stat *brtype_stat) | 1366 | struct branch_type_stat *brtype_stat) |
1363 | { | 1367 | { |
1364 | char str[256]; | 1368 | char str[256]; |
1365 | 1369 | ||
1366 | counts_str_build(str, sizeof(str), branch_count, | 1370 | counts_str_build(str, sizeof(str), branch_count, |
1367 | predicted_count, abort_count, cycles_count, | 1371 | predicted_count, abort_count, cycles_count, |
1368 | iter_count, samples_count, brtype_stat); | 1372 | iter_count, iter_cycles, brtype_stat); |
1369 | 1373 | ||
1370 | if (fp) | 1374 | if (fp) |
1371 | return fprintf(fp, "%s", str); | 1375 | return fprintf(fp, "%s", str); |
@@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, | |||
1373 | return scnprintf(bf, bfsize, "%s", str); | 1377 | return scnprintf(bf, bfsize, "%s", str); |
1374 | } | 1378 | } |
1375 | 1379 | ||
1376 | int callchain_list_counts__printf_value(struct callchain_node *node, | 1380 | int callchain_list_counts__printf_value(struct callchain_list *clist, |
1377 | struct callchain_list *clist, | ||
1378 | FILE *fp, char *bf, int bfsize) | 1381 | FILE *fp, char *bf, int bfsize) |
1379 | { | 1382 | { |
1380 | u64 branch_count, predicted_count; | 1383 | u64 branch_count, predicted_count; |
1381 | u64 abort_count, cycles_count; | 1384 | u64 abort_count, cycles_count; |
1382 | u64 iter_count = 0, samples_count = 0; | 1385 | u64 iter_count, iter_cycles; |
1383 | 1386 | ||
1384 | branch_count = clist->branch_count; | 1387 | branch_count = clist->branch_count; |
1385 | predicted_count = clist->predicted_count; | 1388 | predicted_count = clist->predicted_count; |
1386 | abort_count = clist->abort_count; | 1389 | abort_count = clist->abort_count; |
1387 | cycles_count = clist->cycles_count; | 1390 | cycles_count = clist->cycles_count; |
1388 | 1391 | iter_count = clist->iter_count; | |
1389 | if (node) { | 1392 | iter_cycles = clist->iter_cycles; |
1390 | struct callchain_list *call; | ||
1391 | |||
1392 | list_for_each_entry(call, &node->val, list) { | ||
1393 | iter_count += call->iter_count; | ||
1394 | samples_count += call->samples_count; | ||
1395 | } | ||
1396 | } | ||
1397 | 1393 | ||
1398 | return callchain_counts_printf(fp, bf, bfsize, branch_count, | 1394 | return callchain_counts_printf(fp, bf, bfsize, branch_count, |
1399 | predicted_count, abort_count, | 1395 | predicted_count, abort_count, |
1400 | cycles_count, iter_count, samples_count, | 1396 | cycles_count, iter_count, iter_cycles, |
1401 | &clist->brtype_stat); | 1397 | &clist->brtype_stat); |
1402 | } | 1398 | } |
1403 | 1399 | ||
@@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst, | |||
1523 | 1519 | ||
1524 | rc = callchain_cursor_append(dst, node->ip, node->map, node->sym, | 1520 | rc = callchain_cursor_append(dst, node->ip, node->map, node->sym, |
1525 | node->branch, &node->branch_flags, | 1521 | node->branch, &node->branch_flags, |
1526 | node->nr_loop_iter, node->samples, | 1522 | node->nr_loop_iter, |
1523 | node->iter_cycles, | ||
1527 | node->branch_from); | 1524 | node->branch_from); |
1528 | if (rc) | 1525 | if (rc) |
1529 | break; | 1526 | break; |
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 97738201464a..1ed6fc61d0a5 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
@@ -119,7 +119,7 @@ struct callchain_list { | |||
119 | u64 abort_count; | 119 | u64 abort_count; |
120 | u64 cycles_count; | 120 | u64 cycles_count; |
121 | u64 iter_count; | 121 | u64 iter_count; |
122 | u64 samples_count; | 122 | u64 iter_cycles; |
123 | struct branch_type_stat brtype_stat; | 123 | struct branch_type_stat brtype_stat; |
124 | char *srcline; | 124 | char *srcline; |
125 | struct list_head list; | 125 | struct list_head list; |
@@ -139,7 +139,7 @@ struct callchain_cursor_node { | |||
139 | struct branch_flags branch_flags; | 139 | struct branch_flags branch_flags; |
140 | u64 branch_from; | 140 | u64 branch_from; |
141 | int nr_loop_iter; | 141 | int nr_loop_iter; |
142 | int samples; | 142 | u64 iter_cycles; |
143 | struct callchain_cursor_node *next; | 143 | struct callchain_cursor_node *next; |
144 | }; | 144 | }; |
145 | 145 | ||
@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) | |||
201 | int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, | 201 | int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, |
202 | struct map *map, struct symbol *sym, | 202 | struct map *map, struct symbol *sym, |
203 | bool branch, struct branch_flags *flags, | 203 | bool branch, struct branch_flags *flags, |
204 | int nr_loop_iter, int samples, u64 branch_from); | 204 | int nr_loop_iter, u64 iter_cycles, u64 branch_from); |
205 | 205 | ||
206 | /* Close a cursor writing session. Initialize for the reader */ | 206 | /* Close a cursor writing session. Initialize for the reader */ |
207 | static inline void callchain_cursor_commit(struct callchain_cursor *cursor) | 207 | static inline void callchain_cursor_commit(struct callchain_cursor *cursor) |
@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node, | |||
282 | int callchain_node__fprintf_value(struct callchain_node *node, | 282 | int callchain_node__fprintf_value(struct callchain_node *node, |
283 | FILE *fp, u64 total); | 283 | FILE *fp, u64 total); |
284 | 284 | ||
285 | int callchain_list_counts__printf_value(struct callchain_node *node, | 285 | int callchain_list_counts__printf_value(struct callchain_list *clist, |
286 | struct callchain_list *clist, | ||
287 | FILE *fp, char *bf, int bfsize); | 286 | FILE *fp, char *bf, int bfsize); |
288 | 287 | ||
289 | void free_callchain(struct callchain_root *root); | 288 | void free_callchain(struct callchain_root *root); |
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 423ac82605f3..ee7bcc898d35 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
@@ -200,6 +200,7 @@ struct perf_sample { | |||
200 | u32 cpu; | 200 | u32 cpu; |
201 | u32 raw_size; | 201 | u32 raw_size; |
202 | u64 data_src; | 202 | u64 data_src; |
203 | u64 phys_addr; | ||
203 | u32 flags; | 204 | u32 flags; |
204 | u16 insn_len; | 205 | u16 insn_len; |
205 | u8 cpumode; | 206 | u8 cpumode; |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d9bd632ed7db..4bb89373eb52 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -955,6 +955,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, | |||
955 | if (opts->sample_address) | 955 | if (opts->sample_address) |
956 | perf_evsel__set_sample_bit(evsel, DATA_SRC); | 956 | perf_evsel__set_sample_bit(evsel, DATA_SRC); |
957 | 957 | ||
958 | if (opts->sample_phys_addr) | ||
959 | perf_evsel__set_sample_bit(evsel, PHYS_ADDR); | ||
960 | |||
958 | if (opts->no_buffering) { | 961 | if (opts->no_buffering) { |
959 | attr->watermark = 0; | 962 | attr->watermark = 0; |
960 | attr->wakeup_events = 1; | 963 | attr->wakeup_events = 1; |
@@ -1464,7 +1467,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) | |||
1464 | bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), | 1467 | bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), |
1465 | bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), | 1468 | bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), |
1466 | bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), | 1469 | bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), |
1467 | bit_name(WEIGHT), | 1470 | bit_name(WEIGHT), bit_name(PHYS_ADDR), |
1468 | { .name = NULL, } | 1471 | { .name = NULL, } |
1469 | }; | 1472 | }; |
1470 | #undef bit_name | 1473 | #undef bit_name |
@@ -2206,6 +2209,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, | |||
2206 | } | 2209 | } |
2207 | } | 2210 | } |
2208 | 2211 | ||
2212 | data->phys_addr = 0; | ||
2213 | if (type & PERF_SAMPLE_PHYS_ADDR) { | ||
2214 | data->phys_addr = *array; | ||
2215 | array++; | ||
2216 | } | ||
2217 | |||
2209 | return 0; | 2218 | return 0; |
2210 | } | 2219 | } |
2211 | 2220 | ||
@@ -2311,6 +2320,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, | |||
2311 | } | 2320 | } |
2312 | } | 2321 | } |
2313 | 2322 | ||
2323 | if (type & PERF_SAMPLE_PHYS_ADDR) | ||
2324 | result += sizeof(u64); | ||
2325 | |||
2314 | return result; | 2326 | return result; |
2315 | } | 2327 | } |
2316 | 2328 | ||
@@ -2500,6 +2512,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, | |||
2500 | } | 2512 | } |
2501 | } | 2513 | } |
2502 | 2514 | ||
2515 | if (type & PERF_SAMPLE_PHYS_ADDR) { | ||
2516 | *array = sample->phys_addr; | ||
2517 | array++; | ||
2518 | } | ||
2519 | |||
2503 | return 0; | 2520 | return 0; |
2504 | } | 2521 | } |
2505 | 2522 | ||
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 351d3b2d8887..dd2c4b5112a5 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h | |||
@@ -131,6 +131,7 @@ struct perf_evsel { | |||
131 | bool cmdline_group_boundary; | 131 | bool cmdline_group_boundary; |
132 | struct list_head config_terms; | 132 | struct list_head config_terms; |
133 | int bpf_fd; | 133 | int bpf_fd; |
134 | bool auto_merge_stats; | ||
134 | bool merged_stat; | 135 | bool merged_stat; |
135 | const char * metric_expr; | 136 | const char * metric_expr; |
136 | const char * metric_name; | 137 | const char * metric_name; |
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 9453b2e27015..e60d8d8ea4c2 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -167,6 +167,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | |||
167 | symlen = unresolved_col_width + 4 + 2; | 167 | symlen = unresolved_col_width + 4 + 2; |
168 | hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); | 168 | hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); |
169 | } | 169 | } |
170 | |||
171 | hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR, | ||
172 | unresolved_col_width + 4 + 2); | ||
173 | |||
170 | } else { | 174 | } else { |
171 | symlen = unresolved_col_width + 4 + 2; | 175 | symlen = unresolved_col_width + 4 + 2; |
172 | hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); | 176 | hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); |
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ee3670a388df..e60dda26a920 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
@@ -47,6 +47,7 @@ enum hist_column { | |||
47 | HISTC_GLOBAL_WEIGHT, | 47 | HISTC_GLOBAL_WEIGHT, |
48 | HISTC_MEM_DADDR_SYMBOL, | 48 | HISTC_MEM_DADDR_SYMBOL, |
49 | HISTC_MEM_DADDR_DSO, | 49 | HISTC_MEM_DADDR_DSO, |
50 | HISTC_MEM_PHYS_DADDR, | ||
50 | HISTC_MEM_LOCKED, | 51 | HISTC_MEM_LOCKED, |
51 | HISTC_MEM_TLB, | 52 | HISTC_MEM_TLB, |
52 | HISTC_MEM_LVL, | 53 | HISTC_MEM_LVL, |
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 5c8eacaca4f4..df709363ef69 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c | |||
@@ -1635,10 +1635,12 @@ static void ip__resolve_ams(struct thread *thread, | |||
1635 | ams->al_addr = al.addr; | 1635 | ams->al_addr = al.addr; |
1636 | ams->sym = al.sym; | 1636 | ams->sym = al.sym; |
1637 | ams->map = al.map; | 1637 | ams->map = al.map; |
1638 | ams->phys_addr = 0; | ||
1638 | } | 1639 | } |
1639 | 1640 | ||
1640 | static void ip__resolve_data(struct thread *thread, | 1641 | static void ip__resolve_data(struct thread *thread, |
1641 | u8 m, struct addr_map_symbol *ams, u64 addr) | 1642 | u8 m, struct addr_map_symbol *ams, |
1643 | u64 addr, u64 phys_addr) | ||
1642 | { | 1644 | { |
1643 | struct addr_location al; | 1645 | struct addr_location al; |
1644 | 1646 | ||
@@ -1658,6 +1660,7 @@ static void ip__resolve_data(struct thread *thread, | |||
1658 | ams->al_addr = al.addr; | 1660 | ams->al_addr = al.addr; |
1659 | ams->sym = al.sym; | 1661 | ams->sym = al.sym; |
1660 | ams->map = al.map; | 1662 | ams->map = al.map; |
1663 | ams->phys_addr = phys_addr; | ||
1661 | } | 1664 | } |
1662 | 1665 | ||
1663 | struct mem_info *sample__resolve_mem(struct perf_sample *sample, | 1666 | struct mem_info *sample__resolve_mem(struct perf_sample *sample, |
@@ -1669,12 +1672,18 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, | |||
1669 | return NULL; | 1672 | return NULL; |
1670 | 1673 | ||
1671 | ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); | 1674 | ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); |
1672 | ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr); | 1675 | ip__resolve_data(al->thread, al->cpumode, &mi->daddr, |
1676 | sample->addr, sample->phys_addr); | ||
1673 | mi->data_src.val = sample->data_src; | 1677 | mi->data_src.val = sample->data_src; |
1674 | 1678 | ||
1675 | return mi; | 1679 | return mi; |
1676 | } | 1680 | } |
1677 | 1681 | ||
1682 | struct iterations { | ||
1683 | int nr_loop_iter; | ||
1684 | u64 cycles; | ||
1685 | }; | ||
1686 | |||
1678 | static int add_callchain_ip(struct thread *thread, | 1687 | static int add_callchain_ip(struct thread *thread, |
1679 | struct callchain_cursor *cursor, | 1688 | struct callchain_cursor *cursor, |
1680 | struct symbol **parent, | 1689 | struct symbol **parent, |
@@ -1683,11 +1692,12 @@ static int add_callchain_ip(struct thread *thread, | |||
1683 | u64 ip, | 1692 | u64 ip, |
1684 | bool branch, | 1693 | bool branch, |
1685 | struct branch_flags *flags, | 1694 | struct branch_flags *flags, |
1686 | int nr_loop_iter, | 1695 | struct iterations *iter, |
1687 | int samples, | ||
1688 | u64 branch_from) | 1696 | u64 branch_from) |
1689 | { | 1697 | { |
1690 | struct addr_location al; | 1698 | struct addr_location al; |
1699 | int nr_loop_iter = 0; | ||
1700 | u64 iter_cycles = 0; | ||
1691 | 1701 | ||
1692 | al.filtered = 0; | 1702 | al.filtered = 0; |
1693 | al.sym = NULL; | 1703 | al.sym = NULL; |
@@ -1737,9 +1747,15 @@ static int add_callchain_ip(struct thread *thread, | |||
1737 | 1747 | ||
1738 | if (symbol_conf.hide_unresolved && al.sym == NULL) | 1748 | if (symbol_conf.hide_unresolved && al.sym == NULL) |
1739 | return 0; | 1749 | return 0; |
1750 | |||
1751 | if (iter) { | ||
1752 | nr_loop_iter = iter->nr_loop_iter; | ||
1753 | iter_cycles = iter->cycles; | ||
1754 | } | ||
1755 | |||
1740 | return callchain_cursor_append(cursor, al.addr, al.map, al.sym, | 1756 | return callchain_cursor_append(cursor, al.addr, al.map, al.sym, |
1741 | branch, flags, nr_loop_iter, samples, | 1757 | branch, flags, nr_loop_iter, |
1742 | branch_from); | 1758 | iter_cycles, branch_from); |
1743 | } | 1759 | } |
1744 | 1760 | ||
1745 | struct branch_info *sample__resolve_bstack(struct perf_sample *sample, | 1761 | struct branch_info *sample__resolve_bstack(struct perf_sample *sample, |
@@ -1760,6 +1776,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, | |||
1760 | return bi; | 1776 | return bi; |
1761 | } | 1777 | } |
1762 | 1778 | ||
1779 | static void save_iterations(struct iterations *iter, | ||
1780 | struct branch_entry *be, int nr) | ||
1781 | { | ||
1782 | int i; | ||
1783 | |||
1784 | iter->nr_loop_iter = nr; | ||
1785 | iter->cycles = 0; | ||
1786 | |||
1787 | for (i = 0; i < nr; i++) | ||
1788 | iter->cycles += be[i].flags.cycles; | ||
1789 | } | ||
1790 | |||
1763 | #define CHASHSZ 127 | 1791 | #define CHASHSZ 127 |
1764 | #define CHASHBITS 7 | 1792 | #define CHASHBITS 7 |
1765 | #define NO_ENTRY 0xff | 1793 | #define NO_ENTRY 0xff |
@@ -1767,7 +1795,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, | |||
1767 | #define PERF_MAX_BRANCH_DEPTH 127 | 1795 | #define PERF_MAX_BRANCH_DEPTH 127 |
1768 | 1796 | ||
1769 | /* Remove loops. */ | 1797 | /* Remove loops. */ |
1770 | static int remove_loops(struct branch_entry *l, int nr) | 1798 | static int remove_loops(struct branch_entry *l, int nr, |
1799 | struct iterations *iter) | ||
1771 | { | 1800 | { |
1772 | int i, j, off; | 1801 | int i, j, off; |
1773 | unsigned char chash[CHASHSZ]; | 1802 | unsigned char chash[CHASHSZ]; |
@@ -1792,8 +1821,18 @@ static int remove_loops(struct branch_entry *l, int nr) | |||
1792 | break; | 1821 | break; |
1793 | } | 1822 | } |
1794 | if (is_loop) { | 1823 | if (is_loop) { |
1795 | memmove(l + i, l + i + off, | 1824 | j = nr - (i + off); |
1796 | (nr - (i + off)) * sizeof(*l)); | 1825 | if (j > 0) { |
1826 | save_iterations(iter + i + off, | ||
1827 | l + i, off); | ||
1828 | |||
1829 | memmove(iter + i, iter + i + off, | ||
1830 | j * sizeof(*iter)); | ||
1831 | |||
1832 | memmove(l + i, l + i + off, | ||
1833 | j * sizeof(*l)); | ||
1834 | } | ||
1835 | |||
1797 | nr -= off; | 1836 | nr -= off; |
1798 | } | 1837 | } |
1799 | } | 1838 | } |
@@ -1883,7 +1922,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, | |||
1883 | 1922 | ||
1884 | err = add_callchain_ip(thread, cursor, parent, | 1923 | err = add_callchain_ip(thread, cursor, parent, |
1885 | root_al, &cpumode, ip, | 1924 | root_al, &cpumode, ip, |
1886 | branch, flags, 0, 0, | 1925 | branch, flags, NULL, |
1887 | branch_from); | 1926 | branch_from); |
1888 | if (err) | 1927 | if (err) |
1889 | return (err < 0) ? err : 0; | 1928 | return (err < 0) ? err : 0; |
@@ -1909,7 +1948,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, | |||
1909 | int i, j, err, nr_entries; | 1948 | int i, j, err, nr_entries; |
1910 | int skip_idx = -1; | 1949 | int skip_idx = -1; |
1911 | int first_call = 0; | 1950 | int first_call = 0; |
1912 | int nr_loop_iter; | ||
1913 | 1951 | ||
1914 | if (chain) | 1952 | if (chain) |
1915 | chain_nr = chain->nr; | 1953 | chain_nr = chain->nr; |
@@ -1942,6 +1980,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, | |||
1942 | if (branch && callchain_param.branch_callstack) { | 1980 | if (branch && callchain_param.branch_callstack) { |
1943 | int nr = min(max_stack, (int)branch->nr); | 1981 | int nr = min(max_stack, (int)branch->nr); |
1944 | struct branch_entry be[nr]; | 1982 | struct branch_entry be[nr]; |
1983 | struct iterations iter[nr]; | ||
1945 | 1984 | ||
1946 | if (branch->nr > PERF_MAX_BRANCH_DEPTH) { | 1985 | if (branch->nr > PERF_MAX_BRANCH_DEPTH) { |
1947 | pr_warning("corrupted branch chain. skipping...\n"); | 1986 | pr_warning("corrupted branch chain. skipping...\n"); |
@@ -1972,38 +2011,21 @@ static int thread__resolve_callchain_sample(struct thread *thread, | |||
1972 | be[i] = branch->entries[branch->nr - i - 1]; | 2011 | be[i] = branch->entries[branch->nr - i - 1]; |
1973 | } | 2012 | } |
1974 | 2013 | ||
1975 | nr_loop_iter = nr; | 2014 | memset(iter, 0, sizeof(struct iterations) * nr); |
1976 | nr = remove_loops(be, nr); | 2015 | nr = remove_loops(be, nr, iter); |
1977 | |||
1978 | /* | ||
1979 | * Get the number of iterations. | ||
1980 | * It's only approximation, but good enough in practice. | ||
1981 | */ | ||
1982 | if (nr_loop_iter > nr) | ||
1983 | nr_loop_iter = nr_loop_iter - nr + 1; | ||
1984 | else | ||
1985 | nr_loop_iter = 0; | ||
1986 | 2016 | ||
1987 | for (i = 0; i < nr; i++) { | 2017 | for (i = 0; i < nr; i++) { |
1988 | if (i == nr - 1) | 2018 | err = add_callchain_ip(thread, cursor, parent, |
1989 | err = add_callchain_ip(thread, cursor, parent, | 2019 | root_al, |
1990 | root_al, | 2020 | NULL, be[i].to, |
1991 | NULL, be[i].to, | 2021 | true, &be[i].flags, |
1992 | true, &be[i].flags, | 2022 | NULL, be[i].from); |
1993 | nr_loop_iter, 1, | ||
1994 | be[i].from); | ||
1995 | else | ||
1996 | err = add_callchain_ip(thread, cursor, parent, | ||
1997 | root_al, | ||
1998 | NULL, be[i].to, | ||
1999 | true, &be[i].flags, | ||
2000 | 0, 0, be[i].from); | ||
2001 | 2023 | ||
2002 | if (!err) | 2024 | if (!err) |
2003 | err = add_callchain_ip(thread, cursor, parent, root_al, | 2025 | err = add_callchain_ip(thread, cursor, parent, root_al, |
2004 | NULL, be[i].from, | 2026 | NULL, be[i].from, |
2005 | true, &be[i].flags, | 2027 | true, &be[i].flags, |
2006 | 0, 0, 0); | 2028 | &iter[i], 0); |
2007 | if (err == -EINVAL) | 2029 | if (err == -EINVAL) |
2008 | break; | 2030 | break; |
2009 | if (err) | 2031 | if (err) |
@@ -2037,7 +2059,7 @@ check_calls: | |||
2037 | 2059 | ||
2038 | err = add_callchain_ip(thread, cursor, parent, | 2060 | err = add_callchain_ip(thread, cursor, parent, |
2039 | root_al, &cpumode, ip, | 2061 | root_al, &cpumode, ip, |
2040 | false, NULL, 0, 0, 0); | 2062 | false, NULL, NULL, 0); |
2041 | 2063 | ||
2042 | if (err) | 2064 | if (err) |
2043 | return (err < 0) ? err : 0; | 2065 | return (err < 0) ? err : 0; |
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f44aeba51d1f..f6257fb4f08c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -310,7 +310,7 @@ static struct perf_evsel * | |||
310 | __add_event(struct list_head *list, int *idx, | 310 | __add_event(struct list_head *list, int *idx, |
311 | struct perf_event_attr *attr, | 311 | struct perf_event_attr *attr, |
312 | char *name, struct cpu_map *cpus, | 312 | char *name, struct cpu_map *cpus, |
313 | struct list_head *config_terms) | 313 | struct list_head *config_terms, bool auto_merge_stats) |
314 | { | 314 | { |
315 | struct perf_evsel *evsel; | 315 | struct perf_evsel *evsel; |
316 | 316 | ||
@@ -324,6 +324,7 @@ __add_event(struct list_head *list, int *idx, | |||
324 | evsel->cpus = cpu_map__get(cpus); | 324 | evsel->cpus = cpu_map__get(cpus); |
325 | evsel->own_cpus = cpu_map__get(cpus); | 325 | evsel->own_cpus = cpu_map__get(cpus); |
326 | evsel->system_wide = !!cpus; | 326 | evsel->system_wide = !!cpus; |
327 | evsel->auto_merge_stats = auto_merge_stats; | ||
327 | 328 | ||
328 | if (name) | 329 | if (name) |
329 | evsel->name = strdup(name); | 330 | evsel->name = strdup(name); |
@@ -339,7 +340,7 @@ static int add_event(struct list_head *list, int *idx, | |||
339 | struct perf_event_attr *attr, char *name, | 340 | struct perf_event_attr *attr, char *name, |
340 | struct list_head *config_terms) | 341 | struct list_head *config_terms) |
341 | { | 342 | { |
342 | return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM; | 343 | return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM; |
343 | } | 344 | } |
344 | 345 | ||
345 | static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) | 346 | static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) |
@@ -1209,9 +1210,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, | |||
1209 | get_config_name(head_config), &config_terms); | 1210 | get_config_name(head_config), &config_terms); |
1210 | } | 1211 | } |
1211 | 1212 | ||
1212 | int parse_events_add_pmu(struct parse_events_state *parse_state, | 1213 | static int __parse_events_add_pmu(struct parse_events_state *parse_state, |
1213 | struct list_head *list, char *name, | 1214 | struct list_head *list, char *name, |
1214 | struct list_head *head_config) | 1215 | struct list_head *head_config, bool auto_merge_stats) |
1215 | { | 1216 | { |
1216 | struct perf_event_attr attr; | 1217 | struct perf_event_attr attr; |
1217 | struct perf_pmu_info info; | 1218 | struct perf_pmu_info info; |
@@ -1232,7 +1233,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, | |||
1232 | 1233 | ||
1233 | if (!head_config) { | 1234 | if (!head_config) { |
1234 | attr.type = pmu->type; | 1235 | attr.type = pmu->type; |
1235 | evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL); | 1236 | evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats); |
1236 | return evsel ? 0 : -ENOMEM; | 1237 | return evsel ? 0 : -ENOMEM; |
1237 | } | 1238 | } |
1238 | 1239 | ||
@@ -1254,7 +1255,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, | |||
1254 | 1255 | ||
1255 | evsel = __add_event(list, &parse_state->idx, &attr, | 1256 | evsel = __add_event(list, &parse_state->idx, &attr, |
1256 | get_config_name(head_config), pmu->cpus, | 1257 | get_config_name(head_config), pmu->cpus, |
1257 | &config_terms); | 1258 | &config_terms, auto_merge_stats); |
1258 | if (evsel) { | 1259 | if (evsel) { |
1259 | evsel->unit = info.unit; | 1260 | evsel->unit = info.unit; |
1260 | evsel->scale = info.scale; | 1261 | evsel->scale = info.scale; |
@@ -1267,6 +1268,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, | |||
1267 | return evsel ? 0 : -ENOMEM; | 1268 | return evsel ? 0 : -ENOMEM; |
1268 | } | 1269 | } |
1269 | 1270 | ||
1271 | int parse_events_add_pmu(struct parse_events_state *parse_state, | ||
1272 | struct list_head *list, char *name, | ||
1273 | struct list_head *head_config) | ||
1274 | { | ||
1275 | return __parse_events_add_pmu(parse_state, list, name, head_config, false); | ||
1276 | } | ||
1277 | |||
1270 | int parse_events_multi_pmu_add(struct parse_events_state *parse_state, | 1278 | int parse_events_multi_pmu_add(struct parse_events_state *parse_state, |
1271 | char *str, struct list_head **listp) | 1279 | char *str, struct list_head **listp) |
1272 | { | 1280 | { |
@@ -1296,8 +1304,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, | |||
1296 | return -1; | 1304 | return -1; |
1297 | list_add_tail(&term->list, head); | 1305 | list_add_tail(&term->list, head); |
1298 | 1306 | ||
1299 | if (!parse_events_add_pmu(parse_state, list, | 1307 | if (!__parse_events_add_pmu(parse_state, list, |
1300 | pmu->name, head)) { | 1308 | pmu->name, head, true)) { |
1301 | pr_debug("%s -> %s/%s/\n", str, | 1309 | pr_debug("%s -> %s/%s/\n", str, |
1302 | pmu->name, alias->str); | 1310 | pmu->name, alias->str); |
1303 | ok++; | 1311 | ok++; |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ac863691605f..a7ebd9fe8e40 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -1120,6 +1120,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, | |||
1120 | if (sample_type & PERF_SAMPLE_DATA_SRC) | 1120 | if (sample_type & PERF_SAMPLE_DATA_SRC) |
1121 | printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); | 1121 | printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); |
1122 | 1122 | ||
1123 | if (sample_type & PERF_SAMPLE_PHYS_ADDR) | ||
1124 | printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr); | ||
1125 | |||
1123 | if (sample_type & PERF_SAMPLE_TRANSACTION) | 1126 | if (sample_type & PERF_SAMPLE_TRANSACTION) |
1124 | printf("... transaction: %" PRIx64 "\n", sample->transaction); | 1127 | printf("... transaction: %" PRIx64 "\n", sample->transaction); |
1125 | 1128 | ||
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 12359bd986db..eb3ab902a1c0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
@@ -1316,6 +1316,47 @@ struct sort_entry sort_mem_dcacheline = { | |||
1316 | }; | 1316 | }; |
1317 | 1317 | ||
1318 | static int64_t | 1318 | static int64_t |
1319 | sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right) | ||
1320 | { | ||
1321 | uint64_t l = 0, r = 0; | ||
1322 | |||
1323 | if (left->mem_info) | ||
1324 | l = left->mem_info->daddr.phys_addr; | ||
1325 | if (right->mem_info) | ||
1326 | r = right->mem_info->daddr.phys_addr; | ||
1327 | |||
1328 | return (int64_t)(r - l); | ||
1329 | } | ||
1330 | |||
1331 | static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf, | ||
1332 | size_t size, unsigned int width) | ||
1333 | { | ||
1334 | uint64_t addr = 0; | ||
1335 | size_t ret = 0; | ||
1336 | size_t len = BITS_PER_LONG / 4; | ||
1337 | |||
1338 | addr = he->mem_info->daddr.phys_addr; | ||
1339 | |||
1340 | ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level); | ||
1341 | |||
1342 | ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, addr); | ||
1343 | |||
1344 | ret += repsep_snprintf(bf + ret, size - ret, "%-*s", width - ret, ""); | ||
1345 | |||
1346 | if (ret > width) | ||
1347 | bf[width] = '\0'; | ||
1348 | |||
1349 | return width; | ||
1350 | } | ||
1351 | |||
1352 | struct sort_entry sort_mem_phys_daddr = { | ||
1353 | .se_header = "Data Physical Address", | ||
1354 | .se_cmp = sort__phys_daddr_cmp, | ||
1355 | .se_snprintf = hist_entry__phys_daddr_snprintf, | ||
1356 | .se_width_idx = HISTC_MEM_PHYS_DADDR, | ||
1357 | }; | ||
1358 | |||
1359 | static int64_t | ||
1319 | sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) | 1360 | sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) |
1320 | { | 1361 | { |
1321 | if (!left->branch_info || !right->branch_info) | 1362 | if (!left->branch_info || !right->branch_info) |
@@ -1547,6 +1588,7 @@ static struct sort_dimension memory_sort_dimensions[] = { | |||
1547 | DIM(SORT_MEM_LVL, "mem", sort_mem_lvl), | 1588 | DIM(SORT_MEM_LVL, "mem", sort_mem_lvl), |
1548 | DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), | 1589 | DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), |
1549 | DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), | 1590 | DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), |
1591 | DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr), | ||
1550 | }; | 1592 | }; |
1551 | 1593 | ||
1552 | #undef DIM | 1594 | #undef DIM |
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index b7c75597e18f..f36dc4980a6c 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
@@ -245,6 +245,7 @@ enum sort_type { | |||
245 | SORT_MEM_SNOOP, | 245 | SORT_MEM_SNOOP, |
246 | SORT_MEM_DCACHELINE, | 246 | SORT_MEM_DCACHELINE, |
247 | SORT_MEM_IADDR_SYMBOL, | 247 | SORT_MEM_IADDR_SYMBOL, |
248 | SORT_MEM_PHYS_DADDR, | ||
248 | }; | 249 | }; |
249 | 250 | ||
250 | /* | 251 | /* |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index d00a012cfdfb..2bd6a1f01a1c 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -186,6 +186,7 @@ struct addr_map_symbol { | |||
186 | struct symbol *sym; | 186 | struct symbol *sym; |
187 | u64 addr; | 187 | u64 addr; |
188 | u64 al_addr; | 188 | u64 al_addr; |
189 | u64 phys_addr; | ||
189 | }; | 190 | }; |
190 | 191 | ||
191 | struct branch_info { | 192 | struct branch_info { |
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index bbb4c1957578..19e5db90394c 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #ifdef HAVE_SYSCALL_TABLE | 19 | #ifdef HAVE_SYSCALL_TABLE |
20 | #include <linux/compiler.h> | 20 | #include <linux/compiler.h> |
21 | #include <string.h> | 21 | #include <string.h> |
22 | #include "string2.h" | ||
22 | #include "util.h" | 23 | #include "util.h" |
23 | 24 | ||
24 | #if defined(__x86_64__) | 25 | #if defined(__x86_64__) |
@@ -105,6 +106,27 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name) | |||
105 | return sc ? sc->id : -1; | 106 | return sc ? sc->id : -1; |
106 | } | 107 | } |
107 | 108 | ||
109 | int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx) | ||
110 | { | ||
111 | int i; | ||
112 | struct syscall *syscalls = tbl->syscalls.entries; | ||
113 | |||
114 | for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) { | ||
115 | if (strglobmatch(syscalls[i].name, syscall_glob)) { | ||
116 | *idx = i; | ||
117 | return syscalls[i].id; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | return -1; | ||
122 | } | ||
123 | |||
124 | int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx) | ||
125 | { | ||
126 | *idx = -1; | ||
127 | return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); | ||
128 | } | ||
129 | |||
108 | #else /* HAVE_SYSCALL_TABLE */ | 130 | #else /* HAVE_SYSCALL_TABLE */ |
109 | 131 | ||
110 | #include <libaudit.h> | 132 | #include <libaudit.h> |
@@ -131,4 +153,15 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name) | |||
131 | { | 153 | { |
132 | return audit_name_to_syscall(name, tbl->audit_machine); | 154 | return audit_name_to_syscall(name, tbl->audit_machine); |
133 | } | 155 | } |
156 | |||
157 | int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused, | ||
158 | const char *syscall_glob __maybe_unused, int *idx __maybe_unused) | ||
159 | { | ||
160 | return -1; | ||
161 | } | ||
162 | |||
163 | int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx) | ||
164 | { | ||
165 | return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); | ||
166 | } | ||
134 | #endif /* HAVE_SYSCALL_TABLE */ | 167 | #endif /* HAVE_SYSCALL_TABLE */ |
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h index e2951510484f..e9fb8786da7c 100644 --- a/tools/perf/util/syscalltbl.h +++ b/tools/perf/util/syscalltbl.h | |||
@@ -17,4 +17,7 @@ void syscalltbl__delete(struct syscalltbl *tbl); | |||
17 | const char *syscalltbl__name(const struct syscalltbl *tbl, int id); | 17 | const char *syscalltbl__name(const struct syscalltbl *tbl, int id); |
18 | int syscalltbl__id(struct syscalltbl *tbl, const char *name); | 18 | int syscalltbl__id(struct syscalltbl *tbl, const char *name); |
19 | 19 | ||
20 | int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx); | ||
21 | int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx); | ||
22 | |||
20 | #endif /* __PERF_SYSCALLTBL_H */ | 23 | #endif /* __PERF_SYSCALLTBL_H */ |