diff options
Diffstat (limited to 'tools/perf')
35 files changed, 702 insertions, 221 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index c33b69f3374f..960da203ec11 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -109,6 +109,7 @@ OPTIONS | |||
109 | - mispredict: "N" for predicted branch, "Y" for mispredicted branch | 109 | - mispredict: "N" for predicted branch, "Y" for mispredicted branch |
110 | - in_tx: branch in TSX transaction | 110 | - in_tx: branch in TSX transaction |
111 | - abort: TSX transaction abort. | 111 | - abort: TSX transaction abort. |
112 | - cycles: Cycles in basic block | ||
112 | 113 | ||
113 | And default sort keys are changed to comm, dso_from, symbol_from, dso_to | 114 | And default sort keys are changed to comm, dso_from, symbol_from, dso_to |
114 | and symbol_to, see '--branch-stack'. | 115 | and symbol_to, see '--branch-stack'. |
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 776aec4d0927..f6a23eb294e7 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt | |||
@@ -208,6 +208,27 @@ Default is to monitor all CPUS. | |||
208 | This option sets the time out limit. The default value is 500 ms. | 208 | This option sets the time out limit. The default value is 500 ms. |
209 | 209 | ||
210 | 210 | ||
211 | -b:: | ||
212 | --branch-any:: | ||
213 | Enable taken branch stack sampling. Any type of taken branch may be sampled. | ||
214 | This is a shortcut for --branch-filter any. See --branch-filter for more infos. | ||
215 | |||
216 | -j:: | ||
217 | --branch-filter:: | ||
218 | Enable taken branch stack sampling. Each sample captures a series of consecutive | ||
219 | taken branches. The number of branches captured with each sample depends on the | ||
220 | underlying hardware, the type of branches of interest, and the executed code. | ||
221 | It is possible to select the types of branches captured by enabling filters. | ||
222 | For a full list of modifiers please see the perf record manpage. | ||
223 | |||
224 | The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. | ||
225 | The privilege levels may be omitted, in which case, the privilege levels of the associated | ||
226 | event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege | ||
227 | levels are subject to permissions. When sampling on multiple events, branch stack sampling | ||
228 | is enabled for all the sampling events. The sampled branch type is the same for all events. | ||
229 | The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k | ||
230 | Note that this feature may not be available on all processors. | ||
231 | |||
211 | INTERACTIVE PROMPTING KEYS | 232 | INTERACTIVE PROMPTING KEYS |
212 | -------------------------- | 233 | -------------------------- |
213 | 234 | ||
diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/alpha/Build | |||
@@ -0,0 +1 @@ | |||
# empty | |||
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/mips/Build | |||
@@ -0,0 +1 @@ | |||
# empty | |||
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/parisc/Build | |||
@@ -0,0 +1 @@ | |||
# empty | |||
diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/xtensa/Build | |||
@@ -0,0 +1 @@ | |||
libperf-y += util/ | |||
diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile new file mode 100644 index 000000000000..7fbca175099e --- /dev/null +++ b/tools/perf/arch/xtensa/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | ifndef NO_DWARF | ||
2 | PERF_HAVE_DWARF_REGS := 1 | ||
3 | endif | ||
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/xtensa/util/Build | |||
@@ -0,0 +1 @@ | |||
libperf-$(CONFIG_DWARF) += dwarf-regs.o | |||
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c new file mode 100644 index 000000000000..4dba76bfb4ce --- /dev/null +++ b/tools/perf/arch/xtensa/util/dwarf-regs.c | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * Mapping of DWARF debug register numbers into register names. | ||
3 | * | ||
4 | * Copyright (c) 2015 Cadence Design Systems Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <stddef.h> | ||
13 | #include <dwarf-regs.h> | ||
14 | |||
15 | #define XTENSA_MAX_REGS 16 | ||
16 | |||
17 | const char *xtensa_regs_table[XTENSA_MAX_REGS] = { | ||
18 | "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", | ||
19 | "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", | ||
20 | }; | ||
21 | |||
22 | const char *get_arch_regstr(unsigned int n) | ||
23 | { | ||
24 | return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL; | ||
25 | } | ||
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2c1bec39c30e..467a23b14e2f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
@@ -187,6 +187,7 @@ find_next: | |||
187 | * symbol, free he->ms.sym->src to signal we already | 187 | * symbol, free he->ms.sym->src to signal we already |
188 | * processed this symbol. | 188 | * processed this symbol. |
189 | */ | 189 | */ |
190 | zfree(&notes->src->cycles_hist); | ||
190 | zfree(&notes->src); | 191 | zfree(&notes->src); |
191 | } | 192 | } |
192 | } | 193 | } |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 95a47719aec3..3a9d1b659fcd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -53,6 +53,7 @@ struct report { | |||
53 | bool mem_mode; | 53 | bool mem_mode; |
54 | bool header; | 54 | bool header; |
55 | bool header_only; | 55 | bool header_only; |
56 | bool nonany_branch_mode; | ||
56 | int max_stack; | 57 | int max_stack; |
57 | struct perf_read_values show_threads_values; | 58 | struct perf_read_values show_threads_values; |
58 | const char *pretty_printing_style; | 59 | const char *pretty_printing_style; |
@@ -102,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, | |||
102 | if (!ui__has_annotation()) | 103 | if (!ui__has_annotation()) |
103 | return 0; | 104 | return 0; |
104 | 105 | ||
106 | hist__account_cycles(iter->sample->branch_stack, al, iter->sample, | ||
107 | rep->nonany_branch_mode); | ||
108 | |||
105 | if (sort__mode == SORT_MODE__BRANCH) { | 109 | if (sort__mode == SORT_MODE__BRANCH) { |
106 | bi = he->branch_info; | 110 | bi = he->branch_info; |
107 | err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); | 111 | err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); |
@@ -258,6 +262,12 @@ static int report__setup_sample_type(struct report *rep) | |||
258 | else | 262 | else |
259 | callchain_param.record_mode = CALLCHAIN_FP; | 263 | callchain_param.record_mode = CALLCHAIN_FP; |
260 | } | 264 | } |
265 | |||
266 | /* ??? handle more cases than just ANY? */ | ||
267 | if (!(perf_evlist__combined_branch_type(session->evlist) & | ||
268 | PERF_SAMPLE_BRANCH_ANY)) | ||
269 | rep->nonany_branch_mode = true; | ||
270 | |||
261 | return 0; | 271 | return 0; |
262 | } | 272 | } |
263 | 273 | ||
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d99d850e1444..a054ddc0b2a0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -101,8 +101,6 @@ static struct target target = { | |||
101 | 101 | ||
102 | static int run_count = 1; | 102 | static int run_count = 1; |
103 | static bool no_inherit = false; | 103 | static bool no_inherit = false; |
104 | static bool scale = true; | ||
105 | static enum aggr_mode aggr_mode = AGGR_GLOBAL; | ||
106 | static volatile pid_t child_pid = -1; | 104 | static volatile pid_t child_pid = -1; |
107 | static bool null_run = false; | 105 | static bool null_run = false; |
108 | static int detailed_run = 0; | 106 | static int detailed_run = 0; |
@@ -112,11 +110,9 @@ static int big_num_opt = -1; | |||
112 | static const char *csv_sep = NULL; | 110 | static const char *csv_sep = NULL; |
113 | static bool csv_output = false; | 111 | static bool csv_output = false; |
114 | static bool group = false; | 112 | static bool group = false; |
115 | static FILE *output = NULL; | ||
116 | static const char *pre_cmd = NULL; | 113 | static const char *pre_cmd = NULL; |
117 | static const char *post_cmd = NULL; | 114 | static const char *post_cmd = NULL; |
118 | static bool sync_run = false; | 115 | static bool sync_run = false; |
119 | static unsigned int interval = 0; | ||
120 | static unsigned int initial_delay = 0; | 116 | static unsigned int initial_delay = 0; |
121 | static unsigned int unit_width = 4; /* strlen("unit") */ | 117 | static unsigned int unit_width = 4; /* strlen("unit") */ |
122 | static bool forever = false; | 118 | static bool forever = false; |
@@ -126,6 +122,11 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu); | |||
126 | 122 | ||
127 | static volatile int done = 0; | 123 | static volatile int done = 0; |
128 | 124 | ||
125 | static struct perf_stat_config stat_config = { | ||
126 | .aggr_mode = AGGR_GLOBAL, | ||
127 | .scale = true, | ||
128 | }; | ||
129 | |||
129 | static inline void diff_timespec(struct timespec *r, struct timespec *a, | 130 | static inline void diff_timespec(struct timespec *r, struct timespec *a, |
130 | struct timespec *b) | 131 | struct timespec *b) |
131 | { | 132 | { |
@@ -148,7 +149,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) | |||
148 | { | 149 | { |
149 | struct perf_event_attr *attr = &evsel->attr; | 150 | struct perf_event_attr *attr = &evsel->attr; |
150 | 151 | ||
151 | if (scale) | 152 | if (stat_config.scale) |
152 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 153 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
153 | PERF_FORMAT_TOTAL_TIME_RUNNING; | 154 | PERF_FORMAT_TOTAL_TIME_RUNNING; |
154 | 155 | ||
@@ -178,142 +179,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) | |||
178 | return 0; | 179 | return 0; |
179 | } | 180 | } |
180 | 181 | ||
181 | static void zero_per_pkg(struct perf_evsel *counter) | ||
182 | { | ||
183 | if (counter->per_pkg_mask) | ||
184 | memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); | ||
185 | } | ||
186 | |||
187 | static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) | ||
188 | { | ||
189 | unsigned long *mask = counter->per_pkg_mask; | ||
190 | struct cpu_map *cpus = perf_evsel__cpus(counter); | ||
191 | int s; | ||
192 | |||
193 | *skip = false; | ||
194 | |||
195 | if (!counter->per_pkg) | ||
196 | return 0; | ||
197 | |||
198 | if (cpu_map__empty(cpus)) | ||
199 | return 0; | ||
200 | |||
201 | if (!mask) { | ||
202 | mask = zalloc(MAX_NR_CPUS); | ||
203 | if (!mask) | ||
204 | return -ENOMEM; | ||
205 | |||
206 | counter->per_pkg_mask = mask; | ||
207 | } | ||
208 | |||
209 | s = cpu_map__get_socket(cpus, cpu); | ||
210 | if (s < 0) | ||
211 | return -1; | ||
212 | |||
213 | *skip = test_and_set_bit(s, mask) == 1; | ||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | static int | ||
218 | process_counter_values(struct perf_evsel *evsel, int cpu, int thread, | ||
219 | struct perf_counts_values *count) | ||
220 | { | ||
221 | struct perf_counts_values *aggr = &evsel->counts->aggr; | ||
222 | static struct perf_counts_values zero; | ||
223 | bool skip = false; | ||
224 | |||
225 | if (check_per_pkg(evsel, cpu, &skip)) { | ||
226 | pr_err("failed to read per-pkg counter\n"); | ||
227 | return -1; | ||
228 | } | ||
229 | |||
230 | if (skip) | ||
231 | count = &zero; | ||
232 | |||
233 | switch (aggr_mode) { | ||
234 | case AGGR_THREAD: | ||
235 | case AGGR_CORE: | ||
236 | case AGGR_SOCKET: | ||
237 | case AGGR_NONE: | ||
238 | if (!evsel->snapshot) | ||
239 | perf_evsel__compute_deltas(evsel, cpu, thread, count); | ||
240 | perf_counts_values__scale(count, scale, NULL); | ||
241 | if (aggr_mode == AGGR_NONE) | ||
242 | perf_stat__update_shadow_stats(evsel, count->values, cpu); | ||
243 | break; | ||
244 | case AGGR_GLOBAL: | ||
245 | aggr->val += count->val; | ||
246 | if (scale) { | ||
247 | aggr->ena += count->ena; | ||
248 | aggr->run += count->run; | ||
249 | } | ||
250 | default: | ||
251 | break; | ||
252 | } | ||
253 | |||
254 | return 0; | ||
255 | } | ||
256 | |||
257 | static int process_counter_maps(struct perf_evsel *counter) | ||
258 | { | ||
259 | int nthreads = thread_map__nr(counter->threads); | ||
260 | int ncpus = perf_evsel__nr_cpus(counter); | ||
261 | int cpu, thread; | ||
262 | |||
263 | if (counter->system_wide) | ||
264 | nthreads = 1; | ||
265 | |||
266 | for (thread = 0; thread < nthreads; thread++) { | ||
267 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
268 | if (process_counter_values(counter, cpu, thread, | ||
269 | perf_counts(counter->counts, cpu, thread))) | ||
270 | return -1; | ||
271 | } | ||
272 | } | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | |||
277 | static int process_counter(struct perf_evsel *counter) | ||
278 | { | ||
279 | struct perf_counts_values *aggr = &counter->counts->aggr; | ||
280 | struct perf_stat *ps = counter->priv; | ||
281 | u64 *count = counter->counts->aggr.values; | ||
282 | int i, ret; | ||
283 | |||
284 | aggr->val = aggr->ena = aggr->run = 0; | ||
285 | init_stats(ps->res_stats); | ||
286 | |||
287 | if (counter->per_pkg) | ||
288 | zero_per_pkg(counter); | ||
289 | |||
290 | ret = process_counter_maps(counter); | ||
291 | if (ret) | ||
292 | return ret; | ||
293 | |||
294 | if (aggr_mode != AGGR_GLOBAL) | ||
295 | return 0; | ||
296 | |||
297 | if (!counter->snapshot) | ||
298 | perf_evsel__compute_deltas(counter, -1, -1, aggr); | ||
299 | perf_counts_values__scale(aggr, scale, &counter->counts->scaled); | ||
300 | |||
301 | for (i = 0; i < 3; i++) | ||
302 | update_stats(&ps->res_stats[i], count[i]); | ||
303 | |||
304 | if (verbose) { | ||
305 | fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", | ||
306 | perf_evsel__name(counter), count[0], count[1], count[2]); | ||
307 | } | ||
308 | |||
309 | /* | ||
310 | * Save the full runtime - to allow normalization during printout: | ||
311 | */ | ||
312 | perf_stat__update_shadow_stats(counter, count, 0); | ||
313 | |||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | /* | 182 | /* |
318 | * Read out the results of a single counter: | 183 | * Read out the results of a single counter: |
319 | * do not aggregate counts across CPUs in system-wide mode | 184 | * do not aggregate counts across CPUs in system-wide mode |
@@ -351,7 +216,7 @@ static void read_counters(bool close_counters) | |||
351 | if (read_counter(counter)) | 216 | if (read_counter(counter)) |
352 | pr_warning("failed to read counter %s\n", counter->name); | 217 | pr_warning("failed to read counter %s\n", counter->name); |
353 | 218 | ||
354 | if (process_counter(counter)) | 219 | if (perf_stat_process_counter(&stat_config, counter)) |
355 | pr_warning("failed to process counter %s\n", counter->name); | 220 | pr_warning("failed to process counter %s\n", counter->name); |
356 | 221 | ||
357 | if (close_counters) { | 222 | if (close_counters) { |
@@ -402,6 +267,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf | |||
402 | 267 | ||
403 | static int __run_perf_stat(int argc, const char **argv) | 268 | static int __run_perf_stat(int argc, const char **argv) |
404 | { | 269 | { |
270 | int interval = stat_config.interval; | ||
405 | char msg[512]; | 271 | char msg[512]; |
406 | unsigned long long t0, t1; | 272 | unsigned long long t0, t1; |
407 | struct perf_evsel *counter; | 273 | struct perf_evsel *counter; |
@@ -545,13 +411,13 @@ static int run_perf_stat(int argc, const char **argv) | |||
545 | static void print_running(u64 run, u64 ena) | 411 | static void print_running(u64 run, u64 ena) |
546 | { | 412 | { |
547 | if (csv_output) { | 413 | if (csv_output) { |
548 | fprintf(output, "%s%" PRIu64 "%s%.2f", | 414 | fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", |
549 | csv_sep, | 415 | csv_sep, |
550 | run, | 416 | run, |
551 | csv_sep, | 417 | csv_sep, |
552 | ena ? 100.0 * run / ena : 100.0); | 418 | ena ? 100.0 * run / ena : 100.0); |
553 | } else if (run != ena) { | 419 | } else if (run != ena) { |
554 | fprintf(output, " (%.2f%%)", 100.0 * run / ena); | 420 | fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); |
555 | } | 421 | } |
556 | } | 422 | } |
557 | 423 | ||
@@ -560,9 +426,9 @@ static void print_noise_pct(double total, double avg) | |||
560 | double pct = rel_stddev_stats(total, avg); | 426 | double pct = rel_stddev_stats(total, avg); |
561 | 427 | ||
562 | if (csv_output) | 428 | if (csv_output) |
563 | fprintf(output, "%s%.2f%%", csv_sep, pct); | 429 | fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); |
564 | else if (pct) | 430 | else if (pct) |
565 | fprintf(output, " ( +-%6.2f%% )", pct); | 431 | fprintf(stat_config.output, " ( +-%6.2f%% )", pct); |
566 | } | 432 | } |
567 | 433 | ||
568 | static void print_noise(struct perf_evsel *evsel, double avg) | 434 | static void print_noise(struct perf_evsel *evsel, double avg) |
@@ -578,9 +444,9 @@ static void print_noise(struct perf_evsel *evsel, double avg) | |||
578 | 444 | ||
579 | static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | 445 | static void aggr_printout(struct perf_evsel *evsel, int id, int nr) |
580 | { | 446 | { |
581 | switch (aggr_mode) { | 447 | switch (stat_config.aggr_mode) { |
582 | case AGGR_CORE: | 448 | case AGGR_CORE: |
583 | fprintf(output, "S%d-C%*d%s%*d%s", | 449 | fprintf(stat_config.output, "S%d-C%*d%s%*d%s", |
584 | cpu_map__id_to_socket(id), | 450 | cpu_map__id_to_socket(id), |
585 | csv_output ? 0 : -8, | 451 | csv_output ? 0 : -8, |
586 | cpu_map__id_to_cpu(id), | 452 | cpu_map__id_to_cpu(id), |
@@ -590,7 +456,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | |||
590 | csv_sep); | 456 | csv_sep); |
591 | break; | 457 | break; |
592 | case AGGR_SOCKET: | 458 | case AGGR_SOCKET: |
593 | fprintf(output, "S%*d%s%*d%s", | 459 | fprintf(stat_config.output, "S%*d%s%*d%s", |
594 | csv_output ? 0 : -5, | 460 | csv_output ? 0 : -5, |
595 | id, | 461 | id, |
596 | csv_sep, | 462 | csv_sep, |
@@ -599,12 +465,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | |||
599 | csv_sep); | 465 | csv_sep); |
600 | break; | 466 | break; |
601 | case AGGR_NONE: | 467 | case AGGR_NONE: |
602 | fprintf(output, "CPU%*d%s", | 468 | fprintf(stat_config.output, "CPU%*d%s", |
603 | csv_output ? 0 : -4, | 469 | csv_output ? 0 : -4, |
604 | perf_evsel__cpus(evsel)->map[id], csv_sep); | 470 | perf_evsel__cpus(evsel)->map[id], csv_sep); |
605 | break; | 471 | break; |
606 | case AGGR_THREAD: | 472 | case AGGR_THREAD: |
607 | fprintf(output, "%*s-%*d%s", | 473 | fprintf(stat_config.output, "%*s-%*d%s", |
608 | csv_output ? 0 : 16, | 474 | csv_output ? 0 : 16, |
609 | thread_map__comm(evsel->threads, id), | 475 | thread_map__comm(evsel->threads, id), |
610 | csv_output ? 0 : -8, | 476 | csv_output ? 0 : -8, |
@@ -619,6 +485,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | |||
619 | 485 | ||
620 | static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) | 486 | static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) |
621 | { | 487 | { |
488 | FILE *output = stat_config.output; | ||
622 | double msecs = avg / 1e6; | 489 | double msecs = avg / 1e6; |
623 | const char *fmt_v, *fmt_n; | 490 | const char *fmt_v, *fmt_n; |
624 | char name[25]; | 491 | char name[25]; |
@@ -643,7 +510,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
643 | if (evsel->cgrp) | 510 | if (evsel->cgrp) |
644 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); | 511 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); |
645 | 512 | ||
646 | if (csv_output || interval) | 513 | if (csv_output || stat_config.interval) |
647 | return; | 514 | return; |
648 | 515 | ||
649 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) | 516 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
@@ -655,6 +522,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
655 | 522 | ||
656 | static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | 523 | static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) |
657 | { | 524 | { |
525 | FILE *output = stat_config.output; | ||
658 | double sc = evsel->scale; | 526 | double sc = evsel->scale; |
659 | const char *fmt; | 527 | const char *fmt; |
660 | int cpu = cpu_map__id_to_cpu(id); | 528 | int cpu = cpu_map__id_to_cpu(id); |
@@ -670,7 +538,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
670 | 538 | ||
671 | aggr_printout(evsel, id, nr); | 539 | aggr_printout(evsel, id, nr); |
672 | 540 | ||
673 | if (aggr_mode == AGGR_GLOBAL) | 541 | if (stat_config.aggr_mode == AGGR_GLOBAL) |
674 | cpu = 0; | 542 | cpu = 0; |
675 | 543 | ||
676 | fprintf(output, fmt, avg, csv_sep); | 544 | fprintf(output, fmt, avg, csv_sep); |
@@ -685,14 +553,16 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
685 | if (evsel->cgrp) | 553 | if (evsel->cgrp) |
686 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); | 554 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); |
687 | 555 | ||
688 | if (csv_output || interval) | 556 | if (csv_output || stat_config.interval) |
689 | return; | 557 | return; |
690 | 558 | ||
691 | perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); | 559 | perf_stat__print_shadow_stats(output, evsel, avg, cpu, |
560 | stat_config.aggr_mode); | ||
692 | } | 561 | } |
693 | 562 | ||
694 | static void print_aggr(char *prefix) | 563 | static void print_aggr(char *prefix) |
695 | { | 564 | { |
565 | FILE *output = stat_config.output; | ||
696 | struct perf_evsel *counter; | 566 | struct perf_evsel *counter; |
697 | int cpu, cpu2, s, s2, id, nr; | 567 | int cpu, cpu2, s, s2, id, nr; |
698 | double uval; | 568 | double uval; |
@@ -761,6 +631,7 @@ static void print_aggr(char *prefix) | |||
761 | 631 | ||
762 | static void print_aggr_thread(struct perf_evsel *counter, char *prefix) | 632 | static void print_aggr_thread(struct perf_evsel *counter, char *prefix) |
763 | { | 633 | { |
634 | FILE *output = stat_config.output; | ||
764 | int nthreads = thread_map__nr(counter->threads); | 635 | int nthreads = thread_map__nr(counter->threads); |
765 | int ncpus = cpu_map__nr(counter->cpus); | 636 | int ncpus = cpu_map__nr(counter->cpus); |
766 | int cpu, thread; | 637 | int cpu, thread; |
@@ -799,6 +670,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) | |||
799 | */ | 670 | */ |
800 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix) | 671 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix) |
801 | { | 672 | { |
673 | FILE *output = stat_config.output; | ||
802 | struct perf_stat *ps = counter->priv; | 674 | struct perf_stat *ps = counter->priv; |
803 | double avg = avg_stats(&ps->res_stats[0]); | 675 | double avg = avg_stats(&ps->res_stats[0]); |
804 | int scaled = counter->counts->scaled; | 676 | int scaled = counter->counts->scaled; |
@@ -850,6 +722,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) | |||
850 | */ | 722 | */ |
851 | static void print_counter(struct perf_evsel *counter, char *prefix) | 723 | static void print_counter(struct perf_evsel *counter, char *prefix) |
852 | { | 724 | { |
725 | FILE *output = stat_config.output; | ||
853 | u64 ena, run, val; | 726 | u64 ena, run, val; |
854 | double uval; | 727 | double uval; |
855 | int cpu; | 728 | int cpu; |
@@ -904,12 +777,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix) | |||
904 | 777 | ||
905 | static void print_interval(char *prefix, struct timespec *ts) | 778 | static void print_interval(char *prefix, struct timespec *ts) |
906 | { | 779 | { |
780 | FILE *output = stat_config.output; | ||
907 | static int num_print_interval; | 781 | static int num_print_interval; |
908 | 782 | ||
909 | sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); | 783 | sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); |
910 | 784 | ||
911 | if (num_print_interval == 0 && !csv_output) { | 785 | if (num_print_interval == 0 && !csv_output) { |
912 | switch (aggr_mode) { | 786 | switch (stat_config.aggr_mode) { |
913 | case AGGR_SOCKET: | 787 | case AGGR_SOCKET: |
914 | fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); | 788 | fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); |
915 | break; | 789 | break; |
@@ -934,6 +808,7 @@ static void print_interval(char *prefix, struct timespec *ts) | |||
934 | 808 | ||
935 | static void print_header(int argc, const char **argv) | 809 | static void print_header(int argc, const char **argv) |
936 | { | 810 | { |
811 | FILE *output = stat_config.output; | ||
937 | int i; | 812 | int i; |
938 | 813 | ||
939 | fflush(stdout); | 814 | fflush(stdout); |
@@ -963,6 +838,8 @@ static void print_header(int argc, const char **argv) | |||
963 | 838 | ||
964 | static void print_footer(void) | 839 | static void print_footer(void) |
965 | { | 840 | { |
841 | FILE *output = stat_config.output; | ||
842 | |||
966 | if (!null_run) | 843 | if (!null_run) |
967 | fprintf(output, "\n"); | 844 | fprintf(output, "\n"); |
968 | fprintf(output, " %17.9f seconds time elapsed", | 845 | fprintf(output, " %17.9f seconds time elapsed", |
@@ -977,6 +854,7 @@ static void print_footer(void) | |||
977 | 854 | ||
978 | static void print_counters(struct timespec *ts, int argc, const char **argv) | 855 | static void print_counters(struct timespec *ts, int argc, const char **argv) |
979 | { | 856 | { |
857 | int interval = stat_config.interval; | ||
980 | struct perf_evsel *counter; | 858 | struct perf_evsel *counter; |
981 | char buf[64], *prefix = NULL; | 859 | char buf[64], *prefix = NULL; |
982 | 860 | ||
@@ -985,7 +863,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) | |||
985 | else | 863 | else |
986 | print_header(argc, argv); | 864 | print_header(argc, argv); |
987 | 865 | ||
988 | switch (aggr_mode) { | 866 | switch (stat_config.aggr_mode) { |
989 | case AGGR_CORE: | 867 | case AGGR_CORE: |
990 | case AGGR_SOCKET: | 868 | case AGGR_SOCKET: |
991 | print_aggr(prefix); | 869 | print_aggr(prefix); |
@@ -1009,14 +887,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) | |||
1009 | if (!interval && !csv_output) | 887 | if (!interval && !csv_output) |
1010 | print_footer(); | 888 | print_footer(); |
1011 | 889 | ||
1012 | fflush(output); | 890 | fflush(stat_config.output); |
1013 | } | 891 | } |
1014 | 892 | ||
1015 | static volatile int signr = -1; | 893 | static volatile int signr = -1; |
1016 | 894 | ||
1017 | static void skip_signal(int signo) | 895 | static void skip_signal(int signo) |
1018 | { | 896 | { |
1019 | if ((child_pid == -1) || interval) | 897 | if ((child_pid == -1) || stat_config.interval) |
1020 | done = 1; | 898 | done = 1; |
1021 | 899 | ||
1022 | signr = signo; | 900 | signr = signo; |
@@ -1064,7 +942,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, | |||
1064 | 942 | ||
1065 | static int perf_stat_init_aggr_mode(void) | 943 | static int perf_stat_init_aggr_mode(void) |
1066 | { | 944 | { |
1067 | switch (aggr_mode) { | 945 | switch (stat_config.aggr_mode) { |
1068 | case AGGR_SOCKET: | 946 | case AGGR_SOCKET: |
1069 | if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { | 947 | if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { |
1070 | perror("cannot build socket map"); | 948 | perror("cannot build socket map"); |
@@ -1270,7 +1148,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1270 | "system-wide collection from all CPUs"), | 1148 | "system-wide collection from all CPUs"), |
1271 | OPT_BOOLEAN('g', "group", &group, | 1149 | OPT_BOOLEAN('g', "group", &group, |
1272 | "put the counters into a counter group"), | 1150 | "put the counters into a counter group"), |
1273 | OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), | 1151 | OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), |
1274 | OPT_INCR('v', "verbose", &verbose, | 1152 | OPT_INCR('v', "verbose", &verbose, |
1275 | "be more verbose (show counter open errors, etc)"), | 1153 | "be more verbose (show counter open errors, etc)"), |
1276 | OPT_INTEGER('r', "repeat", &run_count, | 1154 | OPT_INTEGER('r', "repeat", &run_count, |
@@ -1286,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1286 | stat__set_big_num), | 1164 | stat__set_big_num), |
1287 | OPT_STRING('C', "cpu", &target.cpu_list, "cpu", | 1165 | OPT_STRING('C', "cpu", &target.cpu_list, "cpu", |
1288 | "list of cpus to monitor in system-wide"), | 1166 | "list of cpus to monitor in system-wide"), |
1289 | OPT_SET_UINT('A', "no-aggr", &aggr_mode, | 1167 | OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, |
1290 | "disable CPU count aggregation", AGGR_NONE), | 1168 | "disable CPU count aggregation", AGGR_NONE), |
1291 | OPT_STRING('x', "field-separator", &csv_sep, "separator", | 1169 | OPT_STRING('x', "field-separator", &csv_sep, "separator", |
1292 | "print counts with custom separator"), | 1170 | "print counts with custom separator"), |
@@ -1300,13 +1178,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1300 | "command to run prior to the measured command"), | 1178 | "command to run prior to the measured command"), |
1301 | OPT_STRING(0, "post", &post_cmd, "command", | 1179 | OPT_STRING(0, "post", &post_cmd, "command", |
1302 | "command to run after to the measured command"), | 1180 | "command to run after to the measured command"), |
1303 | OPT_UINTEGER('I', "interval-print", &interval, | 1181 | OPT_UINTEGER('I', "interval-print", &stat_config.interval, |
1304 | "print counts at regular interval in ms (>= 100)"), | 1182 | "print counts at regular interval in ms (>= 100)"), |
1305 | OPT_SET_UINT(0, "per-socket", &aggr_mode, | 1183 | OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, |
1306 | "aggregate counts per processor socket", AGGR_SOCKET), | 1184 | "aggregate counts per processor socket", AGGR_SOCKET), |
1307 | OPT_SET_UINT(0, "per-core", &aggr_mode, | 1185 | OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, |
1308 | "aggregate counts per physical processor core", AGGR_CORE), | 1186 | "aggregate counts per physical processor core", AGGR_CORE), |
1309 | OPT_SET_UINT(0, "per-thread", &aggr_mode, | 1187 | OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, |
1310 | "aggregate counts per thread", AGGR_THREAD), | 1188 | "aggregate counts per thread", AGGR_THREAD), |
1311 | OPT_UINTEGER('D', "delay", &initial_delay, | 1189 | OPT_UINTEGER('D', "delay", &initial_delay, |
1312 | "ms to wait before starting measurement after program start"), | 1190 | "ms to wait before starting measurement after program start"), |
@@ -1318,6 +1196,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1318 | }; | 1196 | }; |
1319 | int status = -EINVAL, run_idx; | 1197 | int status = -EINVAL, run_idx; |
1320 | const char *mode; | 1198 | const char *mode; |
1199 | FILE *output = stderr; | ||
1200 | unsigned int interval; | ||
1321 | 1201 | ||
1322 | setlocale(LC_ALL, ""); | 1202 | setlocale(LC_ALL, ""); |
1323 | 1203 | ||
@@ -1328,7 +1208,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1328 | argc = parse_options(argc, argv, options, stat_usage, | 1208 | argc = parse_options(argc, argv, options, stat_usage, |
1329 | PARSE_OPT_STOP_AT_NON_OPTION); | 1209 | PARSE_OPT_STOP_AT_NON_OPTION); |
1330 | 1210 | ||
1331 | output = stderr; | 1211 | interval = stat_config.interval; |
1212 | |||
1332 | if (output_name && strcmp(output_name, "-")) | 1213 | if (output_name && strcmp(output_name, "-")) |
1333 | output = NULL; | 1214 | output = NULL; |
1334 | 1215 | ||
@@ -1365,6 +1246,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1365 | } | 1246 | } |
1366 | } | 1247 | } |
1367 | 1248 | ||
1249 | stat_config.output = output; | ||
1250 | |||
1368 | if (csv_sep) { | 1251 | if (csv_sep) { |
1369 | csv_output = true; | 1252 | csv_output = true; |
1370 | if (!strcmp(csv_sep, "\\t")) | 1253 | if (!strcmp(csv_sep, "\\t")) |
@@ -1399,7 +1282,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1399 | run_count = 1; | 1282 | run_count = 1; |
1400 | } | 1283 | } |
1401 | 1284 | ||
1402 | if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { | 1285 | if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { |
1403 | fprintf(stderr, "The --per-thread option is only available " | 1286 | fprintf(stderr, "The --per-thread option is only available " |
1404 | "when monitoring via -p -t options.\n"); | 1287 | "when monitoring via -p -t options.\n"); |
1405 | parse_options_usage(NULL, options, "p", 1); | 1288 | parse_options_usage(NULL, options, "p", 1); |
@@ -1411,7 +1294,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1411 | * no_aggr, cgroup are for system-wide only | 1294 | * no_aggr, cgroup are for system-wide only |
1412 | * --per-thread is aggregated per thread, we dont mix it with cpu mode | 1295 | * --per-thread is aggregated per thread, we dont mix it with cpu mode |
1413 | */ | 1296 | */ |
1414 | if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && | 1297 | if (((stat_config.aggr_mode != AGGR_GLOBAL && |
1298 | stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && | ||
1415 | !target__has_cpu(&target)) { | 1299 | !target__has_cpu(&target)) { |
1416 | fprintf(stderr, "both cgroup and no-aggregation " | 1300 | fprintf(stderr, "both cgroup and no-aggregation " |
1417 | "modes only available in system-wide mode\n"); | 1301 | "modes only available in system-wide mode\n"); |
@@ -1444,7 +1328,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1444 | * Initialize thread_map with comm names, | 1328 | * Initialize thread_map with comm names, |
1445 | * so we could print it out on output. | 1329 | * so we could print it out on output. |
1446 | */ | 1330 | */ |
1447 | if (aggr_mode == AGGR_THREAD) | 1331 | if (stat_config.aggr_mode == AGGR_THREAD) |
1448 | thread_map__read_comms(evsel_list->threads); | 1332 | thread_map__read_comms(evsel_list->threads); |
1449 | 1333 | ||
1450 | if (interval && interval < 100) { | 1334 | if (interval && interval < 100) { |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ecf319728f25..bfe24f1e362f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "util/xyarray.h" | 40 | #include "util/xyarray.h" |
41 | #include "util/sort.h" | 41 | #include "util/sort.h" |
42 | #include "util/intlist.h" | 42 | #include "util/intlist.h" |
43 | #include "util/parse-branch-options.h" | ||
43 | #include "arch/common.h" | 44 | #include "arch/common.h" |
44 | 45 | ||
45 | #include "util/debug.h" | 46 | #include "util/debug.h" |
@@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, | |||
695 | perf_top__record_precise_ip(top, he, evsel->idx, ip); | 696 | perf_top__record_precise_ip(top, he, evsel->idx, ip); |
696 | } | 697 | } |
697 | 698 | ||
699 | hist__account_cycles(iter->sample->branch_stack, al, iter->sample, | ||
700 | !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); | ||
698 | return 0; | 701 | return 0; |
699 | } | 702 | } |
700 | 703 | ||
@@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1171 | "don't try to adjust column width, use these fixed values"), | 1174 | "don't try to adjust column width, use these fixed values"), |
1172 | OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, | 1175 | OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, |
1173 | "per thread proc mmap processing timeout in ms"), | 1176 | "per thread proc mmap processing timeout in ms"), |
1177 | OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack, | ||
1178 | "branch any", "sample any taken branches", | ||
1179 | parse_branch_stack), | ||
1180 | OPT_CALLBACK('j', "branch-filter", &opts->branch_stack, | ||
1181 | "branch filter mask", "branch stack filter modes", | ||
1182 | parse_branch_stack), | ||
1174 | OPT_END() | 1183 | OPT_END() |
1175 | }; | 1184 | }; |
1176 | const char * const top_usage[] = { | 1185 | const char * const top_usage[] = { |
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a47497011c93..a25048c85b76 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
@@ -2773,9 +2773,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, | |||
2773 | 2773 | ||
2774 | printed += fprintf(fp, "\n"); | 2774 | printed += fprintf(fp, "\n"); |
2775 | 2775 | ||
2776 | printed += fprintf(fp, " syscall calls min avg max stddev\n"); | 2776 | printed += fprintf(fp, " syscall calls total min avg max stddev\n"); |
2777 | printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); | 2777 | printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); |
2778 | printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); | 2778 | printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); |
2779 | 2779 | ||
2780 | /* each int_node is a syscall */ | 2780 | /* each int_node is a syscall */ |
2781 | while (inode) { | 2781 | while (inode) { |
@@ -2792,8 +2792,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, | |||
2792 | 2792 | ||
2793 | sc = &trace->syscalls.table[inode->i]; | 2793 | sc = &trace->syscalls.table[inode->i]; |
2794 | printed += fprintf(fp, " %-15s", sc->name); | 2794 | printed += fprintf(fp, " %-15s", sc->name); |
2795 | printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", | 2795 | printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", |
2796 | n, min, avg); | 2796 | n, avg * n, min, avg); |
2797 | printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); | 2797 | printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); |
2798 | } | 2798 | } |
2799 | 2799 | ||
diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh index c7ff90a90e4e..7e47a7cbc195 100644 --- a/tools/perf/perf-with-kcore.sh +++ b/tools/perf/perf-with-kcore.sh | |||
@@ -50,7 +50,7 @@ copy_kcore() | |||
50 | fi | 50 | fi |
51 | 51 | ||
52 | rm -f perf.data.junk | 52 | rm -f perf.data.junk |
53 | ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null & | 53 | ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null & |
54 | PERF_PID=$! | 54 | PERF_PID=$! |
55 | 55 | ||
56 | # Need to make sure that perf has started | 56 | # Need to make sure that perf has started |
@@ -160,18 +160,18 @@ record() | |||
160 | echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 | 160 | echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 |
161 | fi | 161 | fi |
162 | 162 | ||
163 | if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then | 163 | if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then |
164 | echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 | 164 | echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 |
165 | fi | 165 | fi |
166 | 166 | ||
167 | if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then | 167 | if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then |
168 | if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then | 168 | if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then |
169 | echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 | 169 | echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 |
170 | fi | 170 | fi |
171 | 171 | ||
172 | if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then | 172 | if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then |
173 | true | 173 | true |
174 | elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then | 174 | elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then |
175 | true | 175 | true |
176 | elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then | 176 | elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then |
177 | echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 | 177 | echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 |
@@ -193,8 +193,8 @@ record() | |||
193 | 193 | ||
194 | mkdir "$PERF_DATA_DIR" | 194 | mkdir "$PERF_DATA_DIR" |
195 | 195 | ||
196 | echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*" | 196 | echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@" |
197 | "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true | 197 | "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true |
198 | 198 | ||
199 | if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then | 199 | if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then |
200 | exit 1 | 200 | exit 1 |
@@ -209,8 +209,8 @@ subcommand() | |||
209 | { | 209 | { |
210 | find_perf | 210 | find_perf |
211 | check_buildid_cache_permissions | 211 | check_buildid_cache_permissions |
212 | echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*" | 212 | echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@" |
213 | "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $* | 213 | "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@" |
214 | } | 214 | } |
215 | 215 | ||
216 | if [ "$1" = "fix_buildid_cache_permissions" ] ; then | 216 | if [ "$1" = "fix_buildid_cache_permissions" ] ; then |
@@ -234,7 +234,7 @@ fi | |||
234 | case "$PERF_SUB_COMMAND" in | 234 | case "$PERF_SUB_COMMAND" in |
235 | "record") | 235 | "record") |
236 | while [ "$1" != "--" ] ; do | 236 | while [ "$1" != "--" ] ; do |
237 | PERF_OPTIONS+="$1 " | 237 | PERF_OPTIONS+=("$1") |
238 | shift || break | 238 | shift || break |
239 | done | 239 | done |
240 | if [ "$1" != "--" ] ; then | 240 | if [ "$1" != "--" ] ; then |
@@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in | |||
242 | usage | 242 | usage |
243 | fi | 243 | fi |
244 | shift | 244 | shift |
245 | record $* | 245 | record "$@" |
246 | ;; | 246 | ;; |
247 | "script") | 247 | "script") |
248 | subcommand $* | 248 | subcommand "$@" |
249 | ;; | 249 | ;; |
250 | "report") | 250 | "report") |
251 | subcommand $* | 251 | subcommand "$@" |
252 | ;; | 252 | ;; |
253 | "inject") | 253 | "inject") |
254 | subcommand $* | 254 | subcommand "$@" |
255 | ;; | 255 | ;; |
256 | *) | 256 | *) |
257 | usage | 257 | usage |
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5995a8bd7c69..b5fc847f9660 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c | |||
@@ -16,6 +16,9 @@ struct disasm_line_samples { | |||
16 | u64 nr; | 16 | u64 nr; |
17 | }; | 17 | }; |
18 | 18 | ||
19 | #define IPC_WIDTH 6 | ||
20 | #define CYCLES_WIDTH 6 | ||
21 | |||
19 | struct browser_disasm_line { | 22 | struct browser_disasm_line { |
20 | struct rb_node rb_node; | 23 | struct rb_node rb_node; |
21 | u32 idx; | 24 | u32 idx; |
@@ -53,6 +56,7 @@ struct annotate_browser { | |||
53 | int max_jump_sources; | 56 | int max_jump_sources; |
54 | int nr_jumps; | 57 | int nr_jumps; |
55 | bool searching_backwards; | 58 | bool searching_backwards; |
59 | bool have_cycles; | ||
56 | u8 addr_width; | 60 | u8 addr_width; |
57 | u8 jumps_width; | 61 | u8 jumps_width; |
58 | u8 target_width; | 62 | u8 target_width; |
@@ -96,6 +100,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br | |||
96 | return ui_browser__set_color(&browser->b, color); | 100 | return ui_browser__set_color(&browser->b, color); |
97 | } | 101 | } |
98 | 102 | ||
103 | static int annotate_browser__pcnt_width(struct annotate_browser *ab) | ||
104 | { | ||
105 | int w = 7 * ab->nr_events; | ||
106 | |||
107 | if (ab->have_cycles) | ||
108 | w += IPC_WIDTH + CYCLES_WIDTH; | ||
109 | return w; | ||
110 | } | ||
111 | |||
99 | static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) | 112 | static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) |
100 | { | 113 | { |
101 | struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); | 114 | struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); |
@@ -106,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
106 | (!current_entry || (browser->use_navkeypressed && | 119 | (!current_entry || (browser->use_navkeypressed && |
107 | !browser->navkeypressed))); | 120 | !browser->navkeypressed))); |
108 | int width = browser->width, printed; | 121 | int width = browser->width, printed; |
109 | int i, pcnt_width = 7 * ab->nr_events; | 122 | int i, pcnt_width = annotate_browser__pcnt_width(ab); |
110 | double percent_max = 0.0; | 123 | double percent_max = 0.0; |
111 | char bf[256]; | 124 | char bf[256]; |
112 | 125 | ||
@@ -116,19 +129,34 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
116 | } | 129 | } |
117 | 130 | ||
118 | if (dl->offset != -1 && percent_max != 0.0) { | 131 | if (dl->offset != -1 && percent_max != 0.0) { |
119 | for (i = 0; i < ab->nr_events; i++) { | 132 | if (percent_max != 0.0) { |
120 | ui_browser__set_percent_color(browser, | 133 | for (i = 0; i < ab->nr_events; i++) { |
121 | bdl->samples[i].percent, | 134 | ui_browser__set_percent_color(browser, |
122 | current_entry); | 135 | bdl->samples[i].percent, |
123 | if (annotate_browser__opts.show_total_period) | 136 | current_entry); |
124 | slsmg_printf("%6" PRIu64 " ", | 137 | if (annotate_browser__opts.show_total_period) |
125 | bdl->samples[i].nr); | 138 | slsmg_printf("%6" PRIu64 " ", |
126 | else | 139 | bdl->samples[i].nr); |
127 | slsmg_printf("%6.2f ", bdl->samples[i].percent); | 140 | else |
141 | slsmg_printf("%6.2f ", bdl->samples[i].percent); | ||
142 | } | ||
143 | } else { | ||
144 | slsmg_write_nstring(" ", 7 * ab->nr_events); | ||
128 | } | 145 | } |
129 | } else { | 146 | } else { |
130 | ui_browser__set_percent_color(browser, 0, current_entry); | 147 | ui_browser__set_percent_color(browser, 0, current_entry); |
131 | slsmg_write_nstring(" ", pcnt_width); | 148 | slsmg_write_nstring(" ", 7 * ab->nr_events); |
149 | } | ||
150 | if (ab->have_cycles) { | ||
151 | if (dl->ipc) | ||
152 | slsmg_printf("%*.2f ", IPC_WIDTH - 1, dl->ipc); | ||
153 | else | ||
154 | slsmg_write_nstring(" ", IPC_WIDTH); | ||
155 | if (dl->cycles) | ||
156 | slsmg_printf("%*" PRIu64 " ", | ||
157 | CYCLES_WIDTH - 1, dl->cycles); | ||
158 | else | ||
159 | slsmg_write_nstring(" ", CYCLES_WIDTH); | ||
132 | } | 160 | } |
133 | 161 | ||
134 | SLsmg_write_char(' '); | 162 | SLsmg_write_char(' '); |
@@ -231,7 +259,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) | |||
231 | unsigned int from, to; | 259 | unsigned int from, to; |
232 | struct map_symbol *ms = ab->b.priv; | 260 | struct map_symbol *ms = ab->b.priv; |
233 | struct symbol *sym = ms->sym; | 261 | struct symbol *sym = ms->sym; |
234 | u8 pcnt_width = 7; | 262 | u8 pcnt_width = annotate_browser__pcnt_width(ab); |
235 | 263 | ||
236 | /* PLT symbols contain external offsets */ | 264 | /* PLT symbols contain external offsets */ |
237 | if (strstr(sym->name, "@plt")) | 265 | if (strstr(sym->name, "@plt")) |
@@ -255,8 +283,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) | |||
255 | to = (u64)btarget->idx; | 283 | to = (u64)btarget->idx; |
256 | } | 284 | } |
257 | 285 | ||
258 | pcnt_width *= ab->nr_events; | ||
259 | |||
260 | ui_browser__set_color(browser, HE_COLORSET_CODE); | 286 | ui_browser__set_color(browser, HE_COLORSET_CODE); |
261 | __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, | 287 | __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, |
262 | from, to); | 288 | from, to); |
@@ -266,9 +292,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) | |||
266 | { | 292 | { |
267 | struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); | 293 | struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); |
268 | int ret = ui_browser__list_head_refresh(browser); | 294 | int ret = ui_browser__list_head_refresh(browser); |
269 | int pcnt_width; | 295 | int pcnt_width = annotate_browser__pcnt_width(ab); |
270 | |||
271 | pcnt_width = 7 * ab->nr_events; | ||
272 | 296 | ||
273 | if (annotate_browser__opts.jump_arrows) | 297 | if (annotate_browser__opts.jump_arrows) |
274 | annotate_browser__draw_current_jump(browser); | 298 | annotate_browser__draw_current_jump(browser); |
@@ -390,7 +414,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, | |||
390 | max_percent = bpos->samples[i].percent; | 414 | max_percent = bpos->samples[i].percent; |
391 | } | 415 | } |
392 | 416 | ||
393 | if (max_percent < 0.01) { | 417 | if (max_percent < 0.01 && pos->ipc == 0) { |
394 | RB_CLEAR_NODE(&bpos->rb_node); | 418 | RB_CLEAR_NODE(&bpos->rb_node); |
395 | continue; | 419 | continue; |
396 | } | 420 | } |
@@ -869,6 +893,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, | |||
869 | return map_symbol__tui_annotate(&he->ms, evsel, hbt); | 893 | return map_symbol__tui_annotate(&he->ms, evsel, hbt); |
870 | } | 894 | } |
871 | 895 | ||
896 | |||
897 | static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end) | ||
898 | { | ||
899 | unsigned n_insn = 0; | ||
900 | u64 offset; | ||
901 | |||
902 | for (offset = start; offset <= end; offset++) { | ||
903 | if (browser->offsets[offset]) | ||
904 | n_insn++; | ||
905 | } | ||
906 | return n_insn; | ||
907 | } | ||
908 | |||
909 | static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, | ||
910 | struct cyc_hist *ch) | ||
911 | { | ||
912 | unsigned n_insn; | ||
913 | u64 offset; | ||
914 | |||
915 | n_insn = count_insn(browser, start, end); | ||
916 | if (n_insn && ch->num && ch->cycles) { | ||
917 | float ipc = n_insn / ((double)ch->cycles / (double)ch->num); | ||
918 | |||
919 | /* Hide data when there are too many overlaps. */ | ||
920 | if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) | ||
921 | return; | ||
922 | |||
923 | for (offset = start; offset <= end; offset++) { | ||
924 | struct disasm_line *dl = browser->offsets[offset]; | ||
925 | |||
926 | if (dl) | ||
927 | dl->ipc = ipc; | ||
928 | } | ||
929 | } | ||
930 | } | ||
931 | |||
932 | /* | ||
933 | * This should probably be in util/annotate.c to share with the tty | ||
934 | * annotate, but right now we need the per byte offsets arrays, | ||
935 | * which are only here. | ||
936 | */ | ||
937 | static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, | ||
938 | struct symbol *sym) | ||
939 | { | ||
940 | u64 offset; | ||
941 | struct annotation *notes = symbol__annotation(sym); | ||
942 | |||
943 | if (!notes->src || !notes->src->cycles_hist) | ||
944 | return; | ||
945 | |||
946 | pthread_mutex_lock(&notes->lock); | ||
947 | for (offset = 0; offset < size; ++offset) { | ||
948 | struct cyc_hist *ch; | ||
949 | |||
950 | ch = &notes->src->cycles_hist[offset]; | ||
951 | if (ch && ch->cycles) { | ||
952 | struct disasm_line *dl; | ||
953 | |||
954 | if (ch->have_start) | ||
955 | count_and_fill(browser, ch->start, offset, ch); | ||
956 | dl = browser->offsets[offset]; | ||
957 | if (dl && ch->num_aggr) | ||
958 | dl->cycles = ch->cycles_aggr / ch->num_aggr; | ||
959 | browser->have_cycles = true; | ||
960 | } | ||
961 | } | ||
962 | pthread_mutex_unlock(&notes->lock); | ||
963 | } | ||
964 | |||
872 | static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, | 965 | static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, |
873 | size_t size) | 966 | size_t size) |
874 | { | 967 | { |
@@ -991,6 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, | |||
991 | } | 1084 | } |
992 | 1085 | ||
993 | annotate_browser__mark_jump_targets(&browser, size); | 1086 | annotate_browser__mark_jump_targets(&browser, size); |
1087 | annotate__compute_ipc(&browser, size, sym); | ||
994 | 1088 | ||
995 | browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); | 1089 | browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); |
996 | browser.max_addr_width = hex_width(sym->end); | 1090 | browser.max_addr_width = hex_width(sym->end); |
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 03b7bc70eb66..e0b614648044 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c | |||
@@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym) | |||
473 | return 0; | 473 | return 0; |
474 | } | 474 | } |
475 | 475 | ||
476 | /* The cycles histogram is lazily allocated. */ | ||
477 | static int symbol__alloc_hist_cycles(struct symbol *sym) | ||
478 | { | ||
479 | struct annotation *notes = symbol__annotation(sym); | ||
480 | const size_t size = symbol__size(sym); | ||
481 | |||
482 | notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); | ||
483 | if (notes->src->cycles_hist == NULL) | ||
484 | return -1; | ||
485 | return 0; | ||
486 | } | ||
487 | |||
476 | void symbol__annotate_zero_histograms(struct symbol *sym) | 488 | void symbol__annotate_zero_histograms(struct symbol *sym) |
477 | { | 489 | { |
478 | struct annotation *notes = symbol__annotation(sym); | 490 | struct annotation *notes = symbol__annotation(sym); |
479 | 491 | ||
480 | pthread_mutex_lock(&notes->lock); | 492 | pthread_mutex_lock(&notes->lock); |
481 | if (notes->src != NULL) | 493 | if (notes->src != NULL) { |
482 | memset(notes->src->histograms, 0, | 494 | memset(notes->src->histograms, 0, |
483 | notes->src->nr_histograms * notes->src->sizeof_sym_hist); | 495 | notes->src->nr_histograms * notes->src->sizeof_sym_hist); |
496 | if (notes->src->cycles_hist) | ||
497 | memset(notes->src->cycles_hist, 0, | ||
498 | symbol__size(sym) * sizeof(struct cyc_hist)); | ||
499 | } | ||
484 | pthread_mutex_unlock(&notes->lock); | 500 | pthread_mutex_unlock(&notes->lock); |
485 | } | 501 | } |
486 | 502 | ||
503 | static int __symbol__account_cycles(struct annotation *notes, | ||
504 | u64 start, | ||
505 | unsigned offset, unsigned cycles, | ||
506 | unsigned have_start) | ||
507 | { | ||
508 | struct cyc_hist *ch; | ||
509 | |||
510 | ch = notes->src->cycles_hist; | ||
511 | /* | ||
512 | * For now we can only account one basic block per | ||
513 | * final jump. But multiple could be overlapping. | ||
514 | * Always account the longest one. So when | ||
515 | * a shorter one has been already seen throw it away. | ||
516 | * | ||
517 | * We separately always account the full cycles. | ||
518 | */ | ||
519 | ch[offset].num_aggr++; | ||
520 | ch[offset].cycles_aggr += cycles; | ||
521 | |||
522 | if (!have_start && ch[offset].have_start) | ||
523 | return 0; | ||
524 | if (ch[offset].num) { | ||
525 | if (have_start && (!ch[offset].have_start || | ||
526 | ch[offset].start > start)) { | ||
527 | ch[offset].have_start = 0; | ||
528 | ch[offset].cycles = 0; | ||
529 | ch[offset].num = 0; | ||
530 | if (ch[offset].reset < 0xffff) | ||
531 | ch[offset].reset++; | ||
532 | } else if (have_start && | ||
533 | ch[offset].start < start) | ||
534 | return 0; | ||
535 | } | ||
536 | ch[offset].have_start = have_start; | ||
537 | ch[offset].start = start; | ||
538 | ch[offset].cycles += cycles; | ||
539 | ch[offset].num++; | ||
540 | return 0; | ||
541 | } | ||
542 | |||
487 | static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, | 543 | static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, |
488 | struct annotation *notes, int evidx, u64 addr) | 544 | struct annotation *notes, int evidx, u64 addr) |
489 | { | 545 | { |
@@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, | |||
506 | return 0; | 562 | return 0; |
507 | } | 563 | } |
508 | 564 | ||
509 | static struct annotation *symbol__get_annotation(struct symbol *sym) | 565 | static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) |
510 | { | 566 | { |
511 | struct annotation *notes = symbol__annotation(sym); | 567 | struct annotation *notes = symbol__annotation(sym); |
512 | 568 | ||
@@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym) | |||
514 | if (symbol__alloc_hist(sym) < 0) | 570 | if (symbol__alloc_hist(sym) < 0) |
515 | return NULL; | 571 | return NULL; |
516 | } | 572 | } |
573 | if (!notes->src->cycles_hist && cycles) { | ||
574 | if (symbol__alloc_hist_cycles(sym) < 0) | ||
575 | return NULL; | ||
576 | } | ||
517 | return notes; | 577 | return notes; |
518 | } | 578 | } |
519 | 579 | ||
@@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, | |||
524 | 584 | ||
525 | if (sym == NULL) | 585 | if (sym == NULL) |
526 | return 0; | 586 | return 0; |
527 | notes = symbol__get_annotation(sym); | 587 | notes = symbol__get_annotation(sym, false); |
528 | if (notes == NULL) | 588 | if (notes == NULL) |
529 | return -ENOMEM; | 589 | return -ENOMEM; |
530 | return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); | 590 | return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); |
531 | } | 591 | } |
532 | 592 | ||
593 | static int symbol__account_cycles(u64 addr, u64 start, | ||
594 | struct symbol *sym, unsigned cycles) | ||
595 | { | ||
596 | struct annotation *notes; | ||
597 | unsigned offset; | ||
598 | |||
599 | if (sym == NULL) | ||
600 | return 0; | ||
601 | notes = symbol__get_annotation(sym, true); | ||
602 | if (notes == NULL) | ||
603 | return -ENOMEM; | ||
604 | if (addr < sym->start || addr >= sym->end) | ||
605 | return -ERANGE; | ||
606 | |||
607 | if (start) { | ||
608 | if (start < sym->start || start >= sym->end) | ||
609 | return -ERANGE; | ||
610 | if (start >= addr) | ||
611 | start = 0; | ||
612 | } | ||
613 | offset = addr - sym->start; | ||
614 | return __symbol__account_cycles(notes, | ||
615 | start ? start - sym->start : 0, | ||
616 | offset, cycles, | ||
617 | !!start); | ||
618 | } | ||
619 | |||
620 | int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, | ||
621 | struct addr_map_symbol *start, | ||
622 | unsigned cycles) | ||
623 | { | ||
624 | unsigned long saddr = 0; | ||
625 | int err; | ||
626 | |||
627 | if (!cycles) | ||
628 | return 0; | ||
629 | |||
630 | /* | ||
631 | * Only set start when IPC can be computed. We can only | ||
632 | * compute it when the basic block is completely in a single | ||
633 | * function. | ||
634 | * Special case the case when the jump is elsewhere, but | ||
635 | * it starts on the function start. | ||
636 | */ | ||
637 | if (start && | ||
638 | (start->sym == ams->sym || | ||
639 | (ams->sym && | ||
640 | start->addr == ams->sym->start + ams->map->start))) | ||
641 | saddr = start->al_addr; | ||
642 | if (saddr == 0) | ||
643 | pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n", | ||
644 | ams->addr, | ||
645 | start ? start->addr : 0, | ||
646 | ams->sym ? ams->sym->start + ams->map->start : 0, | ||
647 | saddr); | ||
648 | err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles); | ||
649 | if (err) | ||
650 | pr_debug2("account_cycles failed %d\n", err); | ||
651 | return err; | ||
652 | } | ||
653 | |||
533 | int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) | 654 | int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) |
534 | { | 655 | { |
535 | return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); | 656 | return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); |
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 7e78e6c27078..e9996092a093 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h | |||
@@ -59,6 +59,8 @@ struct disasm_line { | |||
59 | char *name; | 59 | char *name; |
60 | struct ins *ins; | 60 | struct ins *ins; |
61 | int line_nr; | 61 | int line_nr; |
62 | float ipc; | ||
63 | u64 cycles; | ||
62 | struct ins_operands ops; | 64 | struct ins_operands ops; |
63 | }; | 65 | }; |
64 | 66 | ||
@@ -79,6 +81,17 @@ struct sym_hist { | |||
79 | u64 addr[0]; | 81 | u64 addr[0]; |
80 | }; | 82 | }; |
81 | 83 | ||
84 | struct cyc_hist { | ||
85 | u64 start; | ||
86 | u64 cycles; | ||
87 | u64 cycles_aggr; | ||
88 | u32 num; | ||
89 | u32 num_aggr; | ||
90 | u8 have_start; | ||
91 | /* 1 byte padding */ | ||
92 | u16 reset; | ||
93 | }; | ||
94 | |||
82 | struct source_line_samples { | 95 | struct source_line_samples { |
83 | double percent; | 96 | double percent; |
84 | double percent_sum; | 97 | double percent_sum; |
@@ -97,6 +110,7 @@ struct source_line { | |||
97 | * @histogram: Array of addr hit histograms per event being monitored | 110 | * @histogram: Array of addr hit histograms per event being monitored |
98 | * @lines: If 'print_lines' is specified, per source code line percentages | 111 | * @lines: If 'print_lines' is specified, per source code line percentages |
99 | * @source: source parsed from a disassembler like objdump -dS | 112 | * @source: source parsed from a disassembler like objdump -dS |
113 | * @cyc_hist: Average cycles per basic block | ||
100 | * | 114 | * |
101 | * lines is allocated, percentages calculated and all sorted by percentage | 115 | * lines is allocated, percentages calculated and all sorted by percentage |
102 | * when the annotation is about to be presented, so the percentages are for | 116 | * when the annotation is about to be presented, so the percentages are for |
@@ -109,6 +123,7 @@ struct annotated_source { | |||
109 | struct source_line *lines; | 123 | struct source_line *lines; |
110 | int nr_histograms; | 124 | int nr_histograms; |
111 | int sizeof_sym_hist; | 125 | int sizeof_sym_hist; |
126 | struct cyc_hist *cycles_hist; | ||
112 | struct sym_hist histograms[0]; | 127 | struct sym_hist histograms[0]; |
113 | }; | 128 | }; |
114 | 129 | ||
@@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) | |||
130 | 145 | ||
131 | int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); | 146 | int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); |
132 | 147 | ||
148 | int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, | ||
149 | struct addr_map_symbol *start, | ||
150 | unsigned cycles); | ||
151 | |||
133 | int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); | 152 | int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); |
134 | 153 | ||
135 | int symbol__alloc_hist(struct symbol *sym); | 154 | int symbol__alloc_hist(struct symbol *sym); |
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 83d9dd96fe08..a25b3609cef8 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c | |||
@@ -942,6 +942,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, | |||
942 | struct itrace_synth_opts *synth_opts = opt->value; | 942 | struct itrace_synth_opts *synth_opts = opt->value; |
943 | const char *p; | 943 | const char *p; |
944 | char *endptr; | 944 | char *endptr; |
945 | bool period_type_set = false; | ||
945 | 946 | ||
946 | synth_opts->set = true; | 947 | synth_opts->set = true; |
947 | 948 | ||
@@ -970,10 +971,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, | |||
970 | case 'i': | 971 | case 'i': |
971 | synth_opts->period_type = | 972 | synth_opts->period_type = |
972 | PERF_ITRACE_PERIOD_INSTRUCTIONS; | 973 | PERF_ITRACE_PERIOD_INSTRUCTIONS; |
974 | period_type_set = true; | ||
973 | break; | 975 | break; |
974 | case 't': | 976 | case 't': |
975 | synth_opts->period_type = | 977 | synth_opts->period_type = |
976 | PERF_ITRACE_PERIOD_TICKS; | 978 | PERF_ITRACE_PERIOD_TICKS; |
979 | period_type_set = true; | ||
977 | break; | 980 | break; |
978 | case 'm': | 981 | case 'm': |
979 | synth_opts->period *= 1000; | 982 | synth_opts->period *= 1000; |
@@ -986,6 +989,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, | |||
986 | goto out_err; | 989 | goto out_err; |
987 | synth_opts->period_type = | 990 | synth_opts->period_type = |
988 | PERF_ITRACE_PERIOD_NANOSECS; | 991 | PERF_ITRACE_PERIOD_NANOSECS; |
992 | period_type_set = true; | ||
989 | break; | 993 | break; |
990 | case '\0': | 994 | case '\0': |
991 | goto out; | 995 | goto out; |
@@ -1039,7 +1043,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, | |||
1039 | } | 1043 | } |
1040 | out: | 1044 | out: |
1041 | if (synth_opts->instructions) { | 1045 | if (synth_opts->instructions) { |
1042 | if (!synth_opts->period_type) | 1046 | if (!period_type_set) |
1043 | synth_opts->period_type = | 1047 | synth_opts->period_type = |
1044 | PERF_ITRACE_DEFAULT_PERIOD_TYPE; | 1048 | PERF_ITRACE_DEFAULT_PERIOD_TYPE; |
1045 | if (!synth_opts->period) | 1049 | if (!synth_opts->period) |
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2da5581ec74d..86d9c7302598 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c | |||
@@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) | |||
36 | return ret; | 36 | return ret; |
37 | } | 37 | } |
38 | 38 | ||
/*
 * va_list flavour of eprintf(): forwards an already-started argument list
 * to the file-local _eprintf() and hands back whatever it returns.
 */
int veprintf(int level, int var, const char *fmt, va_list args)
{
	int printed = _eprintf(level, var, fmt, args);

	return printed;
}
43 | |||
39 | int eprintf(int level, int var, const char *fmt, ...) | 44 | int eprintf(int level, int var, const char *fmt, ...) |
40 | { | 45 | { |
41 | va_list args; | 46 | va_list args; |
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index caac2fdc6105..8b9a088c32ab 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h | |||
@@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...); | |||
50 | 50 | ||
51 | int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); | 51 | int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); |
52 | int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5))); | 52 | int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5))); |
53 | int veprintf(int level, int var, const char *fmt, va_list args); | ||
53 | 54 | ||
54 | int perf_debug_option(const char *str); | 55 | int perf_debug_option(const char *str); |
55 | 56 | ||
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 4bb2ae894c78..f729df5e25e6 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
@@ -134,7 +134,8 @@ struct branch_flags { | |||
134 | u64 predicted:1; | 134 | u64 predicted:1; |
135 | u64 in_tx:1; | 135 | u64 in_tx:1; |
136 | u64 abort:1; | 136 | u64 abort:1; |
137 | u64 reserved:60; | 137 | u64 cycles:16; |
138 | u64 reserved:44; | ||
138 | }; | 139 | }; |
139 | 140 | ||
140 | struct branch_entry { | 141 | struct branch_entry { |
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3b9f411a6b46..373f65b02545 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c | |||
@@ -1273,6 +1273,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) | |||
1273 | return __perf_evlist__combined_sample_type(evlist); | 1273 | return __perf_evlist__combined_sample_type(evlist); |
1274 | } | 1274 | } |
1275 | 1275 | ||
1276 | u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) | ||
1277 | { | ||
1278 | struct perf_evsel *evsel; | ||
1279 | u64 branch_type = 0; | ||
1280 | |||
1281 | evlist__for_each(evlist, evsel) | ||
1282 | branch_type |= evsel->attr.branch_sample_type; | ||
1283 | return branch_type; | ||
1284 | } | ||
1285 | |||
1276 | bool perf_evlist__valid_read_format(struct perf_evlist *evlist) | 1286 | bool perf_evlist__valid_read_format(struct perf_evlist *evlist) |
1277 | { | 1287 | { |
1278 | struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; | 1288 | struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; |
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a8930b68456b..397757063da1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h | |||
@@ -165,6 +165,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist); | |||
165 | u64 perf_evlist__read_format(struct perf_evlist *evlist); | 165 | u64 perf_evlist__read_format(struct perf_evlist *evlist); |
166 | u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); | 166 | u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); |
167 | u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); | 167 | u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); |
168 | u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist); | ||
168 | bool perf_evlist__sample_id_all(struct perf_evlist *evlist); | 169 | bool perf_evlist__sample_id_all(struct perf_evlist *evlist); |
169 | u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); | 170 | u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); |
170 | 171 | ||
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f28d53d4e46..a6e9ddd37913 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -618,7 +618,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a | |||
618 | * and not events sampled. Thus we use a pseudo period of 1. | 618 | * and not events sampled. Thus we use a pseudo period of 1. |
619 | */ | 619 | */ |
620 | he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, | 620 | he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, |
621 | 1, 1, 0, true); | 621 | 1, bi->flags.cycles ? bi->flags.cycles : 1, |
622 | 0, true); | ||
622 | if (he == NULL) | 623 | if (he == NULL) |
623 | return -ENOMEM; | 624 | return -ENOMEM; |
624 | 625 | ||
@@ -1414,6 +1415,39 @@ int hists__link(struct hists *leader, struct hists *other) | |||
1414 | return 0; | 1415 | return 0; |
1415 | } | 1416 | } |
1416 | 1417 | ||
1418 | void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, | ||
1419 | struct perf_sample *sample, bool nonany_branch_mode) | ||
1420 | { | ||
1421 | struct branch_info *bi; | ||
1422 | |||
1423 | /* If we have branch cycles always annotate them. */ | ||
1424 | if (bs && bs->nr && bs->entries[0].flags.cycles) { | ||
1425 | int i; | ||
1426 | |||
1427 | bi = sample__resolve_bstack(sample, al); | ||
1428 | if (bi) { | ||
1429 | struct addr_map_symbol *prev = NULL; | ||
1430 | |||
1431 | /* | ||
1432 | * Ignore errors, still want to process the | ||
1433 | * other entries. | ||
1434 | * | ||
1435 | * For non standard branch modes always | ||
1436 | * force no IPC (prev == NULL) | ||
1437 | * | ||
1438 | * Note that perf stores branches reversed from | ||
1439 | * program order! | ||
1440 | */ | ||
1441 | for (i = bs->nr - 1; i >= 0; i--) { | ||
1442 | addr_map_symbol__account_cycles(&bi[i].from, | ||
1443 | nonany_branch_mode ? NULL : prev, | ||
1444 | bi[i].flags.cycles); | ||
1445 | prev = &bi[i].to; | ||
1446 | } | ||
1447 | free(bi); | ||
1448 | } | ||
1449 | } | ||
1450 | } | ||
1417 | 1451 | ||
1418 | size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) | 1452 | size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) |
1419 | { | 1453 | { |
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5ed8d9c22981..e2f712f85d2e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
@@ -47,6 +47,7 @@ enum hist_column { | |||
47 | HISTC_MEM_SNOOP, | 47 | HISTC_MEM_SNOOP, |
48 | HISTC_MEM_DCACHELINE, | 48 | HISTC_MEM_DCACHELINE, |
49 | HISTC_TRANSACTION, | 49 | HISTC_TRANSACTION, |
50 | HISTC_CYCLES, | ||
50 | HISTC_NR_COLS, /* Last entry */ | 51 | HISTC_NR_COLS, /* Last entry */ |
51 | }; | 52 | }; |
52 | 53 | ||
@@ -349,6 +350,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) | |||
349 | 350 | ||
350 | unsigned int hists__sort_list_width(struct hists *hists); | 351 | unsigned int hists__sort_list_width(struct hists *hists); |
351 | 352 | ||
353 | void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, | ||
354 | struct perf_sample *sample, bool nonany_branch_mode); | ||
355 | |||
352 | struct option; | 356 | struct option; |
353 | int parse_filter_percentage(const struct option *opt __maybe_unused, | 357 | int parse_filter_percentage(const struct option *opt __maybe_unused, |
354 | const char *arg, int unset __maybe_unused); | 358 | const char *arg, int unset __maybe_unused); |
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a6cb9afc20e2..828936dc3f1e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -1168,7 +1168,7 @@ static void parse_events_print_error(struct parse_events_error *err, | |||
1168 | * Maximum error index indent, we will cut | 1168 | * Maximum error index indent, we will cut |
1169 | * the event string if it's bigger. | 1169 | * the event string if it's bigger. |
1170 | */ | 1170 | */ |
1171 | int max_err_idx = 10; | 1171 | int max_err_idx = 13; |
1172 | 1172 | ||
1173 | /* | 1173 | /* |
1174 | * Let's be specific with the message when | 1174 | * Let's be specific with the message when |
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b615cdf211d6..d4b0e6454bc6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c | |||
@@ -542,7 +542,7 @@ struct perf_pmu *perf_pmu__find(const char *name) | |||
542 | } | 542 | } |
543 | 543 | ||
544 | static struct perf_pmu_format * | 544 | static struct perf_pmu_format * |
545 | pmu_find_format(struct list_head *formats, char *name) | 545 | pmu_find_format(struct list_head *formats, const char *name) |
546 | { | 546 | { |
547 | struct perf_pmu_format *format; | 547 | struct perf_pmu_format *format; |
548 | 548 | ||
@@ -553,6 +553,21 @@ pmu_find_format(struct list_head *formats, char *name) | |||
553 | return NULL; | 553 | return NULL; |
554 | } | 554 | } |
555 | 555 | ||
556 | __u64 perf_pmu__format_bits(struct list_head *formats, const char *name) | ||
557 | { | ||
558 | struct perf_pmu_format *format = pmu_find_format(formats, name); | ||
559 | __u64 bits = 0; | ||
560 | int fbit; | ||
561 | |||
562 | if (!format) | ||
563 | return 0; | ||
564 | |||
565 | for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS) | ||
566 | bits |= 1ULL << fbit; | ||
567 | |||
568 | return bits; | ||
569 | } | ||
570 | |||
556 | /* | 571 | /* |
557 | * Sets value based on the format definition (format parameter) | 572 | * Sets value based on the format definition (format parameter) |
558 | * and unformatted value (value parameter). | 573 | * and unformatted value (value parameter). |
@@ -574,6 +589,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, | |||
574 | } | 589 | } |
575 | } | 590 | } |
576 | 591 | ||
592 | static __u64 pmu_format_max_value(const unsigned long *format) | ||
593 | { | ||
594 | int w; | ||
595 | |||
596 | w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); | ||
597 | if (!w) | ||
598 | return 0; | ||
599 | if (w < 64) | ||
600 | return (1ULL << w) - 1; | ||
601 | return -1; | ||
602 | } | ||
603 | |||
577 | /* | 604 | /* |
578 | * Term is a string term, and might be a param-term. Try to look up its value | 605 | * Term is a string term, and might be a param-term. Try to look up its value |
579 | * in the remaining terms. | 606 | * in the remaining terms. |
@@ -647,7 +674,7 @@ static int pmu_config_term(struct list_head *formats, | |||
647 | { | 674 | { |
648 | struct perf_pmu_format *format; | 675 | struct perf_pmu_format *format; |
649 | __u64 *vp; | 676 | __u64 *vp; |
650 | __u64 val; | 677 | __u64 val, max_val; |
651 | 678 | ||
652 | /* | 679 | /* |
653 | * If this is a parameter we've already used for parameterized-eval, | 680 | * If this is a parameter we've already used for parameterized-eval, |
@@ -713,6 +740,22 @@ static int pmu_config_term(struct list_head *formats, | |||
713 | } else | 740 | } else |
714 | return -EINVAL; | 741 | return -EINVAL; |
715 | 742 | ||
743 | max_val = pmu_format_max_value(format->bits); | ||
744 | if (val > max_val) { | ||
745 | if (err) { | ||
746 | err->idx = term->err_val; | ||
747 | if (asprintf(&err->str, | ||
748 | "value too big for format, maximum is %llu", | ||
749 | (unsigned long long)max_val) < 0) | ||
750 | err->str = strdup("value too big for format"); | ||
751 | return -EINVAL; | ||
752 | } | ||
753 | /* | ||
754 | * Assume we don't care if !err, in which case the value will be | ||
755 | * silently truncated. | ||
756 | */ | ||
757 | } | ||
758 | |||
716 | pmu_format_value(format->bits, val, vp, zero); | 759 | pmu_format_value(format->bits, val, vp, zero); |
717 | return 0; | 760 | return 0; |
718 | } | 761 | } |
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 7b9c8cf8ae3e..5d7e84466bee 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h | |||
@@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats, | |||
54 | struct perf_event_attr *attr, | 54 | struct perf_event_attr *attr, |
55 | struct list_head *head_terms, | 55 | struct list_head *head_terms, |
56 | bool zero, struct parse_events_error *error); | 56 | bool zero, struct parse_events_error *error); |
57 | __u64 perf_pmu__format_bits(struct list_head *formats, const char *name); | ||
57 | int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, | 58 | int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, |
58 | struct perf_pmu_info *info); | 59 | struct perf_pmu_info *info); |
59 | struct list_head *perf_pmu__alias(struct perf_pmu *pmu, | 60 | struct list_head *perf_pmu__alias(struct perf_pmu *pmu, |
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 20f555d1ae1c..83ee95e9743b 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h | |||
@@ -106,6 +106,8 @@ struct variable_list { | |||
106 | struct strlist *vars; /* Available variables */ | 106 | struct strlist *vars; /* Available variables */ |
107 | }; | 107 | }; |
108 | 108 | ||
109 | struct map; | ||
110 | |||
109 | /* Command string to events */ | 111 | /* Command string to events */ |
110 | extern int parse_perf_probe_command(const char *cmd, | 112 | extern int parse_perf_probe_command(const char *cmd, |
111 | struct perf_probe_event *pev); | 113 | struct perf_probe_event *pev); |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f51eb54aeeb3..18722e774a69 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -784,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample) | |||
784 | 784 | ||
785 | printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); | 785 | printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); |
786 | 786 | ||
787 | for (i = 0; i < sample->branch_stack->nr; i++) | 787 | for (i = 0; i < sample->branch_stack->nr; i++) { |
788 | printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", | 788 | struct branch_entry *e = &sample->branch_stack->entries[i]; |
789 | i, sample->branch_stack->entries[i].from, | 789 | |
790 | sample->branch_stack->entries[i].to); | 790 | printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", |
791 | i, e->from, e->to, | ||
792 | e->flags.cycles, | ||
793 | e->flags.mispred ? "M" : " ", | ||
794 | e->flags.predicted ? "P" : " ", | ||
795 | e->flags.abort ? "A" : " ", | ||
796 | e->flags.in_tx ? "T" : " ", | ||
797 | (unsigned)e->flags.reserved); | ||
798 | } | ||
791 | } | 799 | } |
792 | 800 | ||
793 | static void regs_dump__printf(u64 mask, u64 *regs) | 801 | static void regs_dump__printf(u64 mask, u64 *regs) |
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4c65a143a34c..5177088a71d3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
@@ -9,7 +9,7 @@ regex_t parent_regex; | |||
9 | const char default_parent_pattern[] = "^sys_|^do_page_fault"; | 9 | const char default_parent_pattern[] = "^sys_|^do_page_fault"; |
10 | const char *parent_pattern = default_parent_pattern; | 10 | const char *parent_pattern = default_parent_pattern; |
11 | const char default_sort_order[] = "comm,dso,symbol"; | 11 | const char default_sort_order[] = "comm,dso,symbol"; |
12 | const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to"; | 12 | const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; |
13 | const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; | 13 | const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; |
14 | const char default_top_sort_order[] = "dso,symbol"; | 14 | const char default_top_sort_order[] = "dso,symbol"; |
15 | const char default_diff_sort_order[] = "dso,symbol"; | 15 | const char default_diff_sort_order[] = "dso,symbol"; |
@@ -526,6 +526,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, | |||
526 | return repsep_snprintf(bf, size, "%-*.*s", width, width, out); | 526 | return repsep_snprintf(bf, size, "%-*.*s", width, width, out); |
527 | } | 527 | } |
528 | 528 | ||
529 | static int64_t | ||
530 | sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) | ||
531 | { | ||
532 | return left->branch_info->flags.cycles - | ||
533 | right->branch_info->flags.cycles; | ||
534 | } | ||
535 | |||
536 | static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf, | ||
537 | size_t size, unsigned int width) | ||
538 | { | ||
539 | if (he->branch_info->flags.cycles == 0) | ||
540 | return repsep_snprintf(bf, size, "%-*s", width, "-"); | ||
541 | return repsep_snprintf(bf, size, "%-*hd", width, | ||
542 | he->branch_info->flags.cycles); | ||
543 | } | ||
544 | |||
545 | struct sort_entry sort_cycles = { | ||
546 | .se_header = "Basic Block Cycles", | ||
547 | .se_cmp = sort__cycles_cmp, | ||
548 | .se_snprintf = hist_entry__cycles_snprintf, | ||
549 | .se_width_idx = HISTC_CYCLES, | ||
550 | }; | ||
551 | |||
529 | /* --sort daddr_sym */ | 552 | /* --sort daddr_sym */ |
530 | static int64_t | 553 | static int64_t |
531 | sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) | 554 | sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) |
@@ -1190,6 +1213,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { | |||
1190 | DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), | 1213 | DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), |
1191 | DIM(SORT_IN_TX, "in_tx", sort_in_tx), | 1214 | DIM(SORT_IN_TX, "in_tx", sort_in_tx), |
1192 | DIM(SORT_ABORT, "abort", sort_abort), | 1215 | DIM(SORT_ABORT, "abort", sort_abort), |
1216 | DIM(SORT_CYCLES, "cycles", sort_cycles), | ||
1193 | }; | 1217 | }; |
1194 | 1218 | ||
1195 | #undef DIM | 1219 | #undef DIM |
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e97cd476d336..bc6c87a76d16 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
@@ -185,6 +185,7 @@ enum sort_type { | |||
185 | SORT_MISPREDICT, | 185 | SORT_MISPREDICT, |
186 | SORT_ABORT, | 186 | SORT_ABORT, |
187 | SORT_IN_TX, | 187 | SORT_IN_TX, |
188 | SORT_CYCLES, | ||
188 | 189 | ||
189 | /* memory mode specific sort keys */ | 190 | /* memory mode specific sort keys */ |
190 | __SORT_MEMORY_MODE, | 191 | __SORT_MEMORY_MODE, |
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index f2a0d1521e26..c5c709cdc3ce 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c | |||
@@ -238,3 +238,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) | |||
238 | perf_evsel__reset_counts(evsel); | 238 | perf_evsel__reset_counts(evsel); |
239 | } | 239 | } |
240 | } | 240 | } |
241 | |||
242 | static void zero_per_pkg(struct perf_evsel *counter) | ||
243 | { | ||
244 | if (counter->per_pkg_mask) | ||
245 | memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); | ||
246 | } | ||
247 | |||
248 | static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) | ||
249 | { | ||
250 | unsigned long *mask = counter->per_pkg_mask; | ||
251 | struct cpu_map *cpus = perf_evsel__cpus(counter); | ||
252 | int s; | ||
253 | |||
254 | *skip = false; | ||
255 | |||
256 | if (!counter->per_pkg) | ||
257 | return 0; | ||
258 | |||
259 | if (cpu_map__empty(cpus)) | ||
260 | return 0; | ||
261 | |||
262 | if (!mask) { | ||
263 | mask = zalloc(MAX_NR_CPUS); | ||
264 | if (!mask) | ||
265 | return -ENOMEM; | ||
266 | |||
267 | counter->per_pkg_mask = mask; | ||
268 | } | ||
269 | |||
270 | s = cpu_map__get_socket(cpus, cpu); | ||
271 | if (s < 0) | ||
272 | return -1; | ||
273 | |||
274 | *skip = test_and_set_bit(s, mask) == 1; | ||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | static int | ||
279 | process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, | ||
280 | int cpu, int thread, | ||
281 | struct perf_counts_values *count) | ||
282 | { | ||
283 | struct perf_counts_values *aggr = &evsel->counts->aggr; | ||
284 | static struct perf_counts_values zero; | ||
285 | bool skip = false; | ||
286 | |||
287 | if (check_per_pkg(evsel, cpu, &skip)) { | ||
288 | pr_err("failed to read per-pkg counter\n"); | ||
289 | return -1; | ||
290 | } | ||
291 | |||
292 | if (skip) | ||
293 | count = &zero; | ||
294 | |||
295 | switch (config->aggr_mode) { | ||
296 | case AGGR_THREAD: | ||
297 | case AGGR_CORE: | ||
298 | case AGGR_SOCKET: | ||
299 | case AGGR_NONE: | ||
300 | if (!evsel->snapshot) | ||
301 | perf_evsel__compute_deltas(evsel, cpu, thread, count); | ||
302 | perf_counts_values__scale(count, config->scale, NULL); | ||
303 | if (config->aggr_mode == AGGR_NONE) | ||
304 | perf_stat__update_shadow_stats(evsel, count->values, cpu); | ||
305 | break; | ||
306 | case AGGR_GLOBAL: | ||
307 | aggr->val += count->val; | ||
308 | if (config->scale) { | ||
309 | aggr->ena += count->ena; | ||
310 | aggr->run += count->run; | ||
311 | } | ||
312 | default: | ||
313 | break; | ||
314 | } | ||
315 | |||
316 | return 0; | ||
317 | } | ||
318 | |||
319 | static int process_counter_maps(struct perf_stat_config *config, | ||
320 | struct perf_evsel *counter) | ||
321 | { | ||
322 | int nthreads = thread_map__nr(counter->threads); | ||
323 | int ncpus = perf_evsel__nr_cpus(counter); | ||
324 | int cpu, thread; | ||
325 | |||
326 | if (counter->system_wide) | ||
327 | nthreads = 1; | ||
328 | |||
329 | for (thread = 0; thread < nthreads; thread++) { | ||
330 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
331 | if (process_counter_values(config, counter, cpu, thread, | ||
332 | perf_counts(counter->counts, cpu, thread))) | ||
333 | return -1; | ||
334 | } | ||
335 | } | ||
336 | |||
337 | return 0; | ||
338 | } | ||
339 | |||
340 | int perf_stat_process_counter(struct perf_stat_config *config, | ||
341 | struct perf_evsel *counter) | ||
342 | { | ||
343 | struct perf_counts_values *aggr = &counter->counts->aggr; | ||
344 | struct perf_stat *ps = counter->priv; | ||
345 | u64 *count = counter->counts->aggr.values; | ||
346 | int i, ret; | ||
347 | |||
348 | aggr->val = aggr->ena = aggr->run = 0; | ||
349 | init_stats(ps->res_stats); | ||
350 | |||
351 | if (counter->per_pkg) | ||
352 | zero_per_pkg(counter); | ||
353 | |||
354 | ret = process_counter_maps(config, counter); | ||
355 | if (ret) | ||
356 | return ret; | ||
357 | |||
358 | if (config->aggr_mode != AGGR_GLOBAL) | ||
359 | return 0; | ||
360 | |||
361 | if (!counter->snapshot) | ||
362 | perf_evsel__compute_deltas(counter, -1, -1, aggr); | ||
363 | perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); | ||
364 | |||
365 | for (i = 0; i < 3; i++) | ||
366 | update_stats(&ps->res_stats[i], count[i]); | ||
367 | |||
368 | if (verbose) { | ||
369 | fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", | ||
370 | perf_evsel__name(counter), count[0], count[1], count[2]); | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * Save the full runtime - to allow normalization during printout: | ||
375 | */ | ||
376 | perf_stat__update_shadow_stats(counter, count, 0); | ||
377 | |||
378 | return 0; | ||
379 | } | ||
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 1cfbe0a980ac..0b897b083682 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h | |||
@@ -50,6 +50,13 @@ struct perf_counts { | |||
50 | struct xyarray *values; | 50 | struct xyarray *values; |
51 | }; | 51 | }; |
52 | 52 | ||
53 | struct perf_stat_config { | ||
54 | enum aggr_mode aggr_mode; | ||
55 | bool scale; | ||
56 | FILE *output; | ||
57 | unsigned int interval; | ||
58 | }; | ||
59 | |||
53 | static inline struct perf_counts_values* | 60 | static inline struct perf_counts_values* |
54 | perf_counts(struct perf_counts *counts, int cpu, int thread) | 61 | perf_counts(struct perf_counts *counts, int cpu, int thread) |
55 | { | 62 | { |
@@ -109,4 +116,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); | |||
109 | int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); | 116 | int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); |
110 | void perf_evlist__free_stats(struct perf_evlist *evlist); | 117 | void perf_evlist__free_stats(struct perf_evlist *evlist); |
111 | void perf_evlist__reset_stats(struct perf_evlist *evlist); | 118 | void perf_evlist__reset_stats(struct perf_evlist *evlist); |
119 | |||
120 | int perf_stat_process_counter(struct perf_stat_config *config, | ||
121 | struct perf_evsel *counter); | ||
112 | #endif | 122 | #endif |