aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-report.txt1
-rw-r--r--tools/perf/Documentation/perf-top.txt21
-rw-r--r--tools/perf/arch/alpha/Build1
-rw-r--r--tools/perf/arch/mips/Build1
-rw-r--r--tools/perf/arch/parisc/Build1
-rw-r--r--tools/perf/arch/xtensa/Build1
-rw-r--r--tools/perf/arch/xtensa/Makefile3
-rw-r--r--tools/perf/arch/xtensa/util/Build1
-rw-r--r--tools/perf/arch/xtensa/util/dwarf-regs.c25
-rw-r--r--tools/perf/builtin-annotate.c1
-rw-r--r--tools/perf/builtin-report.c10
-rw-r--r--tools/perf/builtin-stat.c224
-rw-r--r--tools/perf/builtin-top.c9
-rw-r--r--tools/perf/builtin-trace.c10
-rw-r--r--tools/perf/perf-with-kcore.sh28
-rw-r--r--tools/perf/ui/browsers/annotate.c130
-rw-r--r--tools/perf/util/annotate.c127
-rw-r--r--tools/perf/util/annotate.h19
-rw-r--r--tools/perf/util/auxtrace.c6
-rw-r--r--tools/perf/util/debug.c5
-rw-r--r--tools/perf/util/debug.h1
-rw-r--r--tools/perf/util/event.h3
-rw-r--r--tools/perf/util/evlist.c10
-rw-r--r--tools/perf/util/evlist.h1
-rw-r--r--tools/perf/util/hist.c36
-rw-r--r--tools/perf/util/hist.h4
-rw-r--r--tools/perf/util/parse-events.c2
-rw-r--r--tools/perf/util/pmu.c47
-rw-r--r--tools/perf/util/pmu.h1
-rw-r--r--tools/perf/util/probe-event.h2
-rw-r--r--tools/perf/util/session.c16
-rw-r--r--tools/perf/util/sort.c26
-rw-r--r--tools/perf/util/sort.h1
-rw-r--r--tools/perf/util/stat.c139
-rw-r--r--tools/perf/util/stat.h10
35 files changed, 702 insertions, 221 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index c33b69f3374f..960da203ec11 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -109,6 +109,7 @@ OPTIONS
109 - mispredict: "N" for predicted branch, "Y" for mispredicted branch 109 - mispredict: "N" for predicted branch, "Y" for mispredicted branch
110 - in_tx: branch in TSX transaction 110 - in_tx: branch in TSX transaction
111 - abort: TSX transaction abort. 111 - abort: TSX transaction abort.
112 - cycles: Cycles in basic block
112 113
113 And default sort keys are changed to comm, dso_from, symbol_from, dso_to 114 And default sort keys are changed to comm, dso_from, symbol_from, dso_to
114 and symbol_to, see '--branch-stack'. 115 and symbol_to, see '--branch-stack'.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 776aec4d0927..f6a23eb294e7 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -208,6 +208,27 @@ Default is to monitor all CPUS.
208 This option sets the time out limit. The default value is 500 ms. 208 This option sets the time out limit. The default value is 500 ms.
209 209
210 210
211-b::
212--branch-any::
213 Enable taken branch stack sampling. Any type of taken branch may be sampled.
214 This is a shortcut for --branch-filter any. See --branch-filter for more information.
215
216-j::
217--branch-filter::
218 Enable taken branch stack sampling. Each sample captures a series of consecutive
219 taken branches. The number of branches captured with each sample depends on the
220 underlying hardware, the type of branches of interest, and the executed code.
221 It is possible to select the types of branches captured by enabling filters.
222 For a full list of modifiers please see the perf record manpage.
223
224 The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
225 The privilege levels may be omitted, in which case, the privilege levels of the associated
226 event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
227 levels are subject to permissions. When sampling on multiple events, branch stack sampling
228 is enabled for all the sampling events. The sampled branch type is the same for all events.
229 The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
230 Note that this feature may not be available on all processors.
231
211INTERACTIVE PROMPTING KEYS 232INTERACTIVE PROMPTING KEYS
212-------------------------- 233--------------------------
213 234
diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/alpha/Build
@@ -0,0 +1 @@
# empty
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/mips/Build
@@ -0,0 +1 @@
# empty
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/parisc/Build
@@ -0,0 +1 @@
# empty
diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build
new file mode 100644
index 000000000000..54afe4a467e7
--- /dev/null
+++ b/tools/perf/arch/xtensa/Build
@@ -0,0 +1 @@
libperf-y += util/
diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile
new file mode 100644
index 000000000000..7fbca175099e
--- /dev/null
+++ b/tools/perf/arch/xtensa/Makefile
@@ -0,0 +1,3 @@
1ifndef NO_DWARF
2PERF_HAVE_DWARF_REGS := 1
3endif
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build
new file mode 100644
index 000000000000..954e287bbb89
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/Build
@@ -0,0 +1 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c
new file mode 100644
index 000000000000..4dba76bfb4ce
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/dwarf-regs.c
@@ -0,0 +1,25 @@
1/*
2 * Mapping of DWARF debug register numbers into register names.
3 *
4 * Copyright (c) 2015 Cadence Design Systems Inc.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <stddef.h>
13#include <dwarf-regs.h>
14
15#define XTENSA_MAX_REGS 16
16
/*
 * Register-name table used for the DWARF register number -> name mapping.
 * Index i holds the string "a<i>" for i in 0..15 (XTENSA_MAX_REGS entries).
 */
17const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
18 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
19 "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
20};
21
/*
 * Look up the name of register number n in xtensa_regs_table.
 *
 * Returns the table entry when n is below XTENSA_MAX_REGS, and NULL for any
 * n outside the table, so the array is never indexed out of bounds.
 */
22const char *get_arch_regstr(unsigned int n)
23{
24 return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
25}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 2c1bec39c30e..467a23b14e2f 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -187,6 +187,7 @@ find_next:
187 * symbol, free he->ms.sym->src to signal we already 187 * symbol, free he->ms.sym->src to signal we already
188 * processed this symbol. 188 * processed this symbol.
189 */ 189 */
190 zfree(&notes->src->cycles_hist);
190 zfree(&notes->src); 191 zfree(&notes->src);
191 } 192 }
192 } 193 }
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 95a47719aec3..3a9d1b659fcd 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -53,6 +53,7 @@ struct report {
53 bool mem_mode; 53 bool mem_mode;
54 bool header; 54 bool header;
55 bool header_only; 55 bool header_only;
56 bool nonany_branch_mode;
56 int max_stack; 57 int max_stack;
57 struct perf_read_values show_threads_values; 58 struct perf_read_values show_threads_values;
58 const char *pretty_printing_style; 59 const char *pretty_printing_style;
@@ -102,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
102 if (!ui__has_annotation()) 103 if (!ui__has_annotation())
103 return 0; 104 return 0;
104 105
106 hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
107 rep->nonany_branch_mode);
108
105 if (sort__mode == SORT_MODE__BRANCH) { 109 if (sort__mode == SORT_MODE__BRANCH) {
106 bi = he->branch_info; 110 bi = he->branch_info;
107 err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); 111 err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
@@ -258,6 +262,12 @@ static int report__setup_sample_type(struct report *rep)
258 else 262 else
259 callchain_param.record_mode = CALLCHAIN_FP; 263 callchain_param.record_mode = CALLCHAIN_FP;
260 } 264 }
265
266 /* ??? handle more cases than just ANY? */
267 if (!(perf_evlist__combined_branch_type(session->evlist) &
268 PERF_SAMPLE_BRANCH_ANY))
269 rep->nonany_branch_mode = true;
270
261 return 0; 271 return 0;
262} 272}
263 273
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d99d850e1444..a054ddc0b2a0 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -101,8 +101,6 @@ static struct target target = {
101 101
102static int run_count = 1; 102static int run_count = 1;
103static bool no_inherit = false; 103static bool no_inherit = false;
104static bool scale = true;
105static enum aggr_mode aggr_mode = AGGR_GLOBAL;
106static volatile pid_t child_pid = -1; 104static volatile pid_t child_pid = -1;
107static bool null_run = false; 105static bool null_run = false;
108static int detailed_run = 0; 106static int detailed_run = 0;
@@ -112,11 +110,9 @@ static int big_num_opt = -1;
112static const char *csv_sep = NULL; 110static const char *csv_sep = NULL;
113static bool csv_output = false; 111static bool csv_output = false;
114static bool group = false; 112static bool group = false;
115static FILE *output = NULL;
116static const char *pre_cmd = NULL; 113static const char *pre_cmd = NULL;
117static const char *post_cmd = NULL; 114static const char *post_cmd = NULL;
118static bool sync_run = false; 115static bool sync_run = false;
119static unsigned int interval = 0;
120static unsigned int initial_delay = 0; 116static unsigned int initial_delay = 0;
121static unsigned int unit_width = 4; /* strlen("unit") */ 117static unsigned int unit_width = 4; /* strlen("unit") */
122static bool forever = false; 118static bool forever = false;
@@ -126,6 +122,11 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu);
126 122
127static volatile int done = 0; 123static volatile int done = 0;
128 124
/*
 * File-scope configuration for this perf stat run. Explicit defaults are
 * global aggregation (AGGR_GLOBAL) with counter scaling enabled; members not
 * named in the initializer start out zero-initialized.
 */
125static struct perf_stat_config stat_config = {
126 .aggr_mode = AGGR_GLOBAL,
127 .scale = true,
128};
129
129static inline void diff_timespec(struct timespec *r, struct timespec *a, 130static inline void diff_timespec(struct timespec *r, struct timespec *a,
130 struct timespec *b) 131 struct timespec *b)
131{ 132{
@@ -148,7 +149,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
148{ 149{
149 struct perf_event_attr *attr = &evsel->attr; 150 struct perf_event_attr *attr = &evsel->attr;
150 151
151 if (scale) 152 if (stat_config.scale)
152 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 153 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
153 PERF_FORMAT_TOTAL_TIME_RUNNING; 154 PERF_FORMAT_TOTAL_TIME_RUNNING;
154 155
@@ -178,142 +179,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
178 return 0; 179 return 0;
179} 180}
180 181
/*
 * Clear the counter's per-package mask (MAX_NR_CPUS bytes) if one has been
 * allocated; does nothing when per_pkg_mask is NULL.
 */
181static void zero_per_pkg(struct perf_evsel *counter)
182{
183 if (counter->per_pkg_mask)
184 memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
185}
186
/*
 * Decide whether the reading for 'cpu' should be skipped because the socket
 * it belongs to has already been seen for this per-package counter.
 *
 * *skip is always initialized to false. It is set to true only when the bit
 * for cpu's socket was already set in counter->per_pkg_mask.
 *
 * Returns 0 on success (including the cases where no check is needed),
 * -ENOMEM if the mask cannot be allocated, and -1 if the socket lookup fails.
 */
187static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
188{
189 unsigned long *mask = counter->per_pkg_mask;
190 struct cpu_map *cpus = perf_evsel__cpus(counter);
191 int s;
192
193 *skip = false;
194
/* Nothing to check for counters that are not per-package. */
195 if (!counter->per_pkg)
196 return 0;
197
198 if (cpu_map__empty(cpus))
199 return 0;
200
/* Allocate the mask on first use and remember it on the counter. */
201 if (!mask) {
202 mask = zalloc(MAX_NR_CPUS);
203 if (!mask)
204 return -ENOMEM;
205
206 counter->per_pkg_mask = mask;
207 }
208
209 s = cpu_map__get_socket(cpus, cpu);
210 if (s < 0)
211 return -1;
212
/* Mark the socket as seen; skip if it had been marked before. */
213 *skip = test_and_set_bit(s, mask) == 1;
214 return 0;
215}
216
/*
 * Process one (cpu, thread) reading 'count' of 'evsel' according to the
 * file-scope aggr_mode and scale settings.
 *
 * If check_per_pkg() says the reading must be skipped, a zeroed value is
 * processed in its place. In the THREAD/CORE/SOCKET/NONE modes the reading is
 * turned into a delta (unless the evsel is a snapshot) and scaled in place;
 * in AGGR_GLOBAL mode it is accumulated into evsel->counts->aggr.
 *
 * Returns 0 on success, -1 if the per-package check fails.
 */
217static int
218process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
219 struct perf_counts_values *count)
220{
221 struct perf_counts_values *aggr = &evsel->counts->aggr;
/* static storage, so this is an all-zero value */
222 static struct perf_counts_values zero;
223 bool skip = false;
224
225 if (check_per_pkg(evsel, cpu, &skip)) {
226 pr_err("failed to read per-pkg counter\n");
227 return -1;
228 }
229
230 if (skip)
231 count = &zero;
232
233 switch (aggr_mode) {
234 case AGGR_THREAD:
235 case AGGR_CORE:
236 case AGGR_SOCKET:
237 case AGGR_NONE:
238 if (!evsel->snapshot)
239 perf_evsel__compute_deltas(evsel, cpu, thread, count);
240 perf_counts_values__scale(count, scale, NULL);
/* shadow stats are updated per reading only in no-aggregation mode */
241 if (aggr_mode == AGGR_NONE)
242 perf_stat__update_shadow_stats(evsel, count->values, cpu);
243 break;
244 case AGGR_GLOBAL:
245 aggr->val += count->val;
/* enabled/running times are only accumulated when scaling is on */
246 if (scale) {
247 aggr->ena += count->ena;
248 aggr->run += count->run;
249 }
/* no break here: falls through into default, which only breaks */
250 default:
251 break;
252 }
253
254 return 0;
255}
256
/*
 * Run process_counter_values() over every (thread, cpu) pair of 'counter'.
 *
 * For a system-wide counter only one thread iteration is performed.
 * Returns 0 when all readings were processed, -1 on the first failure.
 */
257static int process_counter_maps(struct perf_evsel *counter)
258{
259 int nthreads = thread_map__nr(counter->threads);
260 int ncpus = perf_evsel__nr_cpus(counter);
261 int cpu, thread;
262
263 if (counter->system_wide)
264 nthreads = 1;
265
266 for (thread = 0; thread < nthreads; thread++) {
267 for (cpu = 0; cpu < ncpus; cpu++) {
268 if (process_counter_values(counter, cpu, thread,
269 perf_counts(counter->counts, cpu, thread)))
270 return -1;
271 }
272 }
273
274 return 0;
275}
276
/*
 * Process all readings of 'counter' and, in AGGR_GLOBAL mode, finish the
 * aggregate value.
 *
 * The aggregate (val/ena/run) and ps->res_stats are reset first, and the
 * per-package mask is cleared for per-package counters. After the per-reading
 * pass, non-global modes return immediately. In global mode the aggregate is
 * turned into a delta (unless the counter is a snapshot), scaled, fed into
 * the three res_stats entries, optionally printed, and passed to the shadow
 * stats.
 *
 * Returns 0 on success, or the error from process_counter_maps().
 */
277static int process_counter(struct perf_evsel *counter)
278{
279 struct perf_counts_values *aggr = &counter->counts->aggr;
280 struct perf_stat *ps = counter->priv;
/* 'count' views the same aggregate as 'aggr', as an array of u64 values */
281 u64 *count = counter->counts->aggr.values;
282 int i, ret;
283
284 aggr->val = aggr->ena = aggr->run = 0;
285 init_stats(ps->res_stats);
286
287 if (counter->per_pkg)
288 zero_per_pkg(counter);
289
290 ret = process_counter_maps(counter);
291 if (ret)
292 return ret;
293
/* everything below applies to the global aggregate only */
294 if (aggr_mode != AGGR_GLOBAL)
295 return 0;
296
297 if (!counter->snapshot)
298 perf_evsel__compute_deltas(counter, -1, -1, aggr);
299 perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
300
301 for (i = 0; i < 3; i++)
302 update_stats(&ps->res_stats[i], count[i]);
303
304 if (verbose) {
305 fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
306 perf_evsel__name(counter), count[0], count[1], count[2]);
307 }
308
309 /*
310 * Save the full runtime - to allow normalization during printout:
311 */
312 perf_stat__update_shadow_stats(counter, count, 0);
313
314 return 0;
315}
316
317/* 182/*
318 * Read out the results of a single counter: 183 * Read out the results of a single counter:
319 * do not aggregate counts across CPUs in system-wide mode 184 * do not aggregate counts across CPUs in system-wide mode
@@ -351,7 +216,7 @@ static void read_counters(bool close_counters)
351 if (read_counter(counter)) 216 if (read_counter(counter))
352 pr_warning("failed to read counter %s\n", counter->name); 217 pr_warning("failed to read counter %s\n", counter->name);
353 218
354 if (process_counter(counter)) 219 if (perf_stat_process_counter(&stat_config, counter))
355 pr_warning("failed to process counter %s\n", counter->name); 220 pr_warning("failed to process counter %s\n", counter->name);
356 221
357 if (close_counters) { 222 if (close_counters) {
@@ -402,6 +267,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
402 267
403static int __run_perf_stat(int argc, const char **argv) 268static int __run_perf_stat(int argc, const char **argv)
404{ 269{
270 int interval = stat_config.interval;
405 char msg[512]; 271 char msg[512];
406 unsigned long long t0, t1; 272 unsigned long long t0, t1;
407 struct perf_evsel *counter; 273 struct perf_evsel *counter;
@@ -545,13 +411,13 @@ static int run_perf_stat(int argc, const char **argv)
545static void print_running(u64 run, u64 ena) 411static void print_running(u64 run, u64 ena)
546{ 412{
547 if (csv_output) { 413 if (csv_output) {
548 fprintf(output, "%s%" PRIu64 "%s%.2f", 414 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
549 csv_sep, 415 csv_sep,
550 run, 416 run,
551 csv_sep, 417 csv_sep,
552 ena ? 100.0 * run / ena : 100.0); 418 ena ? 100.0 * run / ena : 100.0);
553 } else if (run != ena) { 419 } else if (run != ena) {
554 fprintf(output, " (%.2f%%)", 100.0 * run / ena); 420 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
555 } 421 }
556} 422}
557 423
@@ -560,9 +426,9 @@ static void print_noise_pct(double total, double avg)
560 double pct = rel_stddev_stats(total, avg); 426 double pct = rel_stddev_stats(total, avg);
561 427
562 if (csv_output) 428 if (csv_output)
563 fprintf(output, "%s%.2f%%", csv_sep, pct); 429 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
564 else if (pct) 430 else if (pct)
565 fprintf(output, " ( +-%6.2f%% )", pct); 431 fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
566} 432}
567 433
568static void print_noise(struct perf_evsel *evsel, double avg) 434static void print_noise(struct perf_evsel *evsel, double avg)
@@ -578,9 +444,9 @@ static void print_noise(struct perf_evsel *evsel, double avg)
578 444
579static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 445static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
580{ 446{
581 switch (aggr_mode) { 447 switch (stat_config.aggr_mode) {
582 case AGGR_CORE: 448 case AGGR_CORE:
583 fprintf(output, "S%d-C%*d%s%*d%s", 449 fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
584 cpu_map__id_to_socket(id), 450 cpu_map__id_to_socket(id),
585 csv_output ? 0 : -8, 451 csv_output ? 0 : -8,
586 cpu_map__id_to_cpu(id), 452 cpu_map__id_to_cpu(id),
@@ -590,7 +456,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
590 csv_sep); 456 csv_sep);
591 break; 457 break;
592 case AGGR_SOCKET: 458 case AGGR_SOCKET:
593 fprintf(output, "S%*d%s%*d%s", 459 fprintf(stat_config.output, "S%*d%s%*d%s",
594 csv_output ? 0 : -5, 460 csv_output ? 0 : -5,
595 id, 461 id,
596 csv_sep, 462 csv_sep,
@@ -599,12 +465,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
599 csv_sep); 465 csv_sep);
600 break; 466 break;
601 case AGGR_NONE: 467 case AGGR_NONE:
602 fprintf(output, "CPU%*d%s", 468 fprintf(stat_config.output, "CPU%*d%s",
603 csv_output ? 0 : -4, 469 csv_output ? 0 : -4,
604 perf_evsel__cpus(evsel)->map[id], csv_sep); 470 perf_evsel__cpus(evsel)->map[id], csv_sep);
605 break; 471 break;
606 case AGGR_THREAD: 472 case AGGR_THREAD:
607 fprintf(output, "%*s-%*d%s", 473 fprintf(stat_config.output, "%*s-%*d%s",
608 csv_output ? 0 : 16, 474 csv_output ? 0 : 16,
609 thread_map__comm(evsel->threads, id), 475 thread_map__comm(evsel->threads, id),
610 csv_output ? 0 : -8, 476 csv_output ? 0 : -8,
@@ -619,6 +485,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
619 485
620static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 486static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
621{ 487{
488 FILE *output = stat_config.output;
622 double msecs = avg / 1e6; 489 double msecs = avg / 1e6;
623 const char *fmt_v, *fmt_n; 490 const char *fmt_v, *fmt_n;
624 char name[25]; 491 char name[25];
@@ -643,7 +510,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
643 if (evsel->cgrp) 510 if (evsel->cgrp)
644 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 511 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
645 512
646 if (csv_output || interval) 513 if (csv_output || stat_config.interval)
647 return; 514 return;
648 515
649 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 516 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -655,6 +522,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
655 522
656static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 523static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
657{ 524{
525 FILE *output = stat_config.output;
658 double sc = evsel->scale; 526 double sc = evsel->scale;
659 const char *fmt; 527 const char *fmt;
660 int cpu = cpu_map__id_to_cpu(id); 528 int cpu = cpu_map__id_to_cpu(id);
@@ -670,7 +538,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
670 538
671 aggr_printout(evsel, id, nr); 539 aggr_printout(evsel, id, nr);
672 540
673 if (aggr_mode == AGGR_GLOBAL) 541 if (stat_config.aggr_mode == AGGR_GLOBAL)
674 cpu = 0; 542 cpu = 0;
675 543
676 fprintf(output, fmt, avg, csv_sep); 544 fprintf(output, fmt, avg, csv_sep);
@@ -685,14 +553,16 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
685 if (evsel->cgrp) 553 if (evsel->cgrp)
686 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 554 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
687 555
688 if (csv_output || interval) 556 if (csv_output || stat_config.interval)
689 return; 557 return;
690 558
691 perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); 559 perf_stat__print_shadow_stats(output, evsel, avg, cpu,
560 stat_config.aggr_mode);
692} 561}
693 562
694static void print_aggr(char *prefix) 563static void print_aggr(char *prefix)
695{ 564{
565 FILE *output = stat_config.output;
696 struct perf_evsel *counter; 566 struct perf_evsel *counter;
697 int cpu, cpu2, s, s2, id, nr; 567 int cpu, cpu2, s, s2, id, nr;
698 double uval; 568 double uval;
@@ -761,6 +631,7 @@ static void print_aggr(char *prefix)
761 631
762static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 632static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
763{ 633{
634 FILE *output = stat_config.output;
764 int nthreads = thread_map__nr(counter->threads); 635 int nthreads = thread_map__nr(counter->threads);
765 int ncpus = cpu_map__nr(counter->cpus); 636 int ncpus = cpu_map__nr(counter->cpus);
766 int cpu, thread; 637 int cpu, thread;
@@ -799,6 +670,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
799 */ 670 */
800static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 671static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
801{ 672{
673 FILE *output = stat_config.output;
802 struct perf_stat *ps = counter->priv; 674 struct perf_stat *ps = counter->priv;
803 double avg = avg_stats(&ps->res_stats[0]); 675 double avg = avg_stats(&ps->res_stats[0]);
804 int scaled = counter->counts->scaled; 676 int scaled = counter->counts->scaled;
@@ -850,6 +722,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
850 */ 722 */
851static void print_counter(struct perf_evsel *counter, char *prefix) 723static void print_counter(struct perf_evsel *counter, char *prefix)
852{ 724{
725 FILE *output = stat_config.output;
853 u64 ena, run, val; 726 u64 ena, run, val;
854 double uval; 727 double uval;
855 int cpu; 728 int cpu;
@@ -904,12 +777,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
904 777
905static void print_interval(char *prefix, struct timespec *ts) 778static void print_interval(char *prefix, struct timespec *ts)
906{ 779{
780 FILE *output = stat_config.output;
907 static int num_print_interval; 781 static int num_print_interval;
908 782
909 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 783 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
910 784
911 if (num_print_interval == 0 && !csv_output) { 785 if (num_print_interval == 0 && !csv_output) {
912 switch (aggr_mode) { 786 switch (stat_config.aggr_mode) {
913 case AGGR_SOCKET: 787 case AGGR_SOCKET:
914 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); 788 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
915 break; 789 break;
@@ -934,6 +808,7 @@ static void print_interval(char *prefix, struct timespec *ts)
934 808
935static void print_header(int argc, const char **argv) 809static void print_header(int argc, const char **argv)
936{ 810{
811 FILE *output = stat_config.output;
937 int i; 812 int i;
938 813
939 fflush(stdout); 814 fflush(stdout);
@@ -963,6 +838,8 @@ static void print_header(int argc, const char **argv)
963 838
964static void print_footer(void) 839static void print_footer(void)
965{ 840{
841 FILE *output = stat_config.output;
842
966 if (!null_run) 843 if (!null_run)
967 fprintf(output, "\n"); 844 fprintf(output, "\n");
968 fprintf(output, " %17.9f seconds time elapsed", 845 fprintf(output, " %17.9f seconds time elapsed",
@@ -977,6 +854,7 @@ static void print_footer(void)
977 854
978static void print_counters(struct timespec *ts, int argc, const char **argv) 855static void print_counters(struct timespec *ts, int argc, const char **argv)
979{ 856{
857 int interval = stat_config.interval;
980 struct perf_evsel *counter; 858 struct perf_evsel *counter;
981 char buf[64], *prefix = NULL; 859 char buf[64], *prefix = NULL;
982 860
@@ -985,7 +863,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
985 else 863 else
986 print_header(argc, argv); 864 print_header(argc, argv);
987 865
988 switch (aggr_mode) { 866 switch (stat_config.aggr_mode) {
989 case AGGR_CORE: 867 case AGGR_CORE:
990 case AGGR_SOCKET: 868 case AGGR_SOCKET:
991 print_aggr(prefix); 869 print_aggr(prefix);
@@ -1009,14 +887,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
1009 if (!interval && !csv_output) 887 if (!interval && !csv_output)
1010 print_footer(); 888 print_footer();
1011 889
1012 fflush(output); 890 fflush(stat_config.output);
1013} 891}
1014 892
1015static volatile int signr = -1; 893static volatile int signr = -1;
1016 894
1017static void skip_signal(int signo) 895static void skip_signal(int signo)
1018{ 896{
1019 if ((child_pid == -1) || interval) 897 if ((child_pid == -1) || stat_config.interval)
1020 done = 1; 898 done = 1;
1021 899
1022 signr = signo; 900 signr = signo;
@@ -1064,7 +942,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
1064 942
1065static int perf_stat_init_aggr_mode(void) 943static int perf_stat_init_aggr_mode(void)
1066{ 944{
1067 switch (aggr_mode) { 945 switch (stat_config.aggr_mode) {
1068 case AGGR_SOCKET: 946 case AGGR_SOCKET:
1069 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 947 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1070 perror("cannot build socket map"); 948 perror("cannot build socket map");
@@ -1270,7 +1148,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1270 "system-wide collection from all CPUs"), 1148 "system-wide collection from all CPUs"),
1271 OPT_BOOLEAN('g', "group", &group, 1149 OPT_BOOLEAN('g', "group", &group,
1272 "put the counters into a counter group"), 1150 "put the counters into a counter group"),
1273 OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), 1151 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
1274 OPT_INCR('v', "verbose", &verbose, 1152 OPT_INCR('v', "verbose", &verbose,
1275 "be more verbose (show counter open errors, etc)"), 1153 "be more verbose (show counter open errors, etc)"),
1276 OPT_INTEGER('r', "repeat", &run_count, 1154 OPT_INTEGER('r', "repeat", &run_count,
@@ -1286,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1286 stat__set_big_num), 1164 stat__set_big_num),
1287 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1165 OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1288 "list of cpus to monitor in system-wide"), 1166 "list of cpus to monitor in system-wide"),
1289 OPT_SET_UINT('A', "no-aggr", &aggr_mode, 1167 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
1290 "disable CPU count aggregation", AGGR_NONE), 1168 "disable CPU count aggregation", AGGR_NONE),
1291 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1169 OPT_STRING('x', "field-separator", &csv_sep, "separator",
1292 "print counts with custom separator"), 1170 "print counts with custom separator"),
@@ -1300,13 +1178,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1300 "command to run prior to the measured command"), 1178 "command to run prior to the measured command"),
1301 OPT_STRING(0, "post", &post_cmd, "command", 1179 OPT_STRING(0, "post", &post_cmd, "command",
1302 "command to run after to the measured command"), 1180 "command to run after to the measured command"),
1303 OPT_UINTEGER('I', "interval-print", &interval, 1181 OPT_UINTEGER('I', "interval-print", &stat_config.interval,
1304 "print counts at regular interval in ms (>= 100)"), 1182 "print counts at regular interval in ms (>= 100)"),
1305 OPT_SET_UINT(0, "per-socket", &aggr_mode, 1183 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
1306 "aggregate counts per processor socket", AGGR_SOCKET), 1184 "aggregate counts per processor socket", AGGR_SOCKET),
1307 OPT_SET_UINT(0, "per-core", &aggr_mode, 1185 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
1308 "aggregate counts per physical processor core", AGGR_CORE), 1186 "aggregate counts per physical processor core", AGGR_CORE),
1309 OPT_SET_UINT(0, "per-thread", &aggr_mode, 1187 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
1310 "aggregate counts per thread", AGGR_THREAD), 1188 "aggregate counts per thread", AGGR_THREAD),
1311 OPT_UINTEGER('D', "delay", &initial_delay, 1189 OPT_UINTEGER('D', "delay", &initial_delay,
1312 "ms to wait before starting measurement after program start"), 1190 "ms to wait before starting measurement after program start"),
@@ -1318,6 +1196,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1318 }; 1196 };
1319 int status = -EINVAL, run_idx; 1197 int status = -EINVAL, run_idx;
1320 const char *mode; 1198 const char *mode;
1199 FILE *output = stderr;
1200 unsigned int interval;
1321 1201
1322 setlocale(LC_ALL, ""); 1202 setlocale(LC_ALL, "");
1323 1203
@@ -1328,7 +1208,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1328 argc = parse_options(argc, argv, options, stat_usage, 1208 argc = parse_options(argc, argv, options, stat_usage,
1329 PARSE_OPT_STOP_AT_NON_OPTION); 1209 PARSE_OPT_STOP_AT_NON_OPTION);
1330 1210
1331 output = stderr; 1211 interval = stat_config.interval;
1212
1332 if (output_name && strcmp(output_name, "-")) 1213 if (output_name && strcmp(output_name, "-"))
1333 output = NULL; 1214 output = NULL;
1334 1215
@@ -1365,6 +1246,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1365 } 1246 }
1366 } 1247 }
1367 1248
1249 stat_config.output = output;
1250
1368 if (csv_sep) { 1251 if (csv_sep) {
1369 csv_output = true; 1252 csv_output = true;
1370 if (!strcmp(csv_sep, "\\t")) 1253 if (!strcmp(csv_sep, "\\t"))
@@ -1399,7 +1282,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1399 run_count = 1; 1282 run_count = 1;
1400 } 1283 }
1401 1284
1402 if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 1285 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
1403 fprintf(stderr, "The --per-thread option is only available " 1286 fprintf(stderr, "The --per-thread option is only available "
1404 "when monitoring via -p -t options.\n"); 1287 "when monitoring via -p -t options.\n");
1405 parse_options_usage(NULL, options, "p", 1); 1288 parse_options_usage(NULL, options, "p", 1);
@@ -1411,7 +1294,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1411 * no_aggr, cgroup are for system-wide only 1294 * no_aggr, cgroup are for system-wide only
1412 * --per-thread is aggregated per thread, we dont mix it with cpu mode 1295 * --per-thread is aggregated per thread, we dont mix it with cpu mode
1413 */ 1296 */
1414 if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && 1297 if (((stat_config.aggr_mode != AGGR_GLOBAL &&
1298 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
1415 !target__has_cpu(&target)) { 1299 !target__has_cpu(&target)) {
1416 fprintf(stderr, "both cgroup and no-aggregation " 1300 fprintf(stderr, "both cgroup and no-aggregation "
1417 "modes only available in system-wide mode\n"); 1301 "modes only available in system-wide mode\n");
@@ -1444,7 +1328,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1444 * Initialize thread_map with comm names, 1328 * Initialize thread_map with comm names,
1445 * so we could print it out on output. 1329 * so we could print it out on output.
1446 */ 1330 */
1447 if (aggr_mode == AGGR_THREAD) 1331 if (stat_config.aggr_mode == AGGR_THREAD)
1448 thread_map__read_comms(evsel_list->threads); 1332 thread_map__read_comms(evsel_list->threads);
1449 1333
1450 if (interval && interval < 100) { 1334 if (interval && interval < 100) {
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ecf319728f25..bfe24f1e362f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -40,6 +40,7 @@
40#include "util/xyarray.h" 40#include "util/xyarray.h"
41#include "util/sort.h" 41#include "util/sort.h"
42#include "util/intlist.h" 42#include "util/intlist.h"
43#include "util/parse-branch-options.h"
43#include "arch/common.h" 44#include "arch/common.h"
44 45
45#include "util/debug.h" 46#include "util/debug.h"
@@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
695 perf_top__record_precise_ip(top, he, evsel->idx, ip); 696 perf_top__record_precise_ip(top, he, evsel->idx, ip);
696 } 697 }
697 698
699 hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
700 !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
698 return 0; 701 return 0;
699} 702}
700 703
@@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1171 "don't try to adjust column width, use these fixed values"), 1174 "don't try to adjust column width, use these fixed values"),
1172 OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, 1175 OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
1173 "per thread proc mmap processing timeout in ms"), 1176 "per thread proc mmap processing timeout in ms"),
1177 OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
1178 "branch any", "sample any taken branches",
1179 parse_branch_stack),
1180 OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
1181 "branch filter mask", "branch stack filter modes",
1182 parse_branch_stack),
1174 OPT_END() 1183 OPT_END()
1175 }; 1184 };
1176 const char * const top_usage[] = { 1185 const char * const top_usage[] = {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a47497011c93..a25048c85b76 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2773,9 +2773,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
2773 2773
2774 printed += fprintf(fp, "\n"); 2774 printed += fprintf(fp, "\n");
2775 2775
2776 printed += fprintf(fp, " syscall calls min avg max stddev\n"); 2776 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2777 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); 2777 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2778 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); 2778 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2779 2779
2780 /* each int_node is a syscall */ 2780 /* each int_node is a syscall */
2781 while (inode) { 2781 while (inode) {
@@ -2792,8 +2792,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
2792 2792
2793 sc = &trace->syscalls.table[inode->i]; 2793 sc = &trace->syscalls.table[inode->i];
2794 printed += fprintf(fp, " %-15s", sc->name); 2794 printed += fprintf(fp, " %-15s", sc->name);
2795 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", 2795 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2796 n, min, avg); 2796 n, avg * n, min, avg);
2797 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2797 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2798 } 2798 }
2799 2799
diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh
index c7ff90a90e4e..7e47a7cbc195 100644
--- a/tools/perf/perf-with-kcore.sh
+++ b/tools/perf/perf-with-kcore.sh
@@ -50,7 +50,7 @@ copy_kcore()
50 fi 50 fi
51 51
52 rm -f perf.data.junk 52 rm -f perf.data.junk
53 ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null & 53 ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
54 PERF_PID=$! 54 PERF_PID=$!
55 55
56 # Need to make sure that perf has started 56 # Need to make sure that perf has started
@@ -160,18 +160,18 @@ record()
160 echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 160 echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
161 fi 161 fi
162 162
163 if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then 163 if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
164 echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 164 echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
165 fi 165 fi
166 166
167 if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then 167 if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
168 if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then 168 if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
169 echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 169 echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
170 fi 170 fi
171 171
172 if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then 172 if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
173 true 173 true
174 elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then 174 elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
175 true 175 true
176 elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then 176 elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
177 echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 177 echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
@@ -193,8 +193,8 @@ record()
193 193
194 mkdir "$PERF_DATA_DIR" 194 mkdir "$PERF_DATA_DIR"
195 195
196 echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*" 196 echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
197 "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true 197 "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
198 198
199 if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then 199 if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
200 exit 1 200 exit 1
@@ -209,8 +209,8 @@ subcommand()
209{ 209{
210 find_perf 210 find_perf
211 check_buildid_cache_permissions 211 check_buildid_cache_permissions
212 echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*" 212 echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@"
213 "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $* 213 "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@"
214} 214}
215 215
216if [ "$1" = "fix_buildid_cache_permissions" ] ; then 216if [ "$1" = "fix_buildid_cache_permissions" ] ; then
@@ -234,7 +234,7 @@ fi
234case "$PERF_SUB_COMMAND" in 234case "$PERF_SUB_COMMAND" in
235"record") 235"record")
236 while [ "$1" != "--" ] ; do 236 while [ "$1" != "--" ] ; do
237 PERF_OPTIONS+="$1 " 237 PERF_OPTIONS+=("$1")
238 shift || break 238 shift || break
239 done 239 done
240 if [ "$1" != "--" ] ; then 240 if [ "$1" != "--" ] ; then
@@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in
242 usage 242 usage
243 fi 243 fi
244 shift 244 shift
245 record $* 245 record "$@"
246;; 246;;
247"script") 247"script")
248 subcommand $* 248 subcommand "$@"
249;; 249;;
250"report") 250"report")
251 subcommand $* 251 subcommand "$@"
252;; 252;;
253"inject") 253"inject")
254 subcommand $* 254 subcommand "$@"
255;; 255;;
256*) 256*)
257 usage 257 usage
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 5995a8bd7c69..b5fc847f9660 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -16,6 +16,9 @@ struct disasm_line_samples {
16 u64 nr; 16 u64 nr;
17}; 17};
18 18
19#define IPC_WIDTH 6
20#define CYCLES_WIDTH 6
21
19struct browser_disasm_line { 22struct browser_disasm_line {
20 struct rb_node rb_node; 23 struct rb_node rb_node;
21 u32 idx; 24 u32 idx;
@@ -53,6 +56,7 @@ struct annotate_browser {
53 int max_jump_sources; 56 int max_jump_sources;
54 int nr_jumps; 57 int nr_jumps;
55 bool searching_backwards; 58 bool searching_backwards;
59 bool have_cycles;
56 u8 addr_width; 60 u8 addr_width;
57 u8 jumps_width; 61 u8 jumps_width;
58 u8 target_width; 62 u8 target_width;
@@ -96,6 +100,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br
96 return ui_browser__set_color(&browser->b, color); 100 return ui_browser__set_color(&browser->b, color);
97} 101}
98 102
103static int annotate_browser__pcnt_width(struct annotate_browser *ab)
104{
105 int w = 7 * ab->nr_events;
106
107 if (ab->have_cycles)
108 w += IPC_WIDTH + CYCLES_WIDTH;
109 return w;
110}
111
99static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) 112static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
100{ 113{
101 struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); 114 struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@@ -106,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
106 (!current_entry || (browser->use_navkeypressed && 119 (!current_entry || (browser->use_navkeypressed &&
107 !browser->navkeypressed))); 120 !browser->navkeypressed)));
108 int width = browser->width, printed; 121 int width = browser->width, printed;
109 int i, pcnt_width = 7 * ab->nr_events; 122 int i, pcnt_width = annotate_browser__pcnt_width(ab);
110 double percent_max = 0.0; 123 double percent_max = 0.0;
111 char bf[256]; 124 char bf[256];
112 125
@@ -116,19 +129,34 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
116 } 129 }
117 130
118 if (dl->offset != -1 && percent_max != 0.0) { 131 if (dl->offset != -1 && percent_max != 0.0) {
119 for (i = 0; i < ab->nr_events; i++) { 132 if (percent_max != 0.0) {
120 ui_browser__set_percent_color(browser, 133 for (i = 0; i < ab->nr_events; i++) {
121 bdl->samples[i].percent, 134 ui_browser__set_percent_color(browser,
122 current_entry); 135 bdl->samples[i].percent,
123 if (annotate_browser__opts.show_total_period) 136 current_entry);
124 slsmg_printf("%6" PRIu64 " ", 137 if (annotate_browser__opts.show_total_period)
125 bdl->samples[i].nr); 138 slsmg_printf("%6" PRIu64 " ",
126 else 139 bdl->samples[i].nr);
127 slsmg_printf("%6.2f ", bdl->samples[i].percent); 140 else
141 slsmg_printf("%6.2f ", bdl->samples[i].percent);
142 }
143 } else {
144 slsmg_write_nstring(" ", 7 * ab->nr_events);
128 } 145 }
129 } else { 146 } else {
130 ui_browser__set_percent_color(browser, 0, current_entry); 147 ui_browser__set_percent_color(browser, 0, current_entry);
131 slsmg_write_nstring(" ", pcnt_width); 148 slsmg_write_nstring(" ", 7 * ab->nr_events);
149 }
150 if (ab->have_cycles) {
151 if (dl->ipc)
152 slsmg_printf("%*.2f ", IPC_WIDTH - 1, dl->ipc);
153 else
154 slsmg_write_nstring(" ", IPC_WIDTH);
155 if (dl->cycles)
156 slsmg_printf("%*" PRIu64 " ",
157 CYCLES_WIDTH - 1, dl->cycles);
158 else
159 slsmg_write_nstring(" ", CYCLES_WIDTH);
132 } 160 }
133 161
134 SLsmg_write_char(' '); 162 SLsmg_write_char(' ');
@@ -231,7 +259,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
231 unsigned int from, to; 259 unsigned int from, to;
232 struct map_symbol *ms = ab->b.priv; 260 struct map_symbol *ms = ab->b.priv;
233 struct symbol *sym = ms->sym; 261 struct symbol *sym = ms->sym;
234 u8 pcnt_width = 7; 262 u8 pcnt_width = annotate_browser__pcnt_width(ab);
235 263
236 /* PLT symbols contain external offsets */ 264 /* PLT symbols contain external offsets */
237 if (strstr(sym->name, "@plt")) 265 if (strstr(sym->name, "@plt"))
@@ -255,8 +283,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
255 to = (u64)btarget->idx; 283 to = (u64)btarget->idx;
256 } 284 }
257 285
258 pcnt_width *= ab->nr_events;
259
260 ui_browser__set_color(browser, HE_COLORSET_CODE); 286 ui_browser__set_color(browser, HE_COLORSET_CODE);
261 __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, 287 __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
262 from, to); 288 from, to);
@@ -266,9 +292,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
266{ 292{
267 struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); 293 struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
268 int ret = ui_browser__list_head_refresh(browser); 294 int ret = ui_browser__list_head_refresh(browser);
269 int pcnt_width; 295 int pcnt_width = annotate_browser__pcnt_width(ab);
270
271 pcnt_width = 7 * ab->nr_events;
272 296
273 if (annotate_browser__opts.jump_arrows) 297 if (annotate_browser__opts.jump_arrows)
274 annotate_browser__draw_current_jump(browser); 298 annotate_browser__draw_current_jump(browser);
@@ -390,7 +414,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
390 max_percent = bpos->samples[i].percent; 414 max_percent = bpos->samples[i].percent;
391 } 415 }
392 416
393 if (max_percent < 0.01) { 417 if (max_percent < 0.01 && pos->ipc == 0) {
394 RB_CLEAR_NODE(&bpos->rb_node); 418 RB_CLEAR_NODE(&bpos->rb_node);
395 continue; 419 continue;
396 } 420 }
@@ -869,6 +893,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
869 return map_symbol__tui_annotate(&he->ms, evsel, hbt); 893 return map_symbol__tui_annotate(&he->ms, evsel, hbt);
870} 894}
871 895
896
897static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end)
898{
899 unsigned n_insn = 0;
900 u64 offset;
901
902 for (offset = start; offset <= end; offset++) {
903 if (browser->offsets[offset])
904 n_insn++;
905 }
906 return n_insn;
907}
908
909static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end,
910 struct cyc_hist *ch)
911{
912 unsigned n_insn;
913 u64 offset;
914
915 n_insn = count_insn(browser, start, end);
916 if (n_insn && ch->num && ch->cycles) {
917 float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
918
919 /* Hide data when there are too many overlaps. */
920 if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
921 return;
922
923 for (offset = start; offset <= end; offset++) {
924 struct disasm_line *dl = browser->offsets[offset];
925
926 if (dl)
927 dl->ipc = ipc;
928 }
929 }
930}
931
932/*
933 * This should probably be in util/annotate.c to share with the tty
934 * annotate, but right now we need the per byte offsets arrays,
935 * which are only here.
936 */
937static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
938 struct symbol *sym)
939{
940 u64 offset;
941 struct annotation *notes = symbol__annotation(sym);
942
943 if (!notes->src || !notes->src->cycles_hist)
944 return;
945
946 pthread_mutex_lock(&notes->lock);
947 for (offset = 0; offset < size; ++offset) {
948 struct cyc_hist *ch;
949
950 ch = &notes->src->cycles_hist[offset];
951 if (ch && ch->cycles) {
952 struct disasm_line *dl;
953
954 if (ch->have_start)
955 count_and_fill(browser, ch->start, offset, ch);
956 dl = browser->offsets[offset];
957 if (dl && ch->num_aggr)
958 dl->cycles = ch->cycles_aggr / ch->num_aggr;
959 browser->have_cycles = true;
960 }
961 }
962 pthread_mutex_unlock(&notes->lock);
963}
964
872static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, 965static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
873 size_t size) 966 size_t size)
874{ 967{
@@ -991,6 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
991 } 1084 }
992 1085
993 annotate_browser__mark_jump_targets(&browser, size); 1086 annotate_browser__mark_jump_targets(&browser, size);
1087 annotate__compute_ipc(&browser, size, sym);
994 1088
995 browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); 1089 browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
996 browser.max_addr_width = hex_width(sym->end); 1090 browser.max_addr_width = hex_width(sym->end);
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 03b7bc70eb66..e0b614648044 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym)
473 return 0; 473 return 0;
474} 474}
475 475
476/* The cycles histogram is lazily allocated. */
477static int symbol__alloc_hist_cycles(struct symbol *sym)
478{
479 struct annotation *notes = symbol__annotation(sym);
480 const size_t size = symbol__size(sym);
481
482 notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
483 if (notes->src->cycles_hist == NULL)
484 return -1;
485 return 0;
486}
487
476void symbol__annotate_zero_histograms(struct symbol *sym) 488void symbol__annotate_zero_histograms(struct symbol *sym)
477{ 489{
478 struct annotation *notes = symbol__annotation(sym); 490 struct annotation *notes = symbol__annotation(sym);
479 491
480 pthread_mutex_lock(&notes->lock); 492 pthread_mutex_lock(&notes->lock);
481 if (notes->src != NULL) 493 if (notes->src != NULL) {
482 memset(notes->src->histograms, 0, 494 memset(notes->src->histograms, 0,
483 notes->src->nr_histograms * notes->src->sizeof_sym_hist); 495 notes->src->nr_histograms * notes->src->sizeof_sym_hist);
496 if (notes->src->cycles_hist)
497 memset(notes->src->cycles_hist, 0,
498 symbol__size(sym) * sizeof(struct cyc_hist));
499 }
484 pthread_mutex_unlock(&notes->lock); 500 pthread_mutex_unlock(&notes->lock);
485} 501}
486 502
503static int __symbol__account_cycles(struct annotation *notes,
504 u64 start,
505 unsigned offset, unsigned cycles,
506 unsigned have_start)
507{
508 struct cyc_hist *ch;
509
510 ch = notes->src->cycles_hist;
511 /*
512 * For now we can only account one basic block per
513 * final jump. But multiple could be overlapping.
514 * Always account the longest one. So when
515 * a shorter one has been already seen throw it away.
516 *
517 * We separately always account the full cycles.
518 */
519 ch[offset].num_aggr++;
520 ch[offset].cycles_aggr += cycles;
521
522 if (!have_start && ch[offset].have_start)
523 return 0;
524 if (ch[offset].num) {
525 if (have_start && (!ch[offset].have_start ||
526 ch[offset].start > start)) {
527 ch[offset].have_start = 0;
528 ch[offset].cycles = 0;
529 ch[offset].num = 0;
530 if (ch[offset].reset < 0xffff)
531 ch[offset].reset++;
532 } else if (have_start &&
533 ch[offset].start < start)
534 return 0;
535 }
536 ch[offset].have_start = have_start;
537 ch[offset].start = start;
538 ch[offset].cycles += cycles;
539 ch[offset].num++;
540 return 0;
541}
542
487static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, 543static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
488 struct annotation *notes, int evidx, u64 addr) 544 struct annotation *notes, int evidx, u64 addr)
489{ 545{
@@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
506 return 0; 562 return 0;
507} 563}
508 564
509static struct annotation *symbol__get_annotation(struct symbol *sym) 565static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
510{ 566{
511 struct annotation *notes = symbol__annotation(sym); 567 struct annotation *notes = symbol__annotation(sym);
512 568
@@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym)
514 if (symbol__alloc_hist(sym) < 0) 570 if (symbol__alloc_hist(sym) < 0)
515 return NULL; 571 return NULL;
516 } 572 }
573 if (!notes->src->cycles_hist && cycles) {
574 if (symbol__alloc_hist_cycles(sym) < 0)
575 return NULL;
576 }
517 return notes; 577 return notes;
518} 578}
519 579
@@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
524 584
525 if (sym == NULL) 585 if (sym == NULL)
526 return 0; 586 return 0;
527 notes = symbol__get_annotation(sym); 587 notes = symbol__get_annotation(sym, false);
528 if (notes == NULL) 588 if (notes == NULL)
529 return -ENOMEM; 589 return -ENOMEM;
530 return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); 590 return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
531} 591}
532 592
593static int symbol__account_cycles(u64 addr, u64 start,
594 struct symbol *sym, unsigned cycles)
595{
596 struct annotation *notes;
597 unsigned offset;
598
599 if (sym == NULL)
600 return 0;
601 notes = symbol__get_annotation(sym, true);
602 if (notes == NULL)
603 return -ENOMEM;
604 if (addr < sym->start || addr >= sym->end)
605 return -ERANGE;
606
607 if (start) {
608 if (start < sym->start || start >= sym->end)
609 return -ERANGE;
610 if (start >= addr)
611 start = 0;
612 }
613 offset = addr - sym->start;
614 return __symbol__account_cycles(notes,
615 start ? start - sym->start : 0,
616 offset, cycles,
617 !!start);
618}
619
620int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
621 struct addr_map_symbol *start,
622 unsigned cycles)
623{
624 unsigned long saddr = 0;
625 int err;
626
627 if (!cycles)
628 return 0;
629
630 /*
631 * Only set start when IPC can be computed. We can only
632 * compute it when the basic block is completely in a single
633 * function.
634 * Special case the case when the jump is elsewhere, but
635 * it starts on the function start.
636 */
637 if (start &&
638 (start->sym == ams->sym ||
639 (ams->sym &&
640 start->addr == ams->sym->start + ams->map->start)))
641 saddr = start->al_addr;
642 if (saddr == 0)
643 pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n",
644 ams->addr,
645 start ? start->addr : 0,
646 ams->sym ? ams->sym->start + ams->map->start : 0,
647 saddr);
648 err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
649 if (err)
650 pr_debug2("account_cycles failed %d\n", err);
651 return err;
652}
653
533int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) 654int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
534{ 655{
535 return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); 656 return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 7e78e6c27078..e9996092a093 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -59,6 +59,8 @@ struct disasm_line {
59 char *name; 59 char *name;
60 struct ins *ins; 60 struct ins *ins;
61 int line_nr; 61 int line_nr;
62 float ipc;
63 u64 cycles;
62 struct ins_operands ops; 64 struct ins_operands ops;
63}; 65};
64 66
@@ -79,6 +81,17 @@ struct sym_hist {
79 u64 addr[0]; 81 u64 addr[0];
80}; 82};
81 83
84struct cyc_hist { /* cycle accounting record kept for one byte offset of a symbol */
85 u64 start; /* symbol-relative start of the block ending at this offset; valid only if have_start */
86 u64 cycles; /* cycles summed over the samples counted in num */
87 u64 cycles_aggr; /* cycles summed over every sample accounted at this offset */
88 u32 num; /* samples accumulated into cycles since the record was last restarted */
89 u32 num_aggr; /* number of samples accumulated into cycles_aggr */
90 u8 have_start; /* non-zero when start holds a block start */
91 /* 1 byte padding */
92 u16 reset; /* times cycles/num were discarded and restarted; stops counting at 0xffff */
93};
94
82struct source_line_samples { 95struct source_line_samples {
83 double percent; 96 double percent;
84 double percent_sum; 97 double percent_sum;
@@ -97,6 +110,7 @@ struct source_line {
97 * @histogram: Array of addr hit histograms per event being monitored 110 * @histogram: Array of addr hit histograms per event being monitored
98 * @lines: If 'print_lines' is specified, per source code line percentages 111 * @lines: If 'print_lines' is specified, per source code line percentages
99 * @source: source parsed from a disassembler like objdump -dS 112 * @source: source parsed from a disassembler like objdump -dS
113 * @cycles_hist: Average cycles per basic block
100 * 114 *
101 * lines is allocated, percentages calculated and all sorted by percentage 115 * lines is allocated, percentages calculated and all sorted by percentage
102 * when the annotation is about to be presented, so the percentages are for 116 * when the annotation is about to be presented, so the percentages are for
@@ -109,6 +123,7 @@ struct annotated_source {
109 struct source_line *lines; 123 struct source_line *lines;
110 int nr_histograms; 124 int nr_histograms;
111 int sizeof_sym_hist; 125 int sizeof_sym_hist;
126 struct cyc_hist *cycles_hist;
112 struct sym_hist histograms[0]; 127 struct sym_hist histograms[0];
113}; 128};
114 129
@@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
130 145
131int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); 146int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);
132 147
148int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
149 struct addr_map_symbol *start,
150 unsigned cycles);
151
133int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); 152int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
134 153
135int symbol__alloc_hist(struct symbol *sym); 154int symbol__alloc_hist(struct symbol *sym);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 83d9dd96fe08..a25b3609cef8 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -942,6 +942,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
942 struct itrace_synth_opts *synth_opts = opt->value; 942 struct itrace_synth_opts *synth_opts = opt->value;
943 const char *p; 943 const char *p;
944 char *endptr; 944 char *endptr;
945 bool period_type_set = false;
945 946
946 synth_opts->set = true; 947 synth_opts->set = true;
947 948
@@ -970,10 +971,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
970 case 'i': 971 case 'i':
971 synth_opts->period_type = 972 synth_opts->period_type =
972 PERF_ITRACE_PERIOD_INSTRUCTIONS; 973 PERF_ITRACE_PERIOD_INSTRUCTIONS;
974 period_type_set = true;
973 break; 975 break;
974 case 't': 976 case 't':
975 synth_opts->period_type = 977 synth_opts->period_type =
976 PERF_ITRACE_PERIOD_TICKS; 978 PERF_ITRACE_PERIOD_TICKS;
979 period_type_set = true;
977 break; 980 break;
978 case 'm': 981 case 'm':
979 synth_opts->period *= 1000; 982 synth_opts->period *= 1000;
@@ -986,6 +989,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
986 goto out_err; 989 goto out_err;
987 synth_opts->period_type = 990 synth_opts->period_type =
988 PERF_ITRACE_PERIOD_NANOSECS; 991 PERF_ITRACE_PERIOD_NANOSECS;
992 period_type_set = true;
989 break; 993 break;
990 case '\0': 994 case '\0':
991 goto out; 995 goto out;
@@ -1039,7 +1043,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
1039 } 1043 }
1040out: 1044out:
1041 if (synth_opts->instructions) { 1045 if (synth_opts->instructions) {
1042 if (!synth_opts->period_type) 1046 if (!period_type_set)
1043 synth_opts->period_type = 1047 synth_opts->period_type =
1044 PERF_ITRACE_DEFAULT_PERIOD_TYPE; 1048 PERF_ITRACE_DEFAULT_PERIOD_TYPE;
1045 if (!synth_opts->period) 1049 if (!synth_opts->period)
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 2da5581ec74d..86d9c7302598 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
36 return ret; 36 return ret;
37} 37}
38 38
/*
 * va_list variant of eprintf(): hands @fmt and @args unchanged to
 * _eprintf() and returns its result.
 */
int veprintf(int level, int var, const char *fmt, va_list args)
{
	int printed = _eprintf(level, var, fmt, args);

	return printed;
}
43
39int eprintf(int level, int var, const char *fmt, ...) 44int eprintf(int level, int var, const char *fmt, ...)
40{ 45{
41 va_list args; 46 va_list args;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index caac2fdc6105..8b9a088c32ab 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...);
50 50
51int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); 51int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
52int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5))); 52int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
53int veprintf(int level, int var, const char *fmt, va_list args);
53 54
54int perf_debug_option(const char *str); 55int perf_debug_option(const char *str);
55 56
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 4bb2ae894c78..f729df5e25e6 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -134,7 +134,8 @@ struct branch_flags {
134 u64 predicted:1; 134 u64 predicted:1;
135 u64 in_tx:1; 135 u64 in_tx:1;
136 u64 abort:1; 136 u64 abort:1;
137 u64 reserved:60; 137 u64 cycles:16;
138 u64 reserved:44;
138}; 139};
139 140
140struct branch_entry { 141struct branch_entry {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 3b9f411a6b46..373f65b02545 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1273,6 +1273,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1273 return __perf_evlist__combined_sample_type(evlist); 1273 return __perf_evlist__combined_sample_type(evlist);
1274} 1274}
1275 1275
1276u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
1277{
1278 struct perf_evsel *evsel;
1279 u64 branch_type = 0;
1280
1281 evlist__for_each(evlist, evsel)
1282 branch_type |= evsel->attr.branch_sample_type;
1283 return branch_type;
1284}
1285
1276bool perf_evlist__valid_read_format(struct perf_evlist *evlist) 1286bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
1277{ 1287{
1278 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1288 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a8930b68456b..397757063da1 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -165,6 +165,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist);
165u64 perf_evlist__read_format(struct perf_evlist *evlist); 165u64 perf_evlist__read_format(struct perf_evlist *evlist);
166u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); 166u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
167u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); 167u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
168u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
168bool perf_evlist__sample_id_all(struct perf_evlist *evlist); 169bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
169u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); 170u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
170 171
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6f28d53d4e46..a6e9ddd37913 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -618,7 +618,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
618 * and not events sampled. Thus we use a pseudo period of 1. 618 * and not events sampled. Thus we use a pseudo period of 1.
619 */ 619 */
620 he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, 620 he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
621 1, 1, 0, true); 621 1, bi->flags.cycles ? bi->flags.cycles : 1,
622 0, true);
622 if (he == NULL) 623 if (he == NULL)
623 return -ENOMEM; 624 return -ENOMEM;
624 625
@@ -1414,6 +1415,39 @@ int hists__link(struct hists *leader, struct hists *other)
1414 return 0; 1415 return 0;
1415} 1416}
1416 1417
1418void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
1419 struct perf_sample *sample, bool nonany_branch_mode)
1420{
1421 struct branch_info *bi;
1422
1423 /* If we have branch cycles always annotate them. */
1424 if (bs && bs->nr && bs->entries[0].flags.cycles) {
1425 int i;
1426
1427 bi = sample__resolve_bstack(sample, al);
1428 if (bi) {
1429 struct addr_map_symbol *prev = NULL;
1430
1431 /*
1432 * Ignore errors, still want to process the
1433 * other entries.
1434 *
1435 * For non standard branch modes always
1436 * force no IPC (prev == NULL)
1437 *
1438 * Note that perf stores branches reversed from
1439 * program order!
1440 */
1441 for (i = bs->nr - 1; i >= 0; i--) {
1442 addr_map_symbol__account_cycles(&bi[i].from,
1443 nonany_branch_mode ? NULL : prev,
1444 bi[i].flags.cycles);
1445 prev = &bi[i].to;
1446 }
1447 free(bi);
1448 }
1449 }
1450}
1417 1451
1418size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) 1452size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp)
1419{ 1453{
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5ed8d9c22981..e2f712f85d2e 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -47,6 +47,7 @@ enum hist_column {
47 HISTC_MEM_SNOOP, 47 HISTC_MEM_SNOOP,
48 HISTC_MEM_DCACHELINE, 48 HISTC_MEM_DCACHELINE,
49 HISTC_TRANSACTION, 49 HISTC_TRANSACTION,
50 HISTC_CYCLES,
50 HISTC_NR_COLS, /* Last entry */ 51 HISTC_NR_COLS, /* Last entry */
51}; 52};
52 53
@@ -349,6 +350,9 @@ static inline int script_browse(const char *script_opt __maybe_unused)
349 350
350unsigned int hists__sort_list_width(struct hists *hists); 351unsigned int hists__sort_list_width(struct hists *hists);
351 352
353void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
354 struct perf_sample *sample, bool nonany_branch_mode);
355
352struct option; 356struct option;
353int parse_filter_percentage(const struct option *opt __maybe_unused, 357int parse_filter_percentage(const struct option *opt __maybe_unused,
354 const char *arg, int unset __maybe_unused); 358 const char *arg, int unset __maybe_unused);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a6cb9afc20e2..828936dc3f1e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1168,7 +1168,7 @@ static void parse_events_print_error(struct parse_events_error *err,
1168 * Maximum error index indent, we will cut 1168 * Maximum error index indent, we will cut
1169 * the event string if it's bigger. 1169 * the event string if it's bigger.
1170 */ 1170 */
1171 int max_err_idx = 10; 1171 int max_err_idx = 13;
1172 1172
1173 /* 1173 /*
1174 * Let's be specific with the message when 1174 * Let's be specific with the message when
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index b615cdf211d6..d4b0e6454bc6 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -542,7 +542,7 @@ struct perf_pmu *perf_pmu__find(const char *name)
542} 542}
543 543
544static struct perf_pmu_format * 544static struct perf_pmu_format *
545pmu_find_format(struct list_head *formats, char *name) 545pmu_find_format(struct list_head *formats, const char *name)
546{ 546{
547 struct perf_pmu_format *format; 547 struct perf_pmu_format *format;
548 548
@@ -553,6 +553,21 @@ pmu_find_format(struct list_head *formats, char *name)
553 return NULL; 553 return NULL;
554} 554}
555 555
556__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
557{
558 struct perf_pmu_format *format = pmu_find_format(formats, name);
559 __u64 bits = 0;
560 int fbit;
561
562 if (!format)
563 return 0;
564
565 for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
566 bits |= 1ULL << fbit;
567
568 return bits;
569}
570
556/* 571/*
557 * Sets value based on the format definition (format parameter) 572 * Sets value based on the format definition (format parameter)
558 * and unformated value (value parameter). 573 * and unformated value (value parameter).
@@ -574,6 +589,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
574 } 589 }
575} 590}
576 591
592static __u64 pmu_format_max_value(const unsigned long *format)
593{
594 int w;
595
596 w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
597 if (!w)
598 return 0;
599 if (w < 64)
600 return (1ULL << w) - 1;
601 return -1;
602}
603
577/* 604/*
578 * Term is a string term, and might be a param-term. Try to look up it's value 605 * Term is a string term, and might be a param-term. Try to look up it's value
579 * in the remaining terms. 606 * in the remaining terms.
@@ -647,7 +674,7 @@ static int pmu_config_term(struct list_head *formats,
647{ 674{
648 struct perf_pmu_format *format; 675 struct perf_pmu_format *format;
649 __u64 *vp; 676 __u64 *vp;
650 __u64 val; 677 __u64 val, max_val;
651 678
652 /* 679 /*
653 * If this is a parameter we've already used for parameterized-eval, 680 * If this is a parameter we've already used for parameterized-eval,
@@ -713,6 +740,22 @@ static int pmu_config_term(struct list_head *formats,
713 } else 740 } else
714 return -EINVAL; 741 return -EINVAL;
715 742
743 max_val = pmu_format_max_value(format->bits);
744 if (val > max_val) {
745 if (err) {
746 err->idx = term->err_val;
747 if (asprintf(&err->str,
748 "value too big for format, maximum is %llu",
749 (unsigned long long)max_val) < 0)
750 err->str = strdup("value too big for format");
751 return -EINVAL;
752 }
753 /*
754 * Assume we don't care if !err, in which case the value will be
755 * silently truncated.
756 */
757 }
758
716 pmu_format_value(format->bits, val, vp, zero); 759 pmu_format_value(format->bits, val, vp, zero);
717 return 0; 760 return 0;
718} 761}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 7b9c8cf8ae3e..5d7e84466bee 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats,
54 struct perf_event_attr *attr, 54 struct perf_event_attr *attr,
55 struct list_head *head_terms, 55 struct list_head *head_terms,
56 bool zero, struct parse_events_error *error); 56 bool zero, struct parse_events_error *error);
57__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
57int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, 58int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
58 struct perf_pmu_info *info); 59 struct perf_pmu_info *info);
59struct list_head *perf_pmu__alias(struct perf_pmu *pmu, 60struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 20f555d1ae1c..83ee95e9743b 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -106,6 +106,8 @@ struct variable_list {
106 struct strlist *vars; /* Available variables */ 106 struct strlist *vars; /* Available variables */
107}; 107};
108 108
109struct map;
110
109/* Command string to events */ 111/* Command string to events */
110extern int parse_perf_probe_command(const char *cmd, 112extern int parse_perf_probe_command(const char *cmd,
111 struct perf_probe_event *pev); 113 struct perf_probe_event *pev);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f51eb54aeeb3..18722e774a69 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -784,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample)
784 784
785 printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); 785 printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
786 786
787 for (i = 0; i < sample->branch_stack->nr; i++) 787 for (i = 0; i < sample->branch_stack->nr; i++) {
788 printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", 788 struct branch_entry *e = &sample->branch_stack->entries[i];
789 i, sample->branch_stack->entries[i].from, 789
790 sample->branch_stack->entries[i].to); 790 printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
791 i, e->from, e->to,
792 e->flags.cycles,
793 e->flags.mispred ? "M" : " ",
794 e->flags.predicted ? "P" : " ",
795 e->flags.abort ? "A" : " ",
796 e->flags.in_tx ? "T" : " ",
797 (unsigned)e->flags.reserved);
798 }
791} 799}
792 800
793static void regs_dump__printf(u64 mask, u64 *regs) 801static void regs_dump__printf(u64 mask, u64 *regs)
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 4c65a143a34c..5177088a71d3 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -9,7 +9,7 @@ regex_t parent_regex;
9const char default_parent_pattern[] = "^sys_|^do_page_fault"; 9const char default_parent_pattern[] = "^sys_|^do_page_fault";
10const char *parent_pattern = default_parent_pattern; 10const char *parent_pattern = default_parent_pattern;
11const char default_sort_order[] = "comm,dso,symbol"; 11const char default_sort_order[] = "comm,dso,symbol";
12const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to"; 12const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
13const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; 13const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
14const char default_top_sort_order[] = "dso,symbol"; 14const char default_top_sort_order[] = "dso,symbol";
15const char default_diff_sort_order[] = "dso,symbol"; 15const char default_diff_sort_order[] = "dso,symbol";
@@ -526,6 +526,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
526 return repsep_snprintf(bf, size, "%-*.*s", width, width, out); 526 return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
527} 527}
528 528
529static int64_t
530sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
531{
532 return left->branch_info->flags.cycles -
533 right->branch_info->flags.cycles;
534}
535
536static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
537 size_t size, unsigned int width)
538{
539 if (he->branch_info->flags.cycles == 0)
540 return repsep_snprintf(bf, size, "%-*s", width, "-");
541 return repsep_snprintf(bf, size, "%-*hd", width,
542 he->branch_info->flags.cycles);
543}
544
545struct sort_entry sort_cycles = {
546 .se_header = "Basic Block Cycles",
547 .se_cmp = sort__cycles_cmp,
548 .se_snprintf = hist_entry__cycles_snprintf,
549 .se_width_idx = HISTC_CYCLES,
550};
551
529/* --sort daddr_sym */ 552/* --sort daddr_sym */
530static int64_t 553static int64_t
531sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) 554sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
@@ -1190,6 +1213,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
1190 DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), 1213 DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
1191 DIM(SORT_IN_TX, "in_tx", sort_in_tx), 1214 DIM(SORT_IN_TX, "in_tx", sort_in_tx),
1192 DIM(SORT_ABORT, "abort", sort_abort), 1215 DIM(SORT_ABORT, "abort", sort_abort),
1216 DIM(SORT_CYCLES, "cycles", sort_cycles),
1193}; 1217};
1194 1218
1195#undef DIM 1219#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index e97cd476d336..bc6c87a76d16 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -185,6 +185,7 @@ enum sort_type {
185 SORT_MISPREDICT, 185 SORT_MISPREDICT,
186 SORT_ABORT, 186 SORT_ABORT,
187 SORT_IN_TX, 187 SORT_IN_TX,
188 SORT_CYCLES,
188 189
189 /* memory mode specific sort keys */ 190 /* memory mode specific sort keys */
190 __SORT_MEMORY_MODE, 191 __SORT_MEMORY_MODE,
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index f2a0d1521e26..c5c709cdc3ce 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -238,3 +238,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist)
238 perf_evsel__reset_counts(evsel); 238 perf_evsel__reset_counts(evsel);
239 } 239 }
240} 240}
241
242static void zero_per_pkg(struct perf_evsel *counter)
243{
244 if (counter->per_pkg_mask)
245 memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
246}
247
248static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
249{
250 unsigned long *mask = counter->per_pkg_mask;
251 struct cpu_map *cpus = perf_evsel__cpus(counter);
252 int s;
253
254 *skip = false;
255
256 if (!counter->per_pkg)
257 return 0;
258
259 if (cpu_map__empty(cpus))
260 return 0;
261
262 if (!mask) {
263 mask = zalloc(MAX_NR_CPUS);
264 if (!mask)
265 return -ENOMEM;
266
267 counter->per_pkg_mask = mask;
268 }
269
270 s = cpu_map__get_socket(cpus, cpu);
271 if (s < 0)
272 return -1;
273
274 *skip = test_and_set_bit(s, mask) == 1;
275 return 0;
276}
277
278static int
279process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
280 int cpu, int thread,
281 struct perf_counts_values *count)
282{
283 struct perf_counts_values *aggr = &evsel->counts->aggr;
284 static struct perf_counts_values zero;
285 bool skip = false;
286
287 if (check_per_pkg(evsel, cpu, &skip)) {
288 pr_err("failed to read per-pkg counter\n");
289 return -1;
290 }
291
292 if (skip)
293 count = &zero;
294
295 switch (config->aggr_mode) {
296 case AGGR_THREAD:
297 case AGGR_CORE:
298 case AGGR_SOCKET:
299 case AGGR_NONE:
300 if (!evsel->snapshot)
301 perf_evsel__compute_deltas(evsel, cpu, thread, count);
302 perf_counts_values__scale(count, config->scale, NULL);
303 if (config->aggr_mode == AGGR_NONE)
304 perf_stat__update_shadow_stats(evsel, count->values, cpu);
305 break;
306 case AGGR_GLOBAL:
307 aggr->val += count->val;
308 if (config->scale) {
309 aggr->ena += count->ena;
310 aggr->run += count->run;
311 }
312 default:
313 break;
314 }
315
316 return 0;
317}
318
319static int process_counter_maps(struct perf_stat_config *config,
320 struct perf_evsel *counter)
321{
322 int nthreads = thread_map__nr(counter->threads);
323 int ncpus = perf_evsel__nr_cpus(counter);
324 int cpu, thread;
325
326 if (counter->system_wide)
327 nthreads = 1;
328
329 for (thread = 0; thread < nthreads; thread++) {
330 for (cpu = 0; cpu < ncpus; cpu++) {
331 if (process_counter_values(config, counter, cpu, thread,
332 perf_counts(counter->counts, cpu, thread)))
333 return -1;
334 }
335 }
336
337 return 0;
338}
339
340int perf_stat_process_counter(struct perf_stat_config *config,
341 struct perf_evsel *counter)
342{
343 struct perf_counts_values *aggr = &counter->counts->aggr;
344 struct perf_stat *ps = counter->priv;
345 u64 *count = counter->counts->aggr.values;
346 int i, ret;
347
348 aggr->val = aggr->ena = aggr->run = 0;
349 init_stats(ps->res_stats);
350
351 if (counter->per_pkg)
352 zero_per_pkg(counter);
353
354 ret = process_counter_maps(config, counter);
355 if (ret)
356 return ret;
357
358 if (config->aggr_mode != AGGR_GLOBAL)
359 return 0;
360
361 if (!counter->snapshot)
362 perf_evsel__compute_deltas(counter, -1, -1, aggr);
363 perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
364
365 for (i = 0; i < 3; i++)
366 update_stats(&ps->res_stats[i], count[i]);
367
368 if (verbose) {
369 fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
370 perf_evsel__name(counter), count[0], count[1], count[2]);
371 }
372
373 /*
374 * Save the full runtime - to allow normalization during printout:
375 */
376 perf_stat__update_shadow_stats(counter, count, 0);
377
378 return 0;
379}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 1cfbe0a980ac..0b897b083682 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -50,6 +50,13 @@ struct perf_counts {
50 struct xyarray *values; 50 struct xyarray *values;
51}; 51};
52 52
53struct perf_stat_config {
54 enum aggr_mode aggr_mode;
55 bool scale;
56 FILE *output;
57 unsigned int interval;
58};
59
53static inline struct perf_counts_values* 60static inline struct perf_counts_values*
54perf_counts(struct perf_counts *counts, int cpu, int thread) 61perf_counts(struct perf_counts *counts, int cpu, int thread)
55{ 62{
@@ -109,4 +116,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
109int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); 116int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
110void perf_evlist__free_stats(struct perf_evlist *evlist); 117void perf_evlist__free_stats(struct perf_evlist *evlist);
111void perf_evlist__reset_stats(struct perf_evlist *evlist); 118void perf_evlist__reset_stats(struct perf_evlist *evlist);
119
120int perf_stat_process_counter(struct perf_stat_config *config,
121 struct perf_evsel *counter);
112#endif 122#endif