Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--  tools/perf/builtin-stat.c | 1075
1 file changed, 842 insertions(+), 233 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a6b4d44f9502..a9f06715e44d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -6,24 +6,28 @@
6 | * | 6 | * |
7 | * Sample output: | 7 | * Sample output: |
8 | 8 | ||
9 | $ perf stat ~/hackbench 10 | 9 | $ perf stat ./hackbench 10 |
10 | Time: 0.104 | ||
11 | 10 | ||
12 | Performance counter stats for '/home/mingo/hackbench': | 11 | Time: 0.118 |
13 | 12 | ||
14 | 1255.538611 task clock ticks # 10.143 CPU utilization factor | 13 | Performance counter stats for './hackbench 10': |
15 | 54011 context switches # 0.043 M/sec | ||
16 | 385 CPU migrations # 0.000 M/sec | ||
17 | 17755 pagefaults # 0.014 M/sec | ||
18 | 3808323185 CPU cycles # 3033.219 M/sec | ||
19 | 1575111190 instructions # 1254.530 M/sec | ||
20 | 17367895 cache references # 13.833 M/sec | ||
21 | 7674421 cache misses # 6.112 M/sec | ||
22 | 14 | ||
23 | Wall-clock time elapsed: 123.786620 msecs | 15 | 1708.761321 task-clock # 11.037 CPUs utilized |
16 | 41,190 context-switches # 0.024 M/sec | ||
17 | 6,735 CPU-migrations # 0.004 M/sec | ||
18 | 17,318 page-faults # 0.010 M/sec | ||
19 | 5,205,202,243 cycles # 3.046 GHz | ||
20 | 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle | ||
21 | 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle | ||
22 | 2,603,501,247 instructions # 0.50 insns per cycle | ||
23 | # 1.48 stalled cycles per insn | ||
24 | 484,357,498 branches # 283.455 M/sec | ||
25 | 6,388,934 branch-misses # 1.32% of all branches | ||
26 | |||
27 | 0.154822978 seconds time elapsed | ||
24 | 28 | ||
25 | * | 29 | * |
26 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | 30 | * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> |
27 | * | 31 | * |
28 | * Improvements and fixes by: | 32 | * Improvements and fixes by: |
29 | * | 33 | * |
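The derived metrics in the updated sample output follow directly from the raw counts shown: 2,603,501,247 instructions / 5,205,202,243 cycles ≈ 0.50 insns per cycle; 3,856,436,920 frontend-stalled cycles / 2,603,501,247 instructions ≈ 1.48 stalled cycles per insn; 5,205,202,243 cycles / 1,708,761,321 ns of task-clock ≈ 3.046 GHz; and 1708.761 ms of task-clock / 154.823 ms of wall time ≈ 11.0 CPUs utilized.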
@@ -43,15 +47,21 @@
43 | #include "util/parse-options.h" | 47 | #include "util/parse-options.h" |
44 | #include "util/parse-events.h" | 48 | #include "util/parse-events.h" |
45 | #include "util/event.h" | 49 | #include "util/event.h" |
50 | #include "util/evlist.h" | ||
51 | #include "util/evsel.h" | ||
46 | #include "util/debug.h" | 52 | #include "util/debug.h" |
53 | #include "util/color.h" | ||
47 | #include "util/header.h" | 54 | #include "util/header.h" |
48 | #include "util/cpumap.h" | 55 | #include "util/cpumap.h" |
49 | #include "util/thread.h" | 56 | #include "util/thread.h" |
57 | #include "util/thread_map.h" | ||
50 | 58 | ||
51 | #include <sys/prctl.h> | 59 | #include <sys/prctl.h> |
52 | #include <math.h> | 60 | #include <math.h> |
53 | #include <locale.h> | 61 | #include <locale.h> |
54 | 62 | ||
63 | #define DEFAULT_SEPARATOR " " | ||
64 | |||
55 | static struct perf_event_attr default_attrs[] = { | 65 | static struct perf_event_attr default_attrs[] = { |
56 | 66 | ||
57 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | 67 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
@@ -60,34 +70,127 @@ static struct perf_event_attr default_attrs[] = {
60 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | 70 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, |
61 | 71 | ||
62 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | 72 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, |
73 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, | ||
74 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, | ||
63 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | 75 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, |
64 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | 76 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
65 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, | 77 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, |
66 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, | ||
67 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, | ||
68 | 78 | ||
69 | }; | 79 | }; |
70 | 80 | ||
81 | /* | ||
82 | * Detailed stats (-d), covering the L1 and last level data caches: | ||
83 | */ | ||
84 | static struct perf_event_attr detailed_attrs[] = { | ||
85 | |||
86 | { .type = PERF_TYPE_HW_CACHE, | ||
87 | .config = | ||
88 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
89 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
90 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
91 | |||
92 | { .type = PERF_TYPE_HW_CACHE, | ||
93 | .config = | ||
94 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
95 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
96 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
97 | |||
98 | { .type = PERF_TYPE_HW_CACHE, | ||
99 | .config = | ||
100 | PERF_COUNT_HW_CACHE_LL << 0 | | ||
101 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
102 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
103 | |||
104 | { .type = PERF_TYPE_HW_CACHE, | ||
105 | .config = | ||
106 | PERF_COUNT_HW_CACHE_LL << 0 | | ||
107 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
108 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
109 | }; | ||
110 | |||
111 | /* | ||
112 | * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: | ||
113 | */ | ||
114 | static struct perf_event_attr very_detailed_attrs[] = { | ||
115 | |||
116 | { .type = PERF_TYPE_HW_CACHE, | ||
117 | .config = | ||
118 | PERF_COUNT_HW_CACHE_L1I << 0 | | ||
119 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
120 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
121 | |||
122 | { .type = PERF_TYPE_HW_CACHE, | ||
123 | .config = | ||
124 | PERF_COUNT_HW_CACHE_L1I << 0 | | ||
125 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
126 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
127 | |||
128 | { .type = PERF_TYPE_HW_CACHE, | ||
129 | .config = | ||
130 | PERF_COUNT_HW_CACHE_DTLB << 0 | | ||
131 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
132 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
133 | |||
134 | { .type = PERF_TYPE_HW_CACHE, | ||
135 | .config = | ||
136 | PERF_COUNT_HW_CACHE_DTLB << 0 | | ||
137 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
138 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
139 | |||
140 | { .type = PERF_TYPE_HW_CACHE, | ||
141 | .config = | ||
142 | PERF_COUNT_HW_CACHE_ITLB << 0 | | ||
143 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
144 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
145 | |||
146 | { .type = PERF_TYPE_HW_CACHE, | ||
147 | .config = | ||
148 | PERF_COUNT_HW_CACHE_ITLB << 0 | | ||
149 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
150 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
151 | |||
152 | }; | ||
153 | |||
154 | /* | ||
155 | * Very, very detailed stats (-d -d -d), adding prefetch events: | ||
156 | */ | ||
157 | static struct perf_event_attr very_very_detailed_attrs[] = { | ||
158 | |||
159 | { .type = PERF_TYPE_HW_CACHE, | ||
160 | .config = | ||
161 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
162 | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | | ||
163 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
164 | |||
165 | { .type = PERF_TYPE_HW_CACHE, | ||
166 | .config = | ||
167 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
168 | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | | ||
169 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
170 | }; | ||
171 | |||
172 | |||
173 | |||
174 | struct perf_evlist *evsel_list; | ||
175 | |||
71 | static bool system_wide = false; | 176 | static bool system_wide = false; |
72 | static int nr_cpus = 0; | ||
73 | static int run_idx = 0; | 177 | static int run_idx = 0; |
74 | 178 | ||
75 | static int run_count = 1; | 179 | static int run_count = 1; |
76 | static bool no_inherit = false; | 180 | static bool no_inherit = false; |
77 | static bool scale = true; | 181 | static bool scale = true; |
182 | static bool no_aggr = false; | ||
78 | static pid_t target_pid = -1; | 183 | static pid_t target_pid = -1; |
79 | static pid_t target_tid = -1; | 184 | static pid_t target_tid = -1; |
80 | static pid_t *all_tids = NULL; | ||
81 | static int thread_num = 0; | ||
82 | static pid_t child_pid = -1; | 185 | static pid_t child_pid = -1; |
83 | static bool null_run = false; | 186 | static bool null_run = false; |
84 | static bool big_num = false; | 187 | static int detailed_run = 0; |
188 | static bool sync_run = false; | ||
189 | static bool big_num = true; | ||
190 | static int big_num_opt = -1; | ||
85 | static const char *cpu_list; | 191 | static const char *cpu_list; |
86 | 192 | static const char *csv_sep = NULL; | |
87 | 193 | static bool csv_output = false; | |
88 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
89 | |||
90 | static int event_scaled[MAX_COUNTERS]; | ||
91 | 194 | ||
92 | static volatile int done = 0; | 195 | static volatile int done = 0; |
93 | 196 | ||
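The new -d/--detailed attribute tables in this hunk rely on the documented perf_event_attr encoding for PERF_TYPE_HW_CACHE events, in which the cache id, the operation and the result are packed into a single config word. A minimal sketch of that packing (the helper name is illustrative, not part of the patch):

#include <linux/perf_event.h>

/*
 * PERF_TYPE_HW_CACHE config encoding: cache id in bits 0-7,
 * operation in bits 8-15, result in bits 16-23.
 */
static __u64 hw_cache_config(__u64 cache, __u64 op, __u64 result)
{
	return cache | (op << 8) | (result << 16);
}

/* Equivalent to the L1-dcache read-miss entry in detailed_attrs: */
static struct perf_event_attr l1d_read_miss = {
	.type	= PERF_TYPE_HW_CACHE,
	.config	= PERF_COUNT_HW_CACHE_L1D |
		  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		  (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
};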
@@ -96,6 +199,22 @@ struct stats
96 | double n, mean, M2; | 199 | double n, mean, M2; |
97 | }; | 200 | }; |
98 | 201 | ||
202 | struct perf_stat { | ||
203 | struct stats res_stats[3]; | ||
204 | }; | ||
205 | |||
206 | static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) | ||
207 | { | ||
208 | evsel->priv = zalloc(sizeof(struct perf_stat)); | ||
209 | return evsel->priv == NULL ? -ENOMEM : 0; | ||
210 | } | ||
211 | |||
212 | static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) | ||
213 | { | ||
214 | free(evsel->priv); | ||
215 | evsel->priv = NULL; | ||
216 | } | ||
217 | |||
99 | static void update_stats(struct stats *stats, u64 val) | 218 | static void update_stats(struct stats *stats, u64 val) |
100 | { | 219 | { |
101 | double delta; | 220 | double delta; |
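The struct stats fields shown above (n, mean, M2) are the state of Welford's online mean/variance algorithm, which the existing update_stats()/avg_stats()/stddev_stats() helpers implement and which the new per-event res_stats reuse. A standalone sketch of the standard recurrence (not copied from the file):

#include <math.h>

struct stats { double n, mean, M2; };

/* Welford's online update: single pass, numerically stable. */
static void stats_update(struct stats *s, double val)
{
	double delta = val - s->mean;

	s->n    += 1.0;
	s->mean += delta / s->n;
	s->M2   += delta * (val - s->mean);
}

/* Standard error of the mean, the quantity behind the "( +- x.xx% )" noise. */
static double stats_stddev_mean(struct stats *s)
{
	double variance = s->n > 1 ? s->M2 / (s->n - 1) : 0.0;

	return sqrt(variance / s->n);
}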
@@ -135,154 +254,143 @@ static double stddev_stats(struct stats *stats)
135 | return sqrt(variance_mean); | 254 | return sqrt(variance_mean); |
136 | } | 255 | } |
137 | 256 | ||
138 | struct stats event_res_stats[MAX_COUNTERS][3]; | 257 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
139 | struct stats runtime_nsecs_stats; | 258 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; |
259 | struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; | ||
260 | struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; | ||
261 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | ||
262 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; | ||
263 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; | ||
264 | struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; | ||
265 | struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; | ||
266 | struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; | ||
267 | struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; | ||
140 | struct stats walltime_nsecs_stats; | 268 | struct stats walltime_nsecs_stats; |
141 | struct stats runtime_cycles_stats; | ||
142 | struct stats runtime_branches_stats; | ||
143 | |||
144 | #define MATCH_EVENT(t, c, counter) \ | ||
145 | (attrs[counter].type == PERF_TYPE_##t && \ | ||
146 | attrs[counter].config == PERF_COUNT_##c) | ||
147 | 269 | ||
148 | #define ERR_PERF_OPEN \ | 270 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
149 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" | ||
150 | |||
151 | static int create_perf_stat_counter(int counter) | ||
152 | { | 271 | { |
153 | struct perf_event_attr *attr = attrs + counter; | 272 | struct perf_event_attr *attr = &evsel->attr; |
154 | int thread; | ||
155 | int ncreated = 0; | ||
156 | 273 | ||
157 | if (scale) | 274 | if (scale) |
158 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 275 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
159 | PERF_FORMAT_TOTAL_TIME_RUNNING; | 276 | PERF_FORMAT_TOTAL_TIME_RUNNING; |
160 | 277 | ||
161 | if (system_wide) { | 278 | attr->inherit = !no_inherit; |
162 | int cpu; | 279 | |
163 | 280 | if (system_wide) | |
164 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 281 | return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false); |
165 | fd[cpu][counter][0] = sys_perf_event_open(attr, | 282 | |
166 | -1, cpumap[cpu], -1, 0); | 283 | if (target_pid == -1 && target_tid == -1) { |
167 | if (fd[cpu][counter][0] < 0) | 284 | attr->disabled = 1; |
168 | pr_debug(ERR_PERF_OPEN, counter, | 285 | attr->enable_on_exec = 1; |
169 | fd[cpu][counter][0], strerror(errno)); | ||
170 | else | ||
171 | ++ncreated; | ||
172 | } | ||
173 | } else { | ||
174 | attr->inherit = !no_inherit; | ||
175 | if (target_pid == -1 && target_tid == -1) { | ||
176 | attr->disabled = 1; | ||
177 | attr->enable_on_exec = 1; | ||
178 | } | ||
179 | for (thread = 0; thread < thread_num; thread++) { | ||
180 | fd[0][counter][thread] = sys_perf_event_open(attr, | ||
181 | all_tids[thread], -1, -1, 0); | ||
182 | if (fd[0][counter][thread] < 0) | ||
183 | pr_debug(ERR_PERF_OPEN, counter, | ||
184 | fd[0][counter][thread], | ||
185 | strerror(errno)); | ||
186 | else | ||
187 | ++ncreated; | ||
188 | } | ||
189 | } | 286 | } |
190 | 287 | ||
191 | return ncreated; | 288 | return perf_evsel__open_per_thread(evsel, evsel_list->threads, false); |
192 | } | 289 | } |
193 | 290 | ||
194 | /* | 291 | /* |
195 | * Does the counter have nsecs as a unit? | 292 | * Does the counter have nsecs as a unit? |
196 | */ | 293 | */ |
197 | static inline int nsec_counter(int counter) | 294 | static inline int nsec_counter(struct perf_evsel *evsel) |
198 | { | 295 | { |
199 | if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || | 296 | if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || |
200 | MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 297 | perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
201 | return 1; | 298 | return 1; |
202 | 299 | ||
203 | return 0; | 300 | return 0; |
204 | } | 301 | } |
205 | 302 | ||
206 | /* | 303 | /* |
304 | * Update various tracking values we maintain to print | ||
305 | * more semantic information such as miss/hit ratios, | ||
306 | * instruction rates, etc: | ||
307 | */ | ||
308 | static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | ||
309 | { | ||
310 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | ||
311 | update_stats(&runtime_nsecs_stats[0], count[0]); | ||
312 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
313 | update_stats(&runtime_cycles_stats[0], count[0]); | ||
314 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | ||
315 | update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); | ||
316 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | ||
317 | update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); | ||
318 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
319 | update_stats(&runtime_branches_stats[0], count[0]); | ||
320 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | ||
321 | update_stats(&runtime_cacherefs_stats[0], count[0]); | ||
322 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | ||
323 | update_stats(&runtime_l1_dcache_stats[0], count[0]); | ||
324 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | ||
325 | update_stats(&runtime_l1_icache_stats[0], count[0]); | ||
326 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | ||
327 | update_stats(&runtime_ll_cache_stats[0], count[0]); | ||
328 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | ||
329 | update_stats(&runtime_dtlb_cache_stats[0], count[0]); | ||
330 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | ||
331 | update_stats(&runtime_itlb_cache_stats[0], count[0]); | ||
332 | } | ||
333 | |||
334 | /* | ||
207 | * Read out the results of a single counter: | 335 | * Read out the results of a single counter: |
336 | * aggregate counts across CPUs in system-wide mode | ||
208 | */ | 337 | */ |
209 | static void read_counter(int counter) | 338 | static int read_counter_aggr(struct perf_evsel *counter) |
210 | { | 339 | { |
211 | u64 count[3], single_count[3]; | 340 | struct perf_stat *ps = counter->priv; |
212 | int cpu; | 341 | u64 *count = counter->counts->aggr.values; |
213 | size_t res, nv; | 342 | int i; |
214 | int scaled; | ||
215 | int i, thread; | ||
216 | 343 | ||
217 | count[0] = count[1] = count[2] = 0; | 344 | if (__perf_evsel__read(counter, evsel_list->cpus->nr, |
345 | evsel_list->threads->nr, scale) < 0) | ||
346 | return -1; | ||
218 | 347 | ||
219 | nv = scale ? 3 : 1; | 348 | for (i = 0; i < 3; i++) |
220 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 349 | update_stats(&ps->res_stats[i], count[i]); |
221 | for (thread = 0; thread < thread_num; thread++) { | ||
222 | if (fd[cpu][counter][thread] < 0) | ||
223 | continue; | ||
224 | 350 | ||
225 | res = read(fd[cpu][counter][thread], | 351 | if (verbose) { |
226 | single_count, nv * sizeof(u64)); | 352 | fprintf(stderr, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", |
227 | assert(res == nv * sizeof(u64)); | 353 | event_name(counter), count[0], count[1], count[2]); |
354 | } | ||
228 | 355 | ||
229 | close(fd[cpu][counter][thread]); | 356 | /* |
230 | fd[cpu][counter][thread] = -1; | 357 | * Save the full runtime - to allow normalization during printout: |
358 | */ | ||
359 | update_shadow_stats(counter, count); | ||
231 | 360 | ||
232 | count[0] += single_count[0]; | 361 | return 0; |
233 | if (scale) { | 362 | } |
234 | count[1] += single_count[1]; | ||
235 | count[2] += single_count[2]; | ||
236 | } | ||
237 | } | ||
238 | } | ||
239 | 363 | ||
240 | scaled = 0; | 364 | /* |
241 | if (scale) { | 365 | * Read out the results of a single counter: |
242 | if (count[2] == 0) { | 366 | * do not aggregate counts across CPUs in system-wide mode |
243 | event_scaled[counter] = -1; | 367 | */ |
244 | count[0] = 0; | 368 | static int read_counter(struct perf_evsel *counter) |
245 | return; | 369 | { |
246 | } | 370 | u64 *count; |
371 | int cpu; | ||
247 | 372 | ||
248 | if (count[2] < count[1]) { | 373 | for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) { |
249 | event_scaled[counter] = 1; | 374 | if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) |
250 | count[0] = (unsigned long long) | 375 | return -1; |
251 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
252 | } | ||
253 | } | ||
254 | 376 | ||
255 | for (i = 0; i < 3; i++) | 377 | count = counter->counts->cpu[cpu].values; |
256 | update_stats(&event_res_stats[counter][i], count[i]); | ||
257 | 378 | ||
258 | if (verbose) { | 379 | update_shadow_stats(counter, count); |
259 | fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), | ||
260 | count[0], count[1], count[2]); | ||
261 | } | 380 | } |
262 | 381 | ||
263 | /* | 382 | return 0; |
264 | * Save the full runtime - to allow normalization during printout: | ||
265 | */ | ||
266 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | ||
267 | update_stats(&runtime_nsecs_stats, count[0]); | ||
268 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | ||
269 | update_stats(&runtime_cycles_stats, count[0]); | ||
270 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) | ||
271 | update_stats(&runtime_branches_stats, count[0]); | ||
272 | } | 383 | } |
273 | 384 | ||
274 | static int run_perf_stat(int argc __used, const char **argv) | 385 | static int run_perf_stat(int argc __used, const char **argv) |
275 | { | 386 | { |
276 | unsigned long long t0, t1; | 387 | unsigned long long t0, t1; |
388 | struct perf_evsel *counter; | ||
277 | int status = 0; | 389 | int status = 0; |
278 | int counter, ncreated = 0; | ||
279 | int child_ready_pipe[2], go_pipe[2]; | 390 | int child_ready_pipe[2], go_pipe[2]; |
280 | const bool forks = (argc > 0); | 391 | const bool forks = (argc > 0); |
281 | char buf; | 392 | char buf; |
282 | 393 | ||
283 | if (!system_wide) | ||
284 | nr_cpus = 1; | ||
285 | |||
286 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { | 394 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { |
287 | perror("failed to create pipes"); | 395 | perror("failed to create pipes"); |
288 | exit(1); | 396 | exit(1); |
@@ -322,7 +430,7 @@ static int run_perf_stat(int argc __used, const char **argv)
322 | } | 430 | } |
323 | 431 | ||
324 | if (target_tid == -1 && target_pid == -1 && !system_wide) | 432 | if (target_tid == -1 && target_pid == -1 && !system_wide) |
325 | all_tids[0] = child_pid; | 433 | evsel_list->threads->map[0] = child_pid; |
326 | 434 | ||
327 | /* | 435 | /* |
328 | * Wait for the child to be ready to exec. | 436 | * Wait for the child to be ready to exec. |
@@ -334,15 +442,35 @@
334 | close(child_ready_pipe[0]); | 442 | close(child_ready_pipe[0]); |
335 | } | 443 | } |
336 | 444 | ||
337 | for (counter = 0; counter < nr_counters; counter++) | 445 | list_for_each_entry(counter, &evsel_list->entries, node) { |
338 | ncreated += create_perf_stat_counter(counter); | 446 | if (create_perf_stat_counter(counter) < 0) { |
447 | if (errno == EINVAL || errno == ENOSYS || errno == ENOENT) { | ||
448 | if (verbose) | ||
449 | ui__warning("%s event is not supported by the kernel.\n", | ||
450 | event_name(counter)); | ||
451 | continue; | ||
452 | } | ||
453 | |||
454 | if (errno == EPERM || errno == EACCES) { | ||
455 | error("You may not have permission to collect %sstats.\n" | ||
456 | "\t Consider tweaking" | ||
457 | " /proc/sys/kernel/perf_event_paranoid or running as root.", | ||
458 | system_wide ? "system-wide " : ""); | ||
459 | } else { | ||
460 | error("open_counter returned with %d (%s). " | ||
461 | "/bin/dmesg may provide additional information.\n", | ||
462 | errno, strerror(errno)); | ||
463 | } | ||
464 | if (child_pid != -1) | ||
465 | kill(child_pid, SIGTERM); | ||
466 | die("Not all events could be opened.\n"); | ||
467 | return -1; | ||
468 | } | ||
469 | } | ||
339 | 470 | ||
340 | if (ncreated == 0) { | 471 | if (perf_evlist__set_filters(evsel_list)) { |
341 | pr_err("No permission to collect %sstats.\n" | 472 | error("failed to set filter with %d (%s)\n", errno, |
342 | "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", | 473 | strerror(errno)); |
343 | system_wide ? "system-wide " : ""); | ||
344 | if (child_pid != -1) | ||
345 | kill(child_pid, SIGTERM); | ||
346 | return -1; | 474 | return -1; |
347 | } | 475 | } |
348 | 476 | ||
@@ -362,136 +490,501 @@
362 | 490 | ||
363 | update_stats(&walltime_nsecs_stats, t1 - t0); | 491 | update_stats(&walltime_nsecs_stats, t1 - t0); |
364 | 492 | ||
365 | for (counter = 0; counter < nr_counters; counter++) | 493 | if (no_aggr) { |
366 | read_counter(counter); | 494 | list_for_each_entry(counter, &evsel_list->entries, node) { |
495 | read_counter(counter); | ||
496 | perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1); | ||
497 | } | ||
498 | } else { | ||
499 | list_for_each_entry(counter, &evsel_list->entries, node) { | ||
500 | read_counter_aggr(counter); | ||
501 | perf_evsel__close_fd(counter, evsel_list->cpus->nr, | ||
502 | evsel_list->threads->nr); | ||
503 | } | ||
504 | } | ||
367 | 505 | ||
368 | return WEXITSTATUS(status); | 506 | return WEXITSTATUS(status); |
369 | } | 507 | } |
370 | 508 | ||
371 | static void print_noise(int counter, double avg) | 509 | static void print_noise_pct(double total, double avg) |
510 | { | ||
511 | double pct = 0.0; | ||
512 | |||
513 | if (avg) | ||
514 | pct = 100.0*total/avg; | ||
515 | |||
516 | fprintf(stderr, " ( +-%6.2f%% )", pct); | ||
517 | } | ||
518 | |||
519 | static void print_noise(struct perf_evsel *evsel, double avg) | ||
372 | { | 520 | { |
521 | struct perf_stat *ps; | ||
522 | |||
373 | if (run_count == 1) | 523 | if (run_count == 1) |
374 | return; | 524 | return; |
375 | 525 | ||
376 | fprintf(stderr, " ( +- %7.3f%% )", | 526 | ps = evsel->priv; |
377 | 100 * stddev_stats(&event_res_stats[counter][0]) / avg); | 527 | print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); |
378 | } | 528 | } |
379 | 529 | ||
380 | static void nsec_printout(int counter, double avg) | 530 | static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) |
381 | { | 531 | { |
382 | double msecs = avg / 1e6; | 532 | double msecs = avg / 1e6; |
533 | char cpustr[16] = { '\0', }; | ||
534 | const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; | ||
383 | 535 | ||
384 | fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter)); | 536 | if (no_aggr) |
537 | sprintf(cpustr, "CPU%*d%s", | ||
538 | csv_output ? 0 : -4, | ||
539 | evsel_list->cpus->map[cpu], csv_sep); | ||
385 | 540 | ||
386 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { | 541 | fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel)); |
387 | fprintf(stderr, " # %10.3f CPUs ", | 542 | |
388 | avg / avg_stats(&walltime_nsecs_stats)); | 543 | if (evsel->cgrp) |
389 | } | 544 | fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name); |
545 | |||
546 | if (csv_output) | ||
547 | return; | ||
548 | |||
549 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) | ||
550 | fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats)); | ||
551 | } | ||
552 | |||
553 | static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) | ||
554 | { | ||
555 | double total, ratio = 0.0; | ||
556 | const char *color; | ||
557 | |||
558 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
559 | |||
560 | if (total) | ||
561 | ratio = avg / total * 100.0; | ||
562 | |||
563 | color = PERF_COLOR_NORMAL; | ||
564 | if (ratio > 50.0) | ||
565 | color = PERF_COLOR_RED; | ||
566 | else if (ratio > 30.0) | ||
567 | color = PERF_COLOR_MAGENTA; | ||
568 | else if (ratio > 10.0) | ||
569 | color = PERF_COLOR_YELLOW; | ||
570 | |||
571 | fprintf(stderr, " # "); | ||
572 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
573 | fprintf(stderr, " frontend cycles idle "); | ||
574 | } | ||
575 | |||
576 | static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg) | ||
577 | { | ||
578 | double total, ratio = 0.0; | ||
579 | const char *color; | ||
580 | |||
581 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
582 | |||
583 | if (total) | ||
584 | ratio = avg / total * 100.0; | ||
585 | |||
586 | color = PERF_COLOR_NORMAL; | ||
587 | if (ratio > 75.0) | ||
588 | color = PERF_COLOR_RED; | ||
589 | else if (ratio > 50.0) | ||
590 | color = PERF_COLOR_MAGENTA; | ||
591 | else if (ratio > 20.0) | ||
592 | color = PERF_COLOR_YELLOW; | ||
593 | |||
594 | fprintf(stderr, " # "); | ||
595 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
596 | fprintf(stderr, " backend cycles idle "); | ||
597 | } | ||
598 | |||
599 | static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
600 | { | ||
601 | double total, ratio = 0.0; | ||
602 | const char *color; | ||
603 | |||
604 | total = avg_stats(&runtime_branches_stats[cpu]); | ||
605 | |||
606 | if (total) | ||
607 | ratio = avg / total * 100.0; | ||
608 | |||
609 | color = PERF_COLOR_NORMAL; | ||
610 | if (ratio > 20.0) | ||
611 | color = PERF_COLOR_RED; | ||
612 | else if (ratio > 10.0) | ||
613 | color = PERF_COLOR_MAGENTA; | ||
614 | else if (ratio > 5.0) | ||
615 | color = PERF_COLOR_YELLOW; | ||
616 | |||
617 | fprintf(stderr, " # "); | ||
618 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
619 | fprintf(stderr, " of all branches "); | ||
620 | } | ||
621 | |||
622 | static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
623 | { | ||
624 | double total, ratio = 0.0; | ||
625 | const char *color; | ||
626 | |||
627 | total = avg_stats(&runtime_l1_dcache_stats[cpu]); | ||
628 | |||
629 | if (total) | ||
630 | ratio = avg / total * 100.0; | ||
631 | |||
632 | color = PERF_COLOR_NORMAL; | ||
633 | if (ratio > 20.0) | ||
634 | color = PERF_COLOR_RED; | ||
635 | else if (ratio > 10.0) | ||
636 | color = PERF_COLOR_MAGENTA; | ||
637 | else if (ratio > 5.0) | ||
638 | color = PERF_COLOR_YELLOW; | ||
639 | |||
640 | fprintf(stderr, " # "); | ||
641 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
642 | fprintf(stderr, " of all L1-dcache hits "); | ||
390 | } | 643 | } |
391 | 644 | ||
392 | static void abs_printout(int counter, double avg) | 645 | static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) |
393 | { | 646 | { |
394 | double total, ratio = 0.0; | 647 | double total, ratio = 0.0; |
648 | const char *color; | ||
395 | 649 | ||
396 | if (big_num) | 650 | total = avg_stats(&runtime_l1_icache_stats[cpu]); |
397 | fprintf(stderr, " %'18.0f %-24s", avg, event_name(counter)); | 651 | |
652 | if (total) | ||
653 | ratio = avg / total * 100.0; | ||
654 | |||
655 | color = PERF_COLOR_NORMAL; | ||
656 | if (ratio > 20.0) | ||
657 | color = PERF_COLOR_RED; | ||
658 | else if (ratio > 10.0) | ||
659 | color = PERF_COLOR_MAGENTA; | ||
660 | else if (ratio > 5.0) | ||
661 | color = PERF_COLOR_YELLOW; | ||
662 | |||
663 | fprintf(stderr, " # "); | ||
664 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
665 | fprintf(stderr, " of all L1-icache hits "); | ||
666 | } | ||
667 | |||
668 | static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
669 | { | ||
670 | double total, ratio = 0.0; | ||
671 | const char *color; | ||
672 | |||
673 | total = avg_stats(&runtime_dtlb_cache_stats[cpu]); | ||
674 | |||
675 | if (total) | ||
676 | ratio = avg / total * 100.0; | ||
677 | |||
678 | color = PERF_COLOR_NORMAL; | ||
679 | if (ratio > 20.0) | ||
680 | color = PERF_COLOR_RED; | ||
681 | else if (ratio > 10.0) | ||
682 | color = PERF_COLOR_MAGENTA; | ||
683 | else if (ratio > 5.0) | ||
684 | color = PERF_COLOR_YELLOW; | ||
685 | |||
686 | fprintf(stderr, " # "); | ||
687 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
688 | fprintf(stderr, " of all dTLB cache hits "); | ||
689 | } | ||
690 | |||
691 | static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
692 | { | ||
693 | double total, ratio = 0.0; | ||
694 | const char *color; | ||
695 | |||
696 | total = avg_stats(&runtime_itlb_cache_stats[cpu]); | ||
697 | |||
698 | if (total) | ||
699 | ratio = avg / total * 100.0; | ||
700 | |||
701 | color = PERF_COLOR_NORMAL; | ||
702 | if (ratio > 20.0) | ||
703 | color = PERF_COLOR_RED; | ||
704 | else if (ratio > 10.0) | ||
705 | color = PERF_COLOR_MAGENTA; | ||
706 | else if (ratio > 5.0) | ||
707 | color = PERF_COLOR_YELLOW; | ||
708 | |||
709 | fprintf(stderr, " # "); | ||
710 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
711 | fprintf(stderr, " of all iTLB cache hits "); | ||
712 | } | ||
713 | |||
714 | static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
715 | { | ||
716 | double total, ratio = 0.0; | ||
717 | const char *color; | ||
718 | |||
719 | total = avg_stats(&runtime_ll_cache_stats[cpu]); | ||
720 | |||
721 | if (total) | ||
722 | ratio = avg / total * 100.0; | ||
723 | |||
724 | color = PERF_COLOR_NORMAL; | ||
725 | if (ratio > 20.0) | ||
726 | color = PERF_COLOR_RED; | ||
727 | else if (ratio > 10.0) | ||
728 | color = PERF_COLOR_MAGENTA; | ||
729 | else if (ratio > 5.0) | ||
730 | color = PERF_COLOR_YELLOW; | ||
731 | |||
732 | fprintf(stderr, " # "); | ||
733 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
734 | fprintf(stderr, " of all LL-cache hits "); | ||
735 | } | ||
736 | |||
737 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | ||
738 | { | ||
739 | double total, ratio = 0.0; | ||
740 | char cpustr[16] = { '\0', }; | ||
741 | const char *fmt; | ||
742 | |||
743 | if (csv_output) | ||
744 | fmt = "%s%.0f%s%s"; | ||
745 | else if (big_num) | ||
746 | fmt = "%s%'18.0f%s%-25s"; | ||
398 | else | 747 | else |
399 | fprintf(stderr, " %18.0f %-24s", avg, event_name(counter)); | 748 | fmt = "%s%18.0f%s%-25s"; |
400 | 749 | ||
401 | if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { | 750 | if (no_aggr) |
402 | total = avg_stats(&runtime_cycles_stats); | 751 | sprintf(cpustr, "CPU%*d%s", |
752 | csv_output ? 0 : -4, | ||
753 | evsel_list->cpus->map[cpu], csv_sep); | ||
754 | else | ||
755 | cpu = 0; | ||
756 | |||
757 | fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel)); | ||
758 | |||
759 | if (evsel->cgrp) | ||
760 | fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name); | ||
761 | |||
762 | if (csv_output) | ||
763 | return; | ||
764 | |||
765 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | ||
766 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
403 | 767 | ||
404 | if (total) | 768 | if (total) |
405 | ratio = avg / total; | 769 | ratio = avg / total; |
406 | 770 | ||
407 | fprintf(stderr, " # %10.3f IPC ", ratio); | 771 | fprintf(stderr, " # %5.2f insns per cycle ", ratio); |
408 | } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && | 772 | |
409 | runtime_branches_stats.n != 0) { | 773 | total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); |
410 | total = avg_stats(&runtime_branches_stats); | 774 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); |
775 | |||
776 | if (total && avg) { | ||
777 | ratio = total / avg; | ||
778 | fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); | ||
779 | } | ||
780 | |||
781 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && | ||
782 | runtime_branches_stats[cpu].n != 0) { | ||
783 | print_branch_misses(cpu, evsel, avg); | ||
784 | } else if ( | ||
785 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
786 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | ||
787 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
788 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
789 | runtime_l1_dcache_stats[cpu].n != 0) { | ||
790 | print_l1_dcache_misses(cpu, evsel, avg); | ||
791 | } else if ( | ||
792 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
793 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | ||
794 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
795 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
796 | runtime_l1_icache_stats[cpu].n != 0) { | ||
797 | print_l1_icache_misses(cpu, evsel, avg); | ||
798 | } else if ( | ||
799 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
800 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | ||
801 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
802 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
803 | runtime_dtlb_cache_stats[cpu].n != 0) { | ||
804 | print_dtlb_cache_misses(cpu, evsel, avg); | ||
805 | } else if ( | ||
806 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
807 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | ||
808 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
809 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
810 | runtime_itlb_cache_stats[cpu].n != 0) { | ||
811 | print_itlb_cache_misses(cpu, evsel, avg); | ||
812 | } else if ( | ||
813 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
814 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | ||
815 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
816 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
817 | runtime_ll_cache_stats[cpu].n != 0) { | ||
818 | print_ll_cache_misses(cpu, evsel, avg); | ||
819 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | ||
820 | runtime_cacherefs_stats[cpu].n != 0) { | ||
821 | total = avg_stats(&runtime_cacherefs_stats[cpu]); | ||
411 | 822 | ||
412 | if (total) | 823 | if (total) |
413 | ratio = avg * 100 / total; | 824 | ratio = avg * 100 / total; |
414 | 825 | ||
415 | fprintf(stderr, " # %10.3f %% ", ratio); | 826 | fprintf(stderr, " # %8.3f %% of all cache refs ", ratio); |
827 | |||
828 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { | ||
829 | print_stalled_cycles_frontend(cpu, evsel, avg); | ||
830 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { | ||
831 | print_stalled_cycles_backend(cpu, evsel, avg); | ||
832 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { | ||
833 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
834 | |||
835 | if (total) | ||
836 | ratio = 1.0 * avg / total; | ||
416 | 837 | ||
417 | } else if (runtime_nsecs_stats.n != 0) { | 838 | fprintf(stderr, " # %8.3f GHz ", ratio); |
418 | total = avg_stats(&runtime_nsecs_stats); | 839 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
840 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
419 | 841 | ||
420 | if (total) | 842 | if (total) |
421 | ratio = 1000.0 * avg / total; | 843 | ratio = 1000.0 * avg / total; |
422 | 844 | ||
423 | fprintf(stderr, " # %10.3f M/sec", ratio); | 845 | fprintf(stderr, " # %8.3f M/sec ", ratio); |
846 | } else { | ||
847 | fprintf(stderr, " "); | ||
424 | } | 848 | } |
425 | } | 849 | } |
426 | 850 | ||
427 | /* | 851 | /* |
428 | * Print out the results of a single counter: | 852 | * Print out the results of a single counter: |
853 | * aggregated counts in system-wide mode | ||
429 | */ | 854 | */ |
430 | static void print_counter(int counter) | 855 | static void print_counter_aggr(struct perf_evsel *counter) |
431 | { | 856 | { |
432 | double avg = avg_stats(&event_res_stats[counter][0]); | 857 | struct perf_stat *ps = counter->priv; |
433 | int scaled = event_scaled[counter]; | 858 | double avg = avg_stats(&ps->res_stats[0]); |
859 | int scaled = counter->counts->scaled; | ||
434 | 860 | ||
435 | if (scaled == -1) { | 861 | if (scaled == -1) { |
436 | fprintf(stderr, " %18s %-24s\n", | 862 | fprintf(stderr, "%*s%s%*s", |
437 | "<not counted>", event_name(counter)); | 863 | csv_output ? 0 : 18, |
864 | "<not counted>", | ||
865 | csv_sep, | ||
866 | csv_output ? 0 : -24, | ||
867 | event_name(counter)); | ||
868 | |||
869 | if (counter->cgrp) | ||
870 | fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name); | ||
871 | |||
872 | fputc('\n', stderr); | ||
438 | return; | 873 | return; |
439 | } | 874 | } |
440 | 875 | ||
441 | if (nsec_counter(counter)) | 876 | if (nsec_counter(counter)) |
442 | nsec_printout(counter, avg); | 877 | nsec_printout(-1, counter, avg); |
443 | else | 878 | else |
444 | abs_printout(counter, avg); | 879 | abs_printout(-1, counter, avg); |
880 | |||
881 | if (csv_output) { | ||
882 | fputc('\n', stderr); | ||
883 | return; | ||
884 | } | ||
445 | 885 | ||
446 | print_noise(counter, avg); | 886 | print_noise(counter, avg); |
447 | 887 | ||
448 | if (scaled) { | 888 | if (scaled) { |
449 | double avg_enabled, avg_running; | 889 | double avg_enabled, avg_running; |
450 | 890 | ||
451 | avg_enabled = avg_stats(&event_res_stats[counter][1]); | 891 | avg_enabled = avg_stats(&ps->res_stats[1]); |
452 | avg_running = avg_stats(&event_res_stats[counter][2]); | 892 | avg_running = avg_stats(&ps->res_stats[2]); |
453 | 893 | ||
454 | fprintf(stderr, " (scaled from %.2f%%)", | 894 | fprintf(stderr, " [%5.2f%%]", 100 * avg_running / avg_enabled); |
455 | 100 * avg_running / avg_enabled); | ||
456 | } | 895 | } |
457 | |||
458 | fprintf(stderr, "\n"); | 896 | fprintf(stderr, "\n"); |
459 | } | 897 | } |
460 | 898 | ||
899 | /* | ||
900 | * Print out the results of a single counter: | ||
901 | * does not use aggregated count in system-wide | ||
902 | */ | ||
903 | static void print_counter(struct perf_evsel *counter) | ||
904 | { | ||
905 | u64 ena, run, val; | ||
906 | int cpu; | ||
907 | |||
908 | for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) { | ||
909 | val = counter->counts->cpu[cpu].val; | ||
910 | ena = counter->counts->cpu[cpu].ena; | ||
911 | run = counter->counts->cpu[cpu].run; | ||
912 | if (run == 0 || ena == 0) { | ||
913 | fprintf(stderr, "CPU%*d%s%*s%s%*s", | ||
914 | csv_output ? 0 : -4, | ||
915 | evsel_list->cpus->map[cpu], csv_sep, | ||
916 | csv_output ? 0 : 18, | ||
917 | "<not counted>", csv_sep, | ||
918 | csv_output ? 0 : -24, | ||
919 | event_name(counter)); | ||
920 | |||
921 | if (counter->cgrp) | ||
922 | fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name); | ||
923 | |||
924 | fputc('\n', stderr); | ||
925 | continue; | ||
926 | } | ||
927 | |||
928 | if (nsec_counter(counter)) | ||
929 | nsec_printout(cpu, counter, val); | ||
930 | else | ||
931 | abs_printout(cpu, counter, val); | ||
932 | |||
933 | if (!csv_output) { | ||
934 | print_noise(counter, 1.0); | ||
935 | |||
936 | if (run != ena) | ||
937 | fprintf(stderr, " (%.2f%%)", 100.0 * run / ena); | ||
938 | } | ||
939 | fputc('\n', stderr); | ||
940 | } | ||
941 | } | ||
942 | |||
461 | static void print_stat(int argc, const char **argv) | 943 | static void print_stat(int argc, const char **argv) |
462 | { | 944 | { |
463 | int i, counter; | 945 | struct perf_evsel *counter; |
946 | int i; | ||
464 | 947 | ||
465 | fflush(stdout); | 948 | fflush(stdout); |
466 | 949 | ||
467 | fprintf(stderr, "\n"); | 950 | if (!csv_output) { |
468 | fprintf(stderr, " Performance counter stats for "); | 951 | fprintf(stderr, "\n"); |
469 | if(target_pid == -1 && target_tid == -1) { | 952 | fprintf(stderr, " Performance counter stats for "); |
470 | fprintf(stderr, "\'%s", argv[0]); | 953 | if(target_pid == -1 && target_tid == -1) { |
471 | for (i = 1; i < argc; i++) | 954 | fprintf(stderr, "\'%s", argv[0]); |
472 | fprintf(stderr, " %s", argv[i]); | 955 | for (i = 1; i < argc; i++) |
473 | } else if (target_pid != -1) | 956 | fprintf(stderr, " %s", argv[i]); |
474 | fprintf(stderr, "process id \'%d", target_pid); | 957 | } else if (target_pid != -1) |
475 | else | 958 | fprintf(stderr, "process id \'%d", target_pid); |
476 | fprintf(stderr, "thread id \'%d", target_tid); | 959 | else |
477 | 960 | fprintf(stderr, "thread id \'%d", target_tid); | |
478 | fprintf(stderr, "\'"); | 961 | |
479 | if (run_count > 1) | 962 | fprintf(stderr, "\'"); |
480 | fprintf(stderr, " (%d runs)", run_count); | 963 | if (run_count > 1) |
481 | fprintf(stderr, ":\n\n"); | 964 | fprintf(stderr, " (%d runs)", run_count); |
965 | fprintf(stderr, ":\n\n"); | ||
966 | } | ||
482 | 967 | ||
483 | for (counter = 0; counter < nr_counters; counter++) | 968 | if (no_aggr) { |
484 | print_counter(counter); | 969 | list_for_each_entry(counter, &evsel_list->entries, node) |
970 | print_counter(counter); | ||
971 | } else { | ||
972 | list_for_each_entry(counter, &evsel_list->entries, node) | ||
973 | print_counter_aggr(counter); | ||
974 | } | ||
485 | 975 | ||
486 | fprintf(stderr, "\n"); | 976 | if (!csv_output) { |
487 | fprintf(stderr, " %18.9f seconds time elapsed", | 977 | if (!null_run) |
488 | avg_stats(&walltime_nsecs_stats)/1e9); | 978 | fprintf(stderr, "\n"); |
489 | if (run_count > 1) { | 979 | fprintf(stderr, " %17.9f seconds time elapsed", |
490 | fprintf(stderr, " ( +- %7.3f%% )", | 980 | avg_stats(&walltime_nsecs_stats)/1e9); |
491 | 100*stddev_stats(&walltime_nsecs_stats) / | 981 | if (run_count > 1) { |
492 | avg_stats(&walltime_nsecs_stats)); | 982 | fprintf(stderr, " "); |
983 | print_noise_pct(stddev_stats(&walltime_nsecs_stats), | ||
984 | avg_stats(&walltime_nsecs_stats)); | ||
985 | } | ||
986 | fprintf(stderr, "\n\n"); | ||
493 | } | 987 | } |
494 | fprintf(stderr, "\n\n"); | ||
495 | } | 988 | } |
496 | 989 | ||
497 | static volatile int signr = -1; | 990 | static volatile int signr = -1; |
@@ -521,10 +1014,19 @@ static const char * const stat_usage[] = {
521 | NULL | 1014 | NULL |
522 | }; | 1015 | }; |
523 | 1016 | ||
1017 | static int stat__set_big_num(const struct option *opt __used, | ||
1018 | const char *s __used, int unset) | ||
1019 | { | ||
1020 | big_num_opt = unset ? 0 : 1; | ||
1021 | return 0; | ||
1022 | } | ||
1023 | |||
524 | static const struct option options[] = { | 1024 | static const struct option options[] = { |
525 | OPT_CALLBACK('e', "event", NULL, "event", | 1025 | OPT_CALLBACK('e', "event", &evsel_list, "event", |
526 | "event selector. use 'perf list' to list available events", | 1026 | "event selector. use 'perf list' to list available events", |
527 | parse_events), | 1027 | parse_events), |
1028 | OPT_CALLBACK(0, "filter", &evsel_list, "filter", | ||
1029 | "event filter", parse_filter), | ||
528 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, | 1030 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, |
529 | "child tasks do not inherit counters"), | 1031 | "child tasks do not inherit counters"), |
530 | OPT_INTEGER('p', "pid", &target_pid, | 1032 | OPT_INTEGER('p', "pid", &target_pid, |
@@ -541,64 +1043,162 @@ static const struct option options[] = {
541 | "repeat command and print average + stddev (max: 100)"), | 1043 | "repeat command and print average + stddev (max: 100)"), |
542 | OPT_BOOLEAN('n', "null", &null_run, | 1044 | OPT_BOOLEAN('n', "null", &null_run, |
543 | "null run - dont start any counters"), | 1045 | "null run - dont start any counters"), |
544 | OPT_BOOLEAN('B', "big-num", &big_num, | 1046 | OPT_INCR('d', "detailed", &detailed_run, |
545 | "print large numbers with thousands\' separators"), | 1047 | "detailed run - start a lot of events"), |
1048 | OPT_BOOLEAN('S', "sync", &sync_run, | ||
1049 | "call sync() before starting a run"), | ||
1050 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, | ||
1051 | "print large numbers with thousands\' separators", | ||
1052 | stat__set_big_num), | ||
546 | OPT_STRING('C', "cpu", &cpu_list, "cpu", | 1053 | OPT_STRING('C', "cpu", &cpu_list, "cpu", |
547 | "list of cpus to monitor in system-wide"), | 1054 | "list of cpus to monitor in system-wide"), |
1055 | OPT_BOOLEAN('A', "no-aggr", &no_aggr, | ||
1056 | "disable CPU count aggregation"), | ||
1057 | OPT_STRING('x', "field-separator", &csv_sep, "separator", | ||
1058 | "print counts with custom separator"), | ||
1059 | OPT_CALLBACK('G', "cgroup", &evsel_list, "name", | ||
1060 | "monitor event in cgroup name only", | ||
1061 | parse_cgroups), | ||
548 | OPT_END() | 1062 | OPT_END() |
549 | }; | 1063 | }; |
550 | 1064 | ||
1065 | /* | ||
1066 | * Add default attributes, if there were no attributes specified or | ||
1067 | * if -d/--detailed, -d -d or -d -d -d is used: | ||
1068 | */ | ||
1069 | static int add_default_attributes(void) | ||
1070 | { | ||
1071 | struct perf_evsel *pos; | ||
1072 | size_t attr_nr = 0; | ||
1073 | size_t c; | ||
1074 | |||
1075 | /* Set attrs if no event is selected and !null_run: */ | ||
1076 | if (null_run) | ||
1077 | return 0; | ||
1078 | |||
1079 | if (!evsel_list->nr_entries) { | ||
1080 | for (c = 0; c < ARRAY_SIZE(default_attrs); c++) { | ||
1081 | pos = perf_evsel__new(default_attrs + c, c + attr_nr); | ||
1082 | if (pos == NULL) | ||
1083 | return -1; | ||
1084 | perf_evlist__add(evsel_list, pos); | ||
1085 | } | ||
1086 | attr_nr += c; | ||
1087 | } | ||
1088 | |||
1089 | /* Detailed events get appended to the event list: */ | ||
1090 | |||
1091 | if (detailed_run < 1) | ||
1092 | return 0; | ||
1093 | |||
1094 | /* Append detailed run extra attributes: */ | ||
1095 | for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) { | ||
1096 | pos = perf_evsel__new(detailed_attrs + c, c + attr_nr); | ||
1097 | if (pos == NULL) | ||
1098 | return -1; | ||
1099 | perf_evlist__add(evsel_list, pos); | ||
1100 | } | ||
1101 | attr_nr += c; | ||
1102 | |||
1103 | if (detailed_run < 2) | ||
1104 | return 0; | ||
1105 | |||
1106 | /* Append very detailed run extra attributes: */ | ||
1107 | for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) { | ||
1108 | pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr); | ||
1109 | if (pos == NULL) | ||
1110 | return -1; | ||
1111 | perf_evlist__add(evsel_list, pos); | ||
1112 | } | ||
1113 | |||
1114 | if (detailed_run < 3) | ||
1115 | return 0; | ||
1116 | |||
1117 | /* Append very, very detailed run extra attributes: */ | ||
1118 | for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) { | ||
1119 | pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr); | ||
1120 | if (pos == NULL) | ||
1121 | return -1; | ||
1122 | perf_evlist__add(evsel_list, pos); | ||
1123 | } | ||
1124 | |||
1125 | |||
1126 | return 0; | ||
1127 | } | ||
1128 | |||
551 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 1129 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
552 | { | 1130 | { |
553 | int status; | 1131 | struct perf_evsel *pos; |
554 | int i,j; | 1132 | int status = -ENOMEM; |
555 | 1133 | ||
556 | setlocale(LC_ALL, ""); | 1134 | setlocale(LC_ALL, ""); |
557 | 1135 | ||
1136 | evsel_list = perf_evlist__new(NULL, NULL); | ||
1137 | if (evsel_list == NULL) | ||
1138 | return -ENOMEM; | ||
1139 | |||
558 | argc = parse_options(argc, argv, options, stat_usage, | 1140 | argc = parse_options(argc, argv, options, stat_usage, |
559 | PARSE_OPT_STOP_AT_NON_OPTION); | 1141 | PARSE_OPT_STOP_AT_NON_OPTION); |
1142 | |||
1143 | if (csv_sep) | ||
1144 | csv_output = true; | ||
1145 | else | ||
1146 | csv_sep = DEFAULT_SEPARATOR; | ||
1147 | |||
1148 | /* | ||
1149 | * let the spreadsheet do the pretty-printing | ||
1150 | */ | ||
1151 | if (csv_output) { | ||
1152 | /* User explicitely passed -B? */ | ||
1153 | if (big_num_opt == 1) { | ||
1154 | fprintf(stderr, "-B option not supported with -x\n"); | ||
1155 | usage_with_options(stat_usage, options); | ||
1156 | } else /* Nope, so disable big number formatting */ | ||
1157 | big_num = false; | ||
1158 | } else if (big_num_opt == 0) /* User passed --no-big-num */ | ||
1159 | big_num = false; | ||
1160 | |||
560 | if (!argc && target_pid == -1 && target_tid == -1) | 1161 | if (!argc && target_pid == -1 && target_tid == -1) |
561 | usage_with_options(stat_usage, options); | 1162 | usage_with_options(stat_usage, options); |
562 | if (run_count <= 0) | 1163 | if (run_count <= 0) |
563 | usage_with_options(stat_usage, options); | 1164 | usage_with_options(stat_usage, options); |
564 | 1165 | ||
565 | /* Set attrs and nr_counters if no event is selected and !null_run */ | 1166 | /* no_aggr, cgroup are for system-wide only */ |
566 | if (!null_run && !nr_counters) { | 1167 | if ((no_aggr || nr_cgroups) && !system_wide) { |
567 | memcpy(attrs, default_attrs, sizeof(default_attrs)); | 1168 | fprintf(stderr, "both cgroup and no-aggregation " |
568 | nr_counters = ARRAY_SIZE(default_attrs); | 1169 | "modes only available in system-wide mode\n"); |
1170 | |||
1171 | usage_with_options(stat_usage, options); | ||
569 | } | 1172 | } |
570 | 1173 | ||
571 | if (system_wide) | 1174 | if (add_default_attributes()) |
572 | nr_cpus = read_cpu_map(cpu_list); | 1175 | goto out; |
573 | else | ||
574 | nr_cpus = 1; | ||
575 | 1176 | ||
576 | if (nr_cpus < 1) | 1177 | if (target_pid != -1) |
1178 | target_tid = target_pid; | ||
1179 | |||
1180 | evsel_list->threads = thread_map__new(target_pid, target_tid); | ||
1181 | if (evsel_list->threads == NULL) { | ||
1182 | pr_err("Problems finding threads of monitor\n"); | ||
577 | usage_with_options(stat_usage, options); | 1183 | usage_with_options(stat_usage, options); |
1184 | } | ||
578 | 1185 | ||
579 | if (target_pid != -1) { | 1186 | if (system_wide) |
580 | target_tid = target_pid; | 1187 | evsel_list->cpus = cpu_map__new(cpu_list); |
581 | thread_num = find_all_tid(target_pid, &all_tids); | 1188 | else |
582 | if (thread_num <= 0) { | 1189 | evsel_list->cpus = cpu_map__dummy_new(); |
583 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
584 | target_pid); | ||
585 | usage_with_options(stat_usage, options); | ||
586 | } | ||
587 | } else { | ||
588 | all_tids=malloc(sizeof(pid_t)); | ||
589 | if (!all_tids) | ||
590 | return -ENOMEM; | ||
591 | 1190 | ||
592 | all_tids[0] = target_tid; | 1191 | if (evsel_list->cpus == NULL) { |
593 | thread_num = 1; | 1192 | perror("failed to parse CPUs map"); |
1193 | usage_with_options(stat_usage, options); | ||
1194 | return -1; | ||
594 | } | 1195 | } |
595 | 1196 | ||
596 | for (i = 0; i < MAX_NR_CPUS; i++) { | 1197 | list_for_each_entry(pos, &evsel_list->entries, node) { |
597 | for (j = 0; j < MAX_COUNTERS; j++) { | 1198 | if (perf_evsel__alloc_stat_priv(pos) < 0 || |
598 | fd[i][j] = malloc(sizeof(int)*thread_num); | 1199 | perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 || |
599 | if (!fd[i][j]) | 1200 | perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0) |
600 | return -ENOMEM; | 1201 | goto out_free_fd; |
601 | } | ||
602 | } | 1202 | } |
603 | 1203 | ||
604 | /* | 1204 | /* |
@@ -616,11 +1216,20 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
616 | for (run_idx = 0; run_idx < run_count; run_idx++) { | 1216 | for (run_idx = 0; run_idx < run_count; run_idx++) { |
617 | if (run_count != 1 && verbose) | 1217 | if (run_count != 1 && verbose) |
618 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); | 1218 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); |
1219 | |||
1220 | if (sync_run) | ||
1221 | sync(); | ||
1222 | |||
619 | status = run_perf_stat(argc, argv); | 1223 | status = run_perf_stat(argc, argv); |
620 | } | 1224 | } |
621 | 1225 | ||
622 | if (status != -1) | 1226 | if (status != -1) |
623 | print_stat(argc, argv); | 1227 | print_stat(argc, argv); |
624 | 1228 | out_free_fd: | |
1229 | list_for_each_entry(pos, &evsel_list->entries, node) | ||
1230 | perf_evsel__free_stat_priv(pos); | ||
1231 | perf_evlist__delete_maps(evsel_list); | ||
1232 | out: | ||
1233 | perf_evlist__delete(evsel_list); | ||
625 | return status; | 1234 | return status; |
626 | } | 1235 | } |