diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 368 |
1 files changed, 149 insertions, 219 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7ff746da7e6c..02b2d8013a61 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include "util/parse-options.h" | 43 | #include "util/parse-options.h" |
44 | #include "util/parse-events.h" | 44 | #include "util/parse-events.h" |
45 | #include "util/event.h" | 45 | #include "util/event.h" |
46 | #include "util/evsel.h" | ||
46 | #include "util/debug.h" | 47 | #include "util/debug.h" |
47 | #include "util/header.h" | 48 | #include "util/header.h" |
48 | #include "util/cpumap.h" | 49 | #include "util/cpumap.h" |
@@ -71,7 +72,7 @@ static struct perf_event_attr default_attrs[] = { | |||
71 | }; | 72 | }; |
72 | 73 | ||
73 | static bool system_wide = false; | 74 | static bool system_wide = false; |
74 | static int nr_cpus = 0; | 75 | static struct cpu_map *cpus; |
75 | static int run_idx = 0; | 76 | static int run_idx = 0; |
76 | 77 | ||
77 | static int run_count = 1; | 78 | static int run_count = 1; |
@@ -80,8 +81,7 @@ static bool scale = true; | |||
80 | static bool no_aggr = false; | 81 | static bool no_aggr = false; |
81 | static pid_t target_pid = -1; | 82 | static pid_t target_pid = -1; |
82 | static pid_t target_tid = -1; | 83 | static pid_t target_tid = -1; |
83 | static pid_t *all_tids = NULL; | 84 | static struct thread_map *threads; |
84 | static int thread_num = 0; | ||
85 | static pid_t child_pid = -1; | 85 | static pid_t child_pid = -1; |
86 | static bool null_run = false; | 86 | static bool null_run = false; |
87 | static bool big_num = true; | 87 | static bool big_num = true; |
@@ -90,17 +90,6 @@ static const char *cpu_list; | |||
90 | static const char *csv_sep = NULL; | 90 | static const char *csv_sep = NULL; |
91 | static bool csv_output = false; | 91 | static bool csv_output = false; |
92 | 92 | ||
93 | |||
94 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
95 | |||
96 | static int event_scaled[MAX_COUNTERS]; | ||
97 | |||
98 | static struct { | ||
99 | u64 val; | ||
100 | u64 ena; | ||
101 | u64 run; | ||
102 | } cpu_counts[MAX_NR_CPUS][MAX_COUNTERS]; | ||
103 | |||
104 | static volatile int done = 0; | 93 | static volatile int done = 0; |
105 | 94 | ||
106 | struct stats | 95 | struct stats |
@@ -108,6 +97,22 @@ struct stats | |||
108 | double n, mean, M2; | 97 | double n, mean, M2; |
109 | }; | 98 | }; |
110 | 99 | ||
100 | struct perf_stat { | ||
101 | struct stats res_stats[3]; | ||
102 | }; | ||
103 | |||
104 | static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) | ||
105 | { | ||
106 | evsel->priv = zalloc(sizeof(struct perf_stat)); | ||
107 | return evsel->priv == NULL ? -ENOMEM : 0; | ||
108 | } | ||
109 | |||
110 | static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) | ||
111 | { | ||
112 | free(evsel->priv); | ||
113 | evsel->priv = NULL; | ||
114 | } | ||
115 | |||
111 | static void update_stats(struct stats *stats, u64 val) | 116 | static void update_stats(struct stats *stats, u64 val) |
112 | { | 117 | { |
113 | double delta; | 118 | double delta; |
@@ -147,75 +152,38 @@ static double stddev_stats(struct stats *stats) | |||
147 | return sqrt(variance_mean); | 152 | return sqrt(variance_mean); |
148 | } | 153 | } |
149 | 154 | ||
150 | struct stats event_res_stats[MAX_COUNTERS][3]; | ||
151 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | 155 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
152 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; | 156 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; |
153 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | 157 | struct stats runtime_branches_stats[MAX_NR_CPUS]; |
154 | struct stats walltime_nsecs_stats; | 158 | struct stats walltime_nsecs_stats; |
155 | 159 | ||
156 | #define MATCH_EVENT(t, c, counter) \ | 160 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
157 | (attrs[counter].type == PERF_TYPE_##t && \ | ||
158 | attrs[counter].config == PERF_COUNT_##c) | ||
159 | |||
160 | #define ERR_PERF_OPEN \ | ||
161 | "counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information." | ||
162 | |||
163 | static int create_perf_stat_counter(int counter, bool *perm_err) | ||
164 | { | 161 | { |
165 | struct perf_event_attr *attr = attrs + counter; | 162 | struct perf_event_attr *attr = &evsel->attr; |
166 | int thread; | ||
167 | int ncreated = 0; | ||
168 | 163 | ||
169 | if (scale) | 164 | if (scale) |
170 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 165 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
171 | PERF_FORMAT_TOTAL_TIME_RUNNING; | 166 | PERF_FORMAT_TOTAL_TIME_RUNNING; |
172 | 167 | ||
173 | if (system_wide) { | 168 | if (system_wide) |
174 | int cpu; | 169 | return perf_evsel__open_per_cpu(evsel, cpus); |
175 | 170 | ||
176 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 171 | attr->inherit = !no_inherit; |
177 | fd[cpu][counter][0] = sys_perf_event_open(attr, | 172 | if (target_pid == -1 && target_tid == -1) { |
178 | -1, cpumap[cpu], -1, 0); | 173 | attr->disabled = 1; |
179 | if (fd[cpu][counter][0] < 0) { | 174 | attr->enable_on_exec = 1; |
180 | if (errno == EPERM || errno == EACCES) | ||
181 | *perm_err = true; | ||
182 | error(ERR_PERF_OPEN, counter, | ||
183 | fd[cpu][counter][0], strerror(errno)); | ||
184 | } else { | ||
185 | ++ncreated; | ||
186 | } | ||
187 | } | ||
188 | } else { | ||
189 | attr->inherit = !no_inherit; | ||
190 | if (target_pid == -1 && target_tid == -1) { | ||
191 | attr->disabled = 1; | ||
192 | attr->enable_on_exec = 1; | ||
193 | } | ||
194 | for (thread = 0; thread < thread_num; thread++) { | ||
195 | fd[0][counter][thread] = sys_perf_event_open(attr, | ||
196 | all_tids[thread], -1, -1, 0); | ||
197 | if (fd[0][counter][thread] < 0) { | ||
198 | if (errno == EPERM || errno == EACCES) | ||
199 | *perm_err = true; | ||
200 | error(ERR_PERF_OPEN, counter, | ||
201 | fd[0][counter][thread], | ||
202 | strerror(errno)); | ||
203 | } else { | ||
204 | ++ncreated; | ||
205 | } | ||
206 | } | ||
207 | } | 175 | } |
208 | 176 | ||
209 | return ncreated; | 177 | return perf_evsel__open_per_thread(evsel, threads); |
210 | } | 178 | } |
211 | 179 | ||
212 | /* | 180 | /* |
213 | * Does the counter have nsecs as a unit? | 181 | * Does the counter have nsecs as a unit? |
214 | */ | 182 | */ |
215 | static inline int nsec_counter(int counter) | 183 | static inline int nsec_counter(struct perf_evsel *evsel) |
216 | { | 184 | { |
217 | if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || | 185 | if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || |
218 | MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 186 | perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
219 | return 1; | 187 | return 1; |
220 | 188 | ||
221 | return 0; | 189 | return 0; |
@@ -225,54 +193,17 @@ static inline int nsec_counter(int counter) | |||
225 | * Read out the results of a single counter: | 193 | * Read out the results of a single counter: |
226 | * aggregate counts across CPUs in system-wide mode | 194 | * aggregate counts across CPUs in system-wide mode |
227 | */ | 195 | */ |
228 | static void read_counter_aggr(int counter) | 196 | static int read_counter_aggr(struct perf_evsel *counter) |
229 | { | 197 | { |
230 | u64 count[3], single_count[3]; | 198 | struct perf_stat *ps = counter->priv; |
231 | int cpu; | 199 | u64 *count = counter->counts->aggr.values; |
232 | size_t res, nv; | 200 | int i; |
233 | int scaled; | ||
234 | int i, thread; | ||
235 | 201 | ||
236 | count[0] = count[1] = count[2] = 0; | 202 | if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0) |
237 | 203 | return -1; | |
238 | nv = scale ? 3 : 1; | ||
239 | for (cpu = 0; cpu < nr_cpus; cpu++) { | ||
240 | for (thread = 0; thread < thread_num; thread++) { | ||
241 | if (fd[cpu][counter][thread] < 0) | ||
242 | continue; | ||
243 | |||
244 | res = read(fd[cpu][counter][thread], | ||
245 | single_count, nv * sizeof(u64)); | ||
246 | assert(res == nv * sizeof(u64)); | ||
247 | |||
248 | close(fd[cpu][counter][thread]); | ||
249 | fd[cpu][counter][thread] = -1; | ||
250 | |||
251 | count[0] += single_count[0]; | ||
252 | if (scale) { | ||
253 | count[1] += single_count[1]; | ||
254 | count[2] += single_count[2]; | ||
255 | } | ||
256 | } | ||
257 | } | ||
258 | |||
259 | scaled = 0; | ||
260 | if (scale) { | ||
261 | if (count[2] == 0) { | ||
262 | event_scaled[counter] = -1; | ||
263 | count[0] = 0; | ||
264 | return; | ||
265 | } | ||
266 | |||
267 | if (count[2] < count[1]) { | ||
268 | event_scaled[counter] = 1; | ||
269 | count[0] = (unsigned long long) | ||
270 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
271 | } | ||
272 | } | ||
273 | 204 | ||
274 | for (i = 0; i < 3; i++) | 205 | for (i = 0; i < 3; i++) |
275 | update_stats(&event_res_stats[counter][i], count[i]); | 206 | update_stats(&ps->res_stats[i], count[i]); |
276 | 207 | ||
277 | if (verbose) { | 208 | if (verbose) { |
278 | fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), | 209 | fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), |
@@ -282,74 +213,51 @@ static void read_counter_aggr(int counter) | |||
282 | /* | 213 | /* |
283 | * Save the full runtime - to allow normalization during printout: | 214 | * Save the full runtime - to allow normalization during printout: |
284 | */ | 215 | */ |
285 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 216 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) |
286 | update_stats(&runtime_nsecs_stats[0], count[0]); | 217 | update_stats(&runtime_nsecs_stats[0], count[0]); |
287 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | 218 | if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) |
288 | update_stats(&runtime_cycles_stats[0], count[0]); | 219 | update_stats(&runtime_cycles_stats[0], count[0]); |
289 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) | 220 | if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) |
290 | update_stats(&runtime_branches_stats[0], count[0]); | 221 | update_stats(&runtime_branches_stats[0], count[0]); |
222 | |||
223 | return 0; | ||
291 | } | 224 | } |
292 | 225 | ||
293 | /* | 226 | /* |
294 | * Read out the results of a single counter: | 227 | * Read out the results of a single counter: |
295 | * do not aggregate counts across CPUs in system-wide mode | 228 | * do not aggregate counts across CPUs in system-wide mode |
296 | */ | 229 | */ |
297 | static void read_counter(int counter) | 230 | static int read_counter(struct perf_evsel *counter) |
298 | { | 231 | { |
299 | u64 count[3]; | 232 | u64 *count; |
300 | int cpu; | 233 | int cpu; |
301 | size_t res, nv; | ||
302 | |||
303 | count[0] = count[1] = count[2] = 0; | ||
304 | |||
305 | nv = scale ? 3 : 1; | ||
306 | |||
307 | for (cpu = 0; cpu < nr_cpus; cpu++) { | ||
308 | |||
309 | if (fd[cpu][counter][0] < 0) | ||
310 | continue; | ||
311 | |||
312 | res = read(fd[cpu][counter][0], count, nv * sizeof(u64)); | ||
313 | |||
314 | assert(res == nv * sizeof(u64)); | ||
315 | 234 | ||
316 | close(fd[cpu][counter][0]); | 235 | for (cpu = 0; cpu < cpus->nr; cpu++) { |
317 | fd[cpu][counter][0] = -1; | 236 | if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) |
237 | return -1; | ||
318 | 238 | ||
319 | if (scale) { | 239 | count = counter->counts->cpu[cpu].values; |
320 | if (count[2] == 0) { | ||
321 | count[0] = 0; | ||
322 | } else if (count[2] < count[1]) { | ||
323 | count[0] = (unsigned long long) | ||
324 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
325 | } | ||
326 | } | ||
327 | cpu_counts[cpu][counter].val = count[0]; /* scaled count */ | ||
328 | cpu_counts[cpu][counter].ena = count[1]; | ||
329 | cpu_counts[cpu][counter].run = count[2]; | ||
330 | 240 | ||
331 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 241 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) |
332 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | 242 | update_stats(&runtime_nsecs_stats[cpu], count[0]); |
333 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | 243 | if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) |
334 | update_stats(&runtime_cycles_stats[cpu], count[0]); | 244 | update_stats(&runtime_cycles_stats[cpu], count[0]); |
335 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) | 245 | if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) |
336 | update_stats(&runtime_branches_stats[cpu], count[0]); | 246 | update_stats(&runtime_branches_stats[cpu], count[0]); |
337 | } | 247 | } |
248 | |||
249 | return 0; | ||
338 | } | 250 | } |
339 | 251 | ||
340 | static int run_perf_stat(int argc __used, const char **argv) | 252 | static int run_perf_stat(int argc __used, const char **argv) |
341 | { | 253 | { |
342 | unsigned long long t0, t1; | 254 | unsigned long long t0, t1; |
255 | struct perf_evsel *counter; | ||
343 | int status = 0; | 256 | int status = 0; |
344 | int counter, ncreated = 0; | ||
345 | int child_ready_pipe[2], go_pipe[2]; | 257 | int child_ready_pipe[2], go_pipe[2]; |
346 | bool perm_err = false; | ||
347 | const bool forks = (argc > 0); | 258 | const bool forks = (argc > 0); |
348 | char buf; | 259 | char buf; |
349 | 260 | ||
350 | if (!system_wide) | ||
351 | nr_cpus = 1; | ||
352 | |||
353 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { | 261 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { |
354 | perror("failed to create pipes"); | 262 | perror("failed to create pipes"); |
355 | exit(1); | 263 | exit(1); |
@@ -389,7 +297,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
389 | } | 297 | } |
390 | 298 | ||
391 | if (target_tid == -1 && target_pid == -1 && !system_wide) | 299 | if (target_tid == -1 && target_pid == -1 && !system_wide) |
392 | all_tids[0] = child_pid; | 300 | threads->map[0] = child_pid; |
393 | 301 | ||
394 | /* | 302 | /* |
395 | * Wait for the child to be ready to exec. | 303 | * Wait for the child to be ready to exec. |
@@ -401,19 +309,23 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
401 | close(child_ready_pipe[0]); | 309 | close(child_ready_pipe[0]); |
402 | } | 310 | } |
403 | 311 | ||
404 | for (counter = 0; counter < nr_counters; counter++) | 312 | list_for_each_entry(counter, &evsel_list, node) { |
405 | ncreated += create_perf_stat_counter(counter, &perm_err); | 313 | if (create_perf_stat_counter(counter) < 0) { |
406 | 314 | if (errno == -EPERM || errno == -EACCES) { | |
407 | if (ncreated < nr_counters) { | 315 | error("You may not have permission to collect %sstats.\n" |
408 | if (perm_err) | 316 | "\t Consider tweaking" |
409 | error("You may not have permission to collect %sstats.\n" | 317 | " /proc/sys/kernel/perf_event_paranoid or running as root.", |
410 | "\t Consider tweaking" | 318 | system_wide ? "system-wide " : ""); |
411 | " /proc/sys/kernel/perf_event_paranoid or running as root.", | 319 | } else { |
412 | system_wide ? "system-wide " : ""); | 320 | error("open_counter returned with %d (%s). " |
413 | die("Not all events could be opened.\n"); | 321 | "/bin/dmesg may provide additional information.\n", |
414 | if (child_pid != -1) | 322 | errno, strerror(errno)); |
415 | kill(child_pid, SIGTERM); | 323 | } |
416 | return -1; | 324 | if (child_pid != -1) |
325 | kill(child_pid, SIGTERM); | ||
326 | die("Not all events could be opened.\n"); | ||
327 | return -1; | ||
328 | } | ||
417 | } | 329 | } |
418 | 330 | ||
419 | /* | 331 | /* |
@@ -433,25 +345,33 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
433 | update_stats(&walltime_nsecs_stats, t1 - t0); | 345 | update_stats(&walltime_nsecs_stats, t1 - t0); |
434 | 346 | ||
435 | if (no_aggr) { | 347 | if (no_aggr) { |
436 | for (counter = 0; counter < nr_counters; counter++) | 348 | list_for_each_entry(counter, &evsel_list, node) { |
437 | read_counter(counter); | 349 | read_counter(counter); |
350 | perf_evsel__close_fd(counter, cpus->nr, 1); | ||
351 | } | ||
438 | } else { | 352 | } else { |
439 | for (counter = 0; counter < nr_counters; counter++) | 353 | list_for_each_entry(counter, &evsel_list, node) { |
440 | read_counter_aggr(counter); | 354 | read_counter_aggr(counter); |
355 | perf_evsel__close_fd(counter, cpus->nr, threads->nr); | ||
356 | } | ||
441 | } | 357 | } |
358 | |||
442 | return WEXITSTATUS(status); | 359 | return WEXITSTATUS(status); |
443 | } | 360 | } |
444 | 361 | ||
445 | static void print_noise(int counter, double avg) | 362 | static void print_noise(struct perf_evsel *evsel, double avg) |
446 | { | 363 | { |
364 | struct perf_stat *ps; | ||
365 | |||
447 | if (run_count == 1) | 366 | if (run_count == 1) |
448 | return; | 367 | return; |
449 | 368 | ||
369 | ps = evsel->priv; | ||
450 | fprintf(stderr, " ( +- %7.3f%% )", | 370 | fprintf(stderr, " ( +- %7.3f%% )", |
451 | 100 * stddev_stats(&event_res_stats[counter][0]) / avg); | 371 | 100 * stddev_stats(&ps->res_stats[0]) / avg); |
452 | } | 372 | } |
453 | 373 | ||
454 | static void nsec_printout(int cpu, int counter, double avg) | 374 | static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) |
455 | { | 375 | { |
456 | double msecs = avg / 1e6; | 376 | double msecs = avg / 1e6; |
457 | char cpustr[16] = { '\0', }; | 377 | char cpustr[16] = { '\0', }; |
@@ -460,20 +380,19 @@ static void nsec_printout(int cpu, int counter, double avg) | |||
460 | if (no_aggr) | 380 | if (no_aggr) |
461 | sprintf(cpustr, "CPU%*d%s", | 381 | sprintf(cpustr, "CPU%*d%s", |
462 | csv_output ? 0 : -4, | 382 | csv_output ? 0 : -4, |
463 | cpumap[cpu], csv_sep); | 383 | cpus->map[cpu], csv_sep); |
464 | 384 | ||
465 | fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter)); | 385 | fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel)); |
466 | 386 | ||
467 | if (csv_output) | 387 | if (csv_output) |
468 | return; | 388 | return; |
469 | 389 | ||
470 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { | 390 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
471 | fprintf(stderr, " # %10.3f CPUs ", | 391 | fprintf(stderr, " # %10.3f CPUs ", |
472 | avg / avg_stats(&walltime_nsecs_stats)); | 392 | avg / avg_stats(&walltime_nsecs_stats)); |
473 | } | ||
474 | } | 393 | } |
475 | 394 | ||
476 | static void abs_printout(int cpu, int counter, double avg) | 395 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) |
477 | { | 396 | { |
478 | double total, ratio = 0.0; | 397 | double total, ratio = 0.0; |
479 | char cpustr[16] = { '\0', }; | 398 | char cpustr[16] = { '\0', }; |
@@ -489,23 +408,23 @@ static void abs_printout(int cpu, int counter, double avg) | |||
489 | if (no_aggr) | 408 | if (no_aggr) |
490 | sprintf(cpustr, "CPU%*d%s", | 409 | sprintf(cpustr, "CPU%*d%s", |
491 | csv_output ? 0 : -4, | 410 | csv_output ? 0 : -4, |
492 | cpumap[cpu], csv_sep); | 411 | cpus->map[cpu], csv_sep); |
493 | else | 412 | else |
494 | cpu = 0; | 413 | cpu = 0; |
495 | 414 | ||
496 | fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter)); | 415 | fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel)); |
497 | 416 | ||
498 | if (csv_output) | 417 | if (csv_output) |
499 | return; | 418 | return; |
500 | 419 | ||
501 | if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { | 420 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { |
502 | total = avg_stats(&runtime_cycles_stats[cpu]); | 421 | total = avg_stats(&runtime_cycles_stats[cpu]); |
503 | 422 | ||
504 | if (total) | 423 | if (total) |
505 | ratio = avg / total; | 424 | ratio = avg / total; |
506 | 425 | ||
507 | fprintf(stderr, " # %10.3f IPC ", ratio); | 426 | fprintf(stderr, " # %10.3f IPC ", ratio); |
508 | } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && | 427 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && |
509 | runtime_branches_stats[cpu].n != 0) { | 428 | runtime_branches_stats[cpu].n != 0) { |
510 | total = avg_stats(&runtime_branches_stats[cpu]); | 429 | total = avg_stats(&runtime_branches_stats[cpu]); |
511 | 430 | ||
@@ -528,10 +447,11 @@ static void abs_printout(int cpu, int counter, double avg) | |||
528 | * Print out the results of a single counter: | 447 | * Print out the results of a single counter: |
529 | * aggregated counts in system-wide mode | 448 | * aggregated counts in system-wide mode |
530 | */ | 449 | */ |
531 | static void print_counter_aggr(int counter) | 450 | static void print_counter_aggr(struct perf_evsel *counter) |
532 | { | 451 | { |
533 | double avg = avg_stats(&event_res_stats[counter][0]); | 452 | struct perf_stat *ps = counter->priv; |
534 | int scaled = event_scaled[counter]; | 453 | double avg = avg_stats(&ps->res_stats[0]); |
454 | int scaled = counter->counts->scaled; | ||
535 | 455 | ||
536 | if (scaled == -1) { | 456 | if (scaled == -1) { |
537 | fprintf(stderr, "%*s%s%-24s\n", | 457 | fprintf(stderr, "%*s%s%-24s\n", |
@@ -555,8 +475,8 @@ static void print_counter_aggr(int counter) | |||
555 | if (scaled) { | 475 | if (scaled) { |
556 | double avg_enabled, avg_running; | 476 | double avg_enabled, avg_running; |
557 | 477 | ||
558 | avg_enabled = avg_stats(&event_res_stats[counter][1]); | 478 | avg_enabled = avg_stats(&ps->res_stats[1]); |
559 | avg_running = avg_stats(&event_res_stats[counter][2]); | 479 | avg_running = avg_stats(&ps->res_stats[2]); |
560 | 480 | ||
561 | fprintf(stderr, " (scaled from %.2f%%)", | 481 | fprintf(stderr, " (scaled from %.2f%%)", |
562 | 100 * avg_running / avg_enabled); | 482 | 100 * avg_running / avg_enabled); |
@@ -569,19 +489,19 @@ static void print_counter_aggr(int counter) | |||
569 | * Print out the results of a single counter: | 489 | * Print out the results of a single counter: |
570 | * does not use aggregated count in system-wide | 490 | * does not use aggregated count in system-wide |
571 | */ | 491 | */ |
572 | static void print_counter(int counter) | 492 | static void print_counter(struct perf_evsel *counter) |
573 | { | 493 | { |
574 | u64 ena, run, val; | 494 | u64 ena, run, val; |
575 | int cpu; | 495 | int cpu; |
576 | 496 | ||
577 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 497 | for (cpu = 0; cpu < cpus->nr; cpu++) { |
578 | val = cpu_counts[cpu][counter].val; | 498 | val = counter->counts->cpu[cpu].val; |
579 | ena = cpu_counts[cpu][counter].ena; | 499 | ena = counter->counts->cpu[cpu].ena; |
580 | run = cpu_counts[cpu][counter].run; | 500 | run = counter->counts->cpu[cpu].run; |
581 | if (run == 0 || ena == 0) { | 501 | if (run == 0 || ena == 0) { |
582 | fprintf(stderr, "CPU%*d%s%*s%s%-24s", | 502 | fprintf(stderr, "CPU%*d%s%*s%s%-24s", |
583 | csv_output ? 0 : -4, | 503 | csv_output ? 0 : -4, |
584 | cpumap[cpu], csv_sep, | 504 | cpus->map[cpu], csv_sep, |
585 | csv_output ? 0 : 18, | 505 | csv_output ? 0 : 18, |
586 | "<not counted>", csv_sep, | 506 | "<not counted>", csv_sep, |
587 | event_name(counter)); | 507 | event_name(counter)); |
@@ -609,7 +529,8 @@ static void print_counter(int counter) | |||
609 | 529 | ||
610 | static void print_stat(int argc, const char **argv) | 530 | static void print_stat(int argc, const char **argv) |
611 | { | 531 | { |
612 | int i, counter; | 532 | struct perf_evsel *counter; |
533 | int i; | ||
613 | 534 | ||
614 | fflush(stdout); | 535 | fflush(stdout); |
615 | 536 | ||
@@ -632,10 +553,10 @@ static void print_stat(int argc, const char **argv) | |||
632 | } | 553 | } |
633 | 554 | ||
634 | if (no_aggr) { | 555 | if (no_aggr) { |
635 | for (counter = 0; counter < nr_counters; counter++) | 556 | list_for_each_entry(counter, &evsel_list, node) |
636 | print_counter(counter); | 557 | print_counter(counter); |
637 | } else { | 558 | } else { |
638 | for (counter = 0; counter < nr_counters; counter++) | 559 | list_for_each_entry(counter, &evsel_list, node) |
639 | print_counter_aggr(counter); | 560 | print_counter_aggr(counter); |
640 | } | 561 | } |
641 | 562 | ||
@@ -720,8 +641,8 @@ static const struct option options[] = { | |||
720 | 641 | ||
721 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 642 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
722 | { | 643 | { |
723 | int status; | 644 | struct perf_evsel *pos; |
724 | int i,j; | 645 | int status = -ENOMEM; |
725 | 646 | ||
726 | setlocale(LC_ALL, ""); | 647 | setlocale(LC_ALL, ""); |
727 | 648 | ||
@@ -757,41 +678,45 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
757 | 678 | ||
758 | /* Set attrs and nr_counters if no event is selected and !null_run */ | 679 | /* Set attrs and nr_counters if no event is selected and !null_run */ |
759 | if (!null_run && !nr_counters) { | 680 | if (!null_run && !nr_counters) { |
760 | memcpy(attrs, default_attrs, sizeof(default_attrs)); | 681 | size_t c; |
682 | |||
761 | nr_counters = ARRAY_SIZE(default_attrs); | 683 | nr_counters = ARRAY_SIZE(default_attrs); |
684 | |||
685 | for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { | ||
686 | pos = perf_evsel__new(default_attrs[c].type, | ||
687 | default_attrs[c].config, | ||
688 | nr_counters); | ||
689 | if (pos == NULL) | ||
690 | goto out; | ||
691 | list_add(&pos->node, &evsel_list); | ||
692 | } | ||
762 | } | 693 | } |
763 | 694 | ||
764 | if (system_wide) | 695 | if (target_pid != -1) |
765 | nr_cpus = read_cpu_map(cpu_list); | 696 | target_tid = target_pid; |
766 | else | ||
767 | nr_cpus = 1; | ||
768 | 697 | ||
769 | if (nr_cpus < 1) | 698 | threads = thread_map__new(target_pid, target_tid); |
699 | if (threads == NULL) { | ||
700 | pr_err("Problems finding threads of monitor\n"); | ||
770 | usage_with_options(stat_usage, options); | 701 | usage_with_options(stat_usage, options); |
702 | } | ||
771 | 703 | ||
772 | if (target_pid != -1) { | 704 | if (system_wide) |
773 | target_tid = target_pid; | 705 | cpus = cpu_map__new(cpu_list); |
774 | thread_num = find_all_tid(target_pid, &all_tids); | 706 | else |
775 | if (thread_num <= 0) { | 707 | cpus = cpu_map__dummy_new(); |
776 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
777 | target_pid); | ||
778 | usage_with_options(stat_usage, options); | ||
779 | } | ||
780 | } else { | ||
781 | all_tids=malloc(sizeof(pid_t)); | ||
782 | if (!all_tids) | ||
783 | return -ENOMEM; | ||
784 | 708 | ||
785 | all_tids[0] = target_tid; | 709 | if (cpus == NULL) { |
786 | thread_num = 1; | 710 | perror("failed to parse CPUs map"); |
711 | usage_with_options(stat_usage, options); | ||
712 | return -1; | ||
787 | } | 713 | } |
788 | 714 | ||
789 | for (i = 0; i < MAX_NR_CPUS; i++) { | 715 | list_for_each_entry(pos, &evsel_list, node) { |
790 | for (j = 0; j < MAX_COUNTERS; j++) { | 716 | if (perf_evsel__alloc_stat_priv(pos) < 0 || |
791 | fd[i][j] = malloc(sizeof(int)*thread_num); | 717 | perf_evsel__alloc_counts(pos, cpus->nr) < 0 || |
792 | if (!fd[i][j]) | 718 | perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) |
793 | return -ENOMEM; | 719 | goto out_free_fd; |
794 | } | ||
795 | } | 720 | } |
796 | 721 | ||
797 | /* | 722 | /* |
@@ -814,6 +739,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
814 | 739 | ||
815 | if (status != -1) | 740 | if (status != -1) |
816 | print_stat(argc, argv); | 741 | print_stat(argc, argv); |
817 | 742 | out_free_fd: | |
743 | list_for_each_entry(pos, &evsel_list, node) | ||
744 | perf_evsel__free_stat_priv(pos); | ||
745 | out: | ||
746 | thread_map__delete(threads); | ||
747 | threads = NULL; | ||
818 | return status; | 748 | return status; |
819 | } | 749 | } |