about summary refs log tree commit diff stats
path: root/tools/perf
diff options
context:
space:
mode:
author Arnaldo Carvalho de Melo <acme@redhat.com> 2011-01-03 14:45:52 -0500
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2011-01-03 21:22:55 -0500
commit c52b12ed2511e6c031a0295fd903ea72b93701fb (patch)
tree 770915627e789401b820a104c1ed23a212e7bd50 /tools/perf
parent 70d544d0576775a2b3923a7e68cb49b0313d80c9 (diff)
perf evsel: Steal the counter reading routines from stat
Making them hopefully generic enough to be used in 'perf test', we'll see.

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r-- tools/perf/builtin-stat.c 121
-rw-r--r-- tools/perf/util/evsel.c 88
-rw-r--r-- tools/perf/util/evsel.h 79
3 files changed, 196 insertions, 92 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 589ba3a92423..a8b00b44b3cd 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -93,12 +93,6 @@ static const char *cpu_list;
93static const char *csv_sep = NULL; 93static const char *csv_sep = NULL;
94static bool csv_output = false; 94static bool csv_output = false;
95 95
96struct cpu_counts {
97 u64 val;
98 u64 ena;
99 u64 run;
100};
101
102static volatile int done = 0; 96static volatile int done = 0;
103 97
104struct stats 98struct stats
@@ -108,15 +102,11 @@ struct stats
108 102
109struct perf_stat { 103struct perf_stat {
110 struct stats res_stats[3]; 104 struct stats res_stats[3];
111 int scaled;
112 struct cpu_counts cpu_counts[];
113}; 105};
114 106
115static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel, int ncpus) 107static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
116{ 108{
117 size_t priv_size = (sizeof(struct perf_stat) + 109 evsel->priv = zalloc(sizeof(struct perf_stat));
118 (ncpus * sizeof(struct cpu_counts)));
119 evsel->priv = zalloc(priv_size);
120 return evsel->priv == NULL ? -ENOMEM : 0; 110 return evsel->priv == NULL ? -ENOMEM : 0;
121} 111}
122 112
@@ -238,52 +228,14 @@ static inline int nsec_counter(struct perf_evsel *evsel)
238 * Read out the results of a single counter: 228 * Read out the results of a single counter:
239 * aggregate counts across CPUs in system-wide mode 229 * aggregate counts across CPUs in system-wide mode
240 */ 230 */
241static void read_counter_aggr(struct perf_evsel *counter) 231static int read_counter_aggr(struct perf_evsel *counter)
242{ 232{
243 struct perf_stat *ps = counter->priv; 233 struct perf_stat *ps = counter->priv;
244 u64 count[3], single_count[3]; 234 u64 *count = counter->counts->aggr.values;
245 int cpu; 235 int i;
246 size_t res, nv;
247 int scaled;
248 int i, thread;
249
250 count[0] = count[1] = count[2] = 0;
251
252 nv = scale ? 3 : 1;
253 for (cpu = 0; cpu < nr_cpus; cpu++) {
254 for (thread = 0; thread < thread_num; thread++) {
255 if (FD(counter, cpu, thread) < 0)
256 continue;
257
258 res = read(FD(counter, cpu, thread),
259 single_count, nv * sizeof(u64));
260 assert(res == nv * sizeof(u64));
261
262 close(FD(counter, cpu, thread));
263 FD(counter, cpu, thread) = -1;
264
265 count[0] += single_count[0];
266 if (scale) {
267 count[1] += single_count[1];
268 count[2] += single_count[2];
269 }
270 }
271 }
272
273 scaled = 0;
274 if (scale) {
275 if (count[2] == 0) {
276 ps->scaled = -1;
277 count[0] = 0;
278 return;
279 }
280 236
281 if (count[2] < count[1]) { 237 if (__perf_evsel__read(counter, nr_cpus, thread_num, scale) < 0)
282 ps->scaled = 1; 238 return -1;
283 count[0] = (unsigned long long)
284 ((double)count[0] * count[1] / count[2] + 0.5);
285 }
286 }
287 239
288 for (i = 0; i < 3; i++) 240 for (i = 0; i < 3; i++)
289 update_stats(&ps->res_stats[i], count[i]); 241 update_stats(&ps->res_stats[i], count[i]);
@@ -302,46 +254,24 @@ static void read_counter_aggr(struct perf_evsel *counter)
302 update_stats(&runtime_cycles_stats[0], count[0]); 254 update_stats(&runtime_cycles_stats[0], count[0]);
303 if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 255 if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
304 update_stats(&runtime_branches_stats[0], count[0]); 256 update_stats(&runtime_branches_stats[0], count[0]);
257
258 return 0;
305} 259}
306 260
307/* 261/*
308 * Read out the results of a single counter: 262 * Read out the results of a single counter:
309 * do not aggregate counts across CPUs in system-wide mode 263 * do not aggregate counts across CPUs in system-wide mode
310 */ 264 */
311static void read_counter(struct perf_evsel *counter) 265static int read_counter(struct perf_evsel *counter)
312{ 266{
313 struct cpu_counts *cpu_counts = counter->priv; 267 u64 *count;
314 u64 count[3];
315 int cpu; 268 int cpu;
316 size_t res, nv;
317
318 count[0] = count[1] = count[2] = 0;
319
320 nv = scale ? 3 : 1;
321 269
322 for (cpu = 0; cpu < nr_cpus; cpu++) { 270 for (cpu = 0; cpu < nr_cpus; cpu++) {
271 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
272 return -1;
323 273
324 if (FD(counter, cpu, 0) < 0) 274 count = counter->counts->cpu[cpu].values;
325 continue;
326
327 res = read(FD(counter, cpu, 0), count, nv * sizeof(u64));
328
329 assert(res == nv * sizeof(u64));
330
331 close(FD(counter, cpu, 0));
332 FD(counter, cpu, 0) = -1;
333
334 if (scale) {
335 if (count[2] == 0) {
336 count[0] = 0;
337 } else if (count[2] < count[1]) {
338 count[0] = (unsigned long long)
339 ((double)count[0] * count[1] / count[2] + 0.5);
340 }
341 }
342 cpu_counts[cpu].val = count[0]; /* scaled count */
343 cpu_counts[cpu].ena = count[1];
344 cpu_counts[cpu].run = count[2];
345 275
346 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) 276 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
347 update_stats(&runtime_nsecs_stats[cpu], count[0]); 277 update_stats(&runtime_nsecs_stats[cpu], count[0]);
@@ -350,6 +280,8 @@ static void read_counter(struct perf_evsel *counter)
350 if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 280 if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
351 update_stats(&runtime_branches_stats[cpu], count[0]); 281 update_stats(&runtime_branches_stats[cpu], count[0]);
352 } 282 }
283
284 return 0;
353} 285}
354 286
355static int run_perf_stat(int argc __used, const char **argv) 287static int run_perf_stat(int argc __used, const char **argv)
@@ -449,12 +381,17 @@ static int run_perf_stat(int argc __used, const char **argv)
449 update_stats(&walltime_nsecs_stats, t1 - t0); 381 update_stats(&walltime_nsecs_stats, t1 - t0);
450 382
451 if (no_aggr) { 383 if (no_aggr) {
452 list_for_each_entry(counter, &evsel_list, node) 384 list_for_each_entry(counter, &evsel_list, node) {
453 read_counter(counter); 385 read_counter(counter);
386 perf_evsel__close_fd(counter, nr_cpus, 1);
387 }
454 } else { 388 } else {
455 list_for_each_entry(counter, &evsel_list, node) 389 list_for_each_entry(counter, &evsel_list, node) {
456 read_counter_aggr(counter); 390 read_counter_aggr(counter);
391 perf_evsel__close_fd(counter, nr_cpus, thread_num);
392 }
457 } 393 }
394
458 return WEXITSTATUS(status); 395 return WEXITSTATUS(status);
459} 396}
460 397
@@ -550,7 +487,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
550{ 487{
551 struct perf_stat *ps = counter->priv; 488 struct perf_stat *ps = counter->priv;
552 double avg = avg_stats(&ps->res_stats[0]); 489 double avg = avg_stats(&ps->res_stats[0]);
553 int scaled = ps->scaled; 490 int scaled = counter->counts->scaled;
554 491
555 if (scaled == -1) { 492 if (scaled == -1) {
556 fprintf(stderr, "%*s%s%-24s\n", 493 fprintf(stderr, "%*s%s%-24s\n",
@@ -590,14 +527,13 @@ static void print_counter_aggr(struct perf_evsel *counter)
590 */ 527 */
591static void print_counter(struct perf_evsel *counter) 528static void print_counter(struct perf_evsel *counter)
592{ 529{
593 struct perf_stat *ps = counter->priv;
594 u64 ena, run, val; 530 u64 ena, run, val;
595 int cpu; 531 int cpu;
596 532
597 for (cpu = 0; cpu < nr_cpus; cpu++) { 533 for (cpu = 0; cpu < nr_cpus; cpu++) {
598 val = ps->cpu_counts[cpu].val; 534 val = counter->counts->cpu[cpu].val;
599 ena = ps->cpu_counts[cpu].ena; 535 ena = counter->counts->cpu[cpu].ena;
600 run = ps->cpu_counts[cpu].run; 536 run = counter->counts->cpu[cpu].run;
601 if (run == 0 || ena == 0) { 537 if (run == 0 || ena == 0) {
602 fprintf(stderr, "CPU%*d%s%*s%s%-24s", 538 fprintf(stderr, "CPU%*d%s%*s%s%-24s",
603 csv_output ? 0 : -4, 539 csv_output ? 0 : -4,
@@ -818,7 +754,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
818 } 754 }
819 755
820 list_for_each_entry(pos, &evsel_list, node) { 756 list_for_each_entry(pos, &evsel_list, node) {
821 if (perf_evsel__alloc_stat_priv(pos, nr_cpus) < 0 || 757 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
758 perf_evsel__alloc_counts(pos, nr_cpus) < 0 ||
822 perf_evsel__alloc_fd(pos, nr_cpus, thread_num) < 0) 759 perf_evsel__alloc_fd(pos, nr_cpus, thread_num) < 0)
823 goto out_free_fd; 760 goto out_free_fd;
824 } 761 }
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 6539ec912c70..3f5de5196231 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1,6 +1,8 @@
1#include "evsel.h" 1#include "evsel.h"
2#include "util.h" 2#include "util.h"
3 3
4#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
5
4struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx) 6struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
5{ 7{
6 struct perf_evsel *evsel = zalloc(sizeof(*evsel)); 8 struct perf_evsel *evsel = zalloc(sizeof(*evsel));
@@ -21,15 +23,101 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
21 return evsel->fd != NULL ? 0 : -ENOMEM; 23 return evsel->fd != NULL ? 0 : -ENOMEM;
22} 24}
23 25
26int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
27{
28 evsel->counts = zalloc((sizeof(*evsel->counts) +
29 (ncpus * sizeof(struct perf_counts_values))));
30 return evsel->counts != NULL ? 0 : -ENOMEM;
31}
32
24void perf_evsel__free_fd(struct perf_evsel *evsel) 33void perf_evsel__free_fd(struct perf_evsel *evsel)
25{ 34{
26 xyarray__delete(evsel->fd); 35 xyarray__delete(evsel->fd);
27 evsel->fd = NULL; 36 evsel->fd = NULL;
28} 37}
29 38
39void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
40{
41 int cpu, thread;
42
43 for (cpu = 0; cpu < ncpus; cpu++)
44 for (thread = 0; thread < nthreads; ++thread) {
45 close(FD(evsel, cpu, thread));
46 FD(evsel, cpu, thread) = -1;
47 }
48}
49
30void perf_evsel__delete(struct perf_evsel *evsel) 50void perf_evsel__delete(struct perf_evsel *evsel)
31{ 51{
32 assert(list_empty(&evsel->node)); 52 assert(list_empty(&evsel->node));
33 xyarray__delete(evsel->fd); 53 xyarray__delete(evsel->fd);
34 free(evsel); 54 free(evsel);
35} 55}
56
57int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
58 int cpu, int thread, bool scale)
59{
60 struct perf_counts_values count;
61 size_t nv = scale ? 3 : 1;
62
63 if (FD(evsel, cpu, thread) < 0)
64 return -EINVAL;
65
66 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
67 return -errno;
68
69 if (scale) {
70 if (count.run == 0)
71 count.val = 0;
72 else if (count.run < count.ena)
73 count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
74 } else
75 count.ena = count.run = 0;
76
77 evsel->counts->cpu[cpu] = count;
78 return 0;
79}
80
81int __perf_evsel__read(struct perf_evsel *evsel,
82 int ncpus, int nthreads, bool scale)
83{
84 size_t nv = scale ? 3 : 1;
85 int cpu, thread;
86 struct perf_counts_values *aggr = &evsel->counts->aggr, count;
87
88 aggr->val = 0;
89
90 for (cpu = 0; cpu < ncpus; cpu++) {
91 for (thread = 0; thread < nthreads; thread++) {
92 if (FD(evsel, cpu, thread) < 0)
93 continue;
94
95 if (readn(FD(evsel, cpu, thread),
96 &count, nv * sizeof(u64)) < 0)
97 return -errno;
98
99 aggr->val += count.val;
100 if (scale) {
101 aggr->ena += count.ena;
102 aggr->run += count.run;
103 }
104 }
105 }
106
107 evsel->counts->scaled = 0;
108 if (scale) {
109 if (aggr->run == 0) {
110 evsel->counts->scaled = -1;
111 aggr->val = 0;
112 return 0;
113 }
114
115 if (aggr->run < aggr->ena) {
116 evsel->counts->scaled = 1;
117 aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
118 }
119 } else
120 aggr->ena = aggr->run = 0;
121
122 return 0;
123}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 8a5cfb656674..8b48ef1e672c 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -2,15 +2,34 @@
2#define __PERF_EVSEL_H 1 2#define __PERF_EVSEL_H 1
3 3
4#include <linux/list.h> 4#include <linux/list.h>
5#include <stdbool.h>
5#include <linux/perf_event.h> 6#include <linux/perf_event.h>
6#include "types.h" 7#include "types.h"
7#include "xyarray.h" 8#include "xyarray.h"
9
10struct perf_counts_values {
11 union {
12 struct {
13 u64 val;
14 u64 ena;
15 u64 run;
16 };
17 u64 values[3];
18 };
19};
20
21struct perf_counts {
22 s8 scaled;
23 struct perf_counts_values aggr;
24 struct perf_counts_values cpu[];
25};
8 26
9struct perf_evsel { 27struct perf_evsel {
10 struct list_head node; 28 struct list_head node;
11 struct perf_event_attr attr; 29 struct perf_event_attr attr;
12 char *filter; 30 char *filter;
13 struct xyarray *fd; 31 struct xyarray *fd;
32 struct perf_counts *counts;
14 int idx; 33 int idx;
15 void *priv; 34 void *priv;
16}; 35};
@@ -19,10 +38,70 @@ struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
19void perf_evsel__delete(struct perf_evsel *evsel); 38void perf_evsel__delete(struct perf_evsel *evsel);
20 39
21int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 40int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
41int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
22void perf_evsel__free_fd(struct perf_evsel *evsel); 42void perf_evsel__free_fd(struct perf_evsel *evsel);
43void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
23 44
24#define perf_evsel__match(evsel, t, c) \ 45#define perf_evsel__match(evsel, t, c) \
25 (evsel->attr.type == PERF_TYPE_##t && \ 46 (evsel->attr.type == PERF_TYPE_##t && \
26 evsel->attr.config == PERF_COUNT_##c) 47 evsel->attr.config == PERF_COUNT_##c)
27 48
49int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
50 int cpu, int thread, bool scale);
51
52/**
53 * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
54 *
55 * @evsel - event selector to read value
56 * @cpu - CPU of interest
57 * @thread - thread of interest
58 */
59static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
60 int cpu, int thread)
61{
62 return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
63}
64
65/**
66 * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
67 *
68 * @evsel - event selector to read value
69 * @cpu - CPU of interest
70 * @thread - thread of interest
71 */
72static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
73 int cpu, int thread)
74{
75 return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
76}
77
78int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
79 bool scale);
80
81/**
82 * perf_evsel__read - Read the aggregate results on all CPUs
83 *
84 * @evsel - event selector to read value
85 * @ncpus - Number of cpus affected, from zero
86 * @nthreads - Number of threads affected, from zero
87 */
88static inline int perf_evsel__read(struct perf_evsel *evsel,
89 int ncpus, int nthreads)
90{
91 return __perf_evsel__read(evsel, ncpus, nthreads, false);
92}
93
94/**
95 * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
96 *
97 * @evsel - event selector to read value
98 * @ncpus - Number of cpus affected, from zero
99 * @nthreads - Number of threads affected, from zero
100 */
101static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
102 int ncpus, int nthreads)
103{
104 return __perf_evsel__read(evsel, ncpus, nthreads, true);
105}
106
28#endif /* __PERF_EVSEL_H */ 107#endif /* __PERF_EVSEL_H */