diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-01-07 08:14:15 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-01-07 08:14:15 -0500 |
commit | 1c2a48cf65580a276552151eb8f78d78c55b828e (patch) | |
tree | 68ed0628a276b33cb5aa0ad4899c1afe0a33a69d /tools/perf/builtin-stat.c | |
parent | 0aa002fe602939370e9476e5ec32b562000a0425 (diff) | |
parent | cb600d2f83c854ec3d6660063e4466431999489b (diff) |
Merge branch 'linus' into x86/apic-cleanups
Conflicts:
arch/x86/include/asm/io_apic.h
Merge reason: Resolve the conflict, update to a more recent -rc base
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 531 |
1 files changed, 327 insertions, 204 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a6b4d44f9502..02b2d8013a61 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include "util/parse-options.h" | 43 | #include "util/parse-options.h" |
44 | #include "util/parse-events.h" | 44 | #include "util/parse-events.h" |
45 | #include "util/event.h" | 45 | #include "util/event.h" |
46 | #include "util/evsel.h" | ||
46 | #include "util/debug.h" | 47 | #include "util/debug.h" |
47 | #include "util/header.h" | 48 | #include "util/header.h" |
48 | #include "util/cpumap.h" | 49 | #include "util/cpumap.h" |
@@ -52,6 +53,8 @@ | |||
52 | #include <math.h> | 53 | #include <math.h> |
53 | #include <locale.h> | 54 | #include <locale.h> |
54 | 55 | ||
56 | #define DEFAULT_SEPARATOR " " | ||
57 | |||
55 | static struct perf_event_attr default_attrs[] = { | 58 | static struct perf_event_attr default_attrs[] = { |
56 | 59 | ||
57 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | 60 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
@@ -69,25 +72,23 @@ static struct perf_event_attr default_attrs[] = { | |||
69 | }; | 72 | }; |
70 | 73 | ||
71 | static bool system_wide = false; | 74 | static bool system_wide = false; |
72 | static int nr_cpus = 0; | 75 | static struct cpu_map *cpus; |
73 | static int run_idx = 0; | 76 | static int run_idx = 0; |
74 | 77 | ||
75 | static int run_count = 1; | 78 | static int run_count = 1; |
76 | static bool no_inherit = false; | 79 | static bool no_inherit = false; |
77 | static bool scale = true; | 80 | static bool scale = true; |
81 | static bool no_aggr = false; | ||
78 | static pid_t target_pid = -1; | 82 | static pid_t target_pid = -1; |
79 | static pid_t target_tid = -1; | 83 | static pid_t target_tid = -1; |
80 | static pid_t *all_tids = NULL; | 84 | static struct thread_map *threads; |
81 | static int thread_num = 0; | ||
82 | static pid_t child_pid = -1; | 85 | static pid_t child_pid = -1; |
83 | static bool null_run = false; | 86 | static bool null_run = false; |
84 | static bool big_num = false; | 87 | static bool big_num = true; |
88 | static int big_num_opt = -1; | ||
85 | static const char *cpu_list; | 89 | static const char *cpu_list; |
86 | 90 | static const char *csv_sep = NULL; | |
87 | 91 | static bool csv_output = false; | |
88 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
89 | |||
90 | static int event_scaled[MAX_COUNTERS]; | ||
91 | 92 | ||
92 | static volatile int done = 0; | 93 | static volatile int done = 0; |
93 | 94 | ||
@@ -96,6 +97,22 @@ struct stats | |||
96 | double n, mean, M2; | 97 | double n, mean, M2; |
97 | }; | 98 | }; |
98 | 99 | ||
100 | struct perf_stat { | ||
101 | struct stats res_stats[3]; | ||
102 | }; | ||
103 | |||
104 | static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) | ||
105 | { | ||
106 | evsel->priv = zalloc(sizeof(struct perf_stat)); | ||
107 | return evsel->priv == NULL ? -ENOMEM : 0; | ||
108 | } | ||
109 | |||
110 | static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) | ||
111 | { | ||
112 | free(evsel->priv); | ||
113 | evsel->priv = NULL; | ||
114 | } | ||
115 | |||
99 | static void update_stats(struct stats *stats, u64 val) | 116 | static void update_stats(struct stats *stats, u64 val) |
100 | { | 117 | { |
101 | double delta; | 118 | double delta; |
@@ -135,69 +152,38 @@ static double stddev_stats(struct stats *stats) | |||
135 | return sqrt(variance_mean); | 152 | return sqrt(variance_mean); |
136 | } | 153 | } |
137 | 154 | ||
138 | struct stats event_res_stats[MAX_COUNTERS][3]; | 155 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
139 | struct stats runtime_nsecs_stats; | 156 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; |
157 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | ||
140 | struct stats walltime_nsecs_stats; | 158 | struct stats walltime_nsecs_stats; |
141 | struct stats runtime_cycles_stats; | ||
142 | struct stats runtime_branches_stats; | ||
143 | 159 | ||
144 | #define MATCH_EVENT(t, c, counter) \ | 160 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
145 | (attrs[counter].type == PERF_TYPE_##t && \ | ||
146 | attrs[counter].config == PERF_COUNT_##c) | ||
147 | |||
148 | #define ERR_PERF_OPEN \ | ||
149 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" | ||
150 | |||
151 | static int create_perf_stat_counter(int counter) | ||
152 | { | 161 | { |
153 | struct perf_event_attr *attr = attrs + counter; | 162 | struct perf_event_attr *attr = &evsel->attr; |
154 | int thread; | ||
155 | int ncreated = 0; | ||
156 | 163 | ||
157 | if (scale) | 164 | if (scale) |
158 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 165 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
159 | PERF_FORMAT_TOTAL_TIME_RUNNING; | 166 | PERF_FORMAT_TOTAL_TIME_RUNNING; |
160 | 167 | ||
161 | if (system_wide) { | 168 | if (system_wide) |
162 | int cpu; | 169 | return perf_evsel__open_per_cpu(evsel, cpus); |
163 | 170 | ||
164 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 171 | attr->inherit = !no_inherit; |
165 | fd[cpu][counter][0] = sys_perf_event_open(attr, | 172 | if (target_pid == -1 && target_tid == -1) { |
166 | -1, cpumap[cpu], -1, 0); | 173 | attr->disabled = 1; |
167 | if (fd[cpu][counter][0] < 0) | 174 | attr->enable_on_exec = 1; |
168 | pr_debug(ERR_PERF_OPEN, counter, | ||
169 | fd[cpu][counter][0], strerror(errno)); | ||
170 | else | ||
171 | ++ncreated; | ||
172 | } | ||
173 | } else { | ||
174 | attr->inherit = !no_inherit; | ||
175 | if (target_pid == -1 && target_tid == -1) { | ||
176 | attr->disabled = 1; | ||
177 | attr->enable_on_exec = 1; | ||
178 | } | ||
179 | for (thread = 0; thread < thread_num; thread++) { | ||
180 | fd[0][counter][thread] = sys_perf_event_open(attr, | ||
181 | all_tids[thread], -1, -1, 0); | ||
182 | if (fd[0][counter][thread] < 0) | ||
183 | pr_debug(ERR_PERF_OPEN, counter, | ||
184 | fd[0][counter][thread], | ||
185 | strerror(errno)); | ||
186 | else | ||
187 | ++ncreated; | ||
188 | } | ||
189 | } | 175 | } |
190 | 176 | ||
191 | return ncreated; | 177 | return perf_evsel__open_per_thread(evsel, threads); |
192 | } | 178 | } |
193 | 179 | ||
194 | /* | 180 | /* |
195 | * Does the counter have nsecs as a unit? | 181 | * Does the counter have nsecs as a unit? |
196 | */ | 182 | */ |
197 | static inline int nsec_counter(int counter) | 183 | static inline int nsec_counter(struct perf_evsel *evsel) |
198 | { | 184 | { |
199 | if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || | 185 | if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || |
200 | MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 186 | perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
201 | return 1; | 187 | return 1; |
202 | 188 | ||
203 | return 0; | 189 | return 0; |
@@ -205,55 +191,19 @@ static inline int nsec_counter(int counter) | |||
205 | 191 | ||
206 | /* | 192 | /* |
207 | * Read out the results of a single counter: | 193 | * Read out the results of a single counter: |
194 | * aggregate counts across CPUs in system-wide mode | ||
208 | */ | 195 | */ |
209 | static void read_counter(int counter) | 196 | static int read_counter_aggr(struct perf_evsel *counter) |
210 | { | 197 | { |
211 | u64 count[3], single_count[3]; | 198 | struct perf_stat *ps = counter->priv; |
212 | int cpu; | 199 | u64 *count = counter->counts->aggr.values; |
213 | size_t res, nv; | 200 | int i; |
214 | int scaled; | ||
215 | int i, thread; | ||
216 | |||
217 | count[0] = count[1] = count[2] = 0; | ||
218 | |||
219 | nv = scale ? 3 : 1; | ||
220 | for (cpu = 0; cpu < nr_cpus; cpu++) { | ||
221 | for (thread = 0; thread < thread_num; thread++) { | ||
222 | if (fd[cpu][counter][thread] < 0) | ||
223 | continue; | ||
224 | |||
225 | res = read(fd[cpu][counter][thread], | ||
226 | single_count, nv * sizeof(u64)); | ||
227 | assert(res == nv * sizeof(u64)); | ||
228 | |||
229 | close(fd[cpu][counter][thread]); | ||
230 | fd[cpu][counter][thread] = -1; | ||
231 | |||
232 | count[0] += single_count[0]; | ||
233 | if (scale) { | ||
234 | count[1] += single_count[1]; | ||
235 | count[2] += single_count[2]; | ||
236 | } | ||
237 | } | ||
238 | } | ||
239 | |||
240 | scaled = 0; | ||
241 | if (scale) { | ||
242 | if (count[2] == 0) { | ||
243 | event_scaled[counter] = -1; | ||
244 | count[0] = 0; | ||
245 | return; | ||
246 | } | ||
247 | 201 | ||
248 | if (count[2] < count[1]) { | 202 | if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0) |
249 | event_scaled[counter] = 1; | 203 | return -1; |
250 | count[0] = (unsigned long long) | ||
251 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
252 | } | ||
253 | } | ||
254 | 204 | ||
255 | for (i = 0; i < 3; i++) | 205 | for (i = 0; i < 3; i++) |
256 | update_stats(&event_res_stats[counter][i], count[i]); | 206 | update_stats(&ps->res_stats[i], count[i]); |
257 | 207 | ||
258 | if (verbose) { | 208 | if (verbose) { |
259 | fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), | 209 | fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), |
@@ -263,26 +213,51 @@ static void read_counter(int counter) | |||
263 | /* | 213 | /* |
264 | * Save the full runtime - to allow normalization during printout: | 214 | * Save the full runtime - to allow normalization during printout: |
265 | */ | 215 | */ |
266 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 216 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) |
267 | update_stats(&runtime_nsecs_stats, count[0]); | 217 | update_stats(&runtime_nsecs_stats[0], count[0]); |
268 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | 218 | if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) |
269 | update_stats(&runtime_cycles_stats, count[0]); | 219 | update_stats(&runtime_cycles_stats[0], count[0]); |
270 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) | 220 | if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) |
271 | update_stats(&runtime_branches_stats, count[0]); | 221 | update_stats(&runtime_branches_stats[0], count[0]); |
222 | |||
223 | return 0; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * Read out the results of a single counter: | ||
228 | * do not aggregate counts across CPUs in system-wide mode | ||
229 | */ | ||
230 | static int read_counter(struct perf_evsel *counter) | ||
231 | { | ||
232 | u64 *count; | ||
233 | int cpu; | ||
234 | |||
235 | for (cpu = 0; cpu < cpus->nr; cpu++) { | ||
236 | if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) | ||
237 | return -1; | ||
238 | |||
239 | count = counter->counts->cpu[cpu].values; | ||
240 | |||
241 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | ||
242 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | ||
243 | if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
244 | update_stats(&runtime_cycles_stats[cpu], count[0]); | ||
245 | if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
246 | update_stats(&runtime_branches_stats[cpu], count[0]); | ||
247 | } | ||
248 | |||
249 | return 0; | ||
272 | } | 250 | } |
273 | 251 | ||
274 | static int run_perf_stat(int argc __used, const char **argv) | 252 | static int run_perf_stat(int argc __used, const char **argv) |
275 | { | 253 | { |
276 | unsigned long long t0, t1; | 254 | unsigned long long t0, t1; |
255 | struct perf_evsel *counter; | ||
277 | int status = 0; | 256 | int status = 0; |
278 | int counter, ncreated = 0; | ||
279 | int child_ready_pipe[2], go_pipe[2]; | 257 | int child_ready_pipe[2], go_pipe[2]; |
280 | const bool forks = (argc > 0); | 258 | const bool forks = (argc > 0); |
281 | char buf; | 259 | char buf; |
282 | 260 | ||
283 | if (!system_wide) | ||
284 | nr_cpus = 1; | ||
285 | |||
286 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { | 261 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { |
287 | perror("failed to create pipes"); | 262 | perror("failed to create pipes"); |
288 | exit(1); | 263 | exit(1); |
@@ -322,7 +297,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
322 | } | 297 | } |
323 | 298 | ||
324 | if (target_tid == -1 && target_pid == -1 && !system_wide) | 299 | if (target_tid == -1 && target_pid == -1 && !system_wide) |
325 | all_tids[0] = child_pid; | 300 | threads->map[0] = child_pid; |
326 | 301 | ||
327 | /* | 302 | /* |
328 | * Wait for the child to be ready to exec. | 303 | * Wait for the child to be ready to exec. |
@@ -334,16 +309,23 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
334 | close(child_ready_pipe[0]); | 309 | close(child_ready_pipe[0]); |
335 | } | 310 | } |
336 | 311 | ||
337 | for (counter = 0; counter < nr_counters; counter++) | 312 | list_for_each_entry(counter, &evsel_list, node) { |
338 | ncreated += create_perf_stat_counter(counter); | 313 | if (create_perf_stat_counter(counter) < 0) { |
339 | 314 | if (errno == -EPERM || errno == -EACCES) { | |
340 | if (ncreated == 0) { | 315 | error("You may not have permission to collect %sstats.\n" |
341 | pr_err("No permission to collect %sstats.\n" | 316 | "\t Consider tweaking" |
342 | "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", | 317 | " /proc/sys/kernel/perf_event_paranoid or running as root.", |
343 | system_wide ? "system-wide " : ""); | 318 | system_wide ? "system-wide " : ""); |
344 | if (child_pid != -1) | 319 | } else { |
345 | kill(child_pid, SIGTERM); | 320 | error("open_counter returned with %d (%s). " |
346 | return -1; | 321 | "/bin/dmesg may provide additional information.\n", |
322 | errno, strerror(errno)); | ||
323 | } | ||
324 | if (child_pid != -1) | ||
325 | kill(child_pid, SIGTERM); | ||
326 | die("Not all events could be opened.\n"); | ||
327 | return -1; | ||
328 | } | ||
347 | } | 329 | } |
348 | 330 | ||
349 | /* | 331 | /* |
@@ -362,60 +344,97 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
362 | 344 | ||
363 | update_stats(&walltime_nsecs_stats, t1 - t0); | 345 | update_stats(&walltime_nsecs_stats, t1 - t0); |
364 | 346 | ||
365 | for (counter = 0; counter < nr_counters; counter++) | 347 | if (no_aggr) { |
366 | read_counter(counter); | 348 | list_for_each_entry(counter, &evsel_list, node) { |
349 | read_counter(counter); | ||
350 | perf_evsel__close_fd(counter, cpus->nr, 1); | ||
351 | } | ||
352 | } else { | ||
353 | list_for_each_entry(counter, &evsel_list, node) { | ||
354 | read_counter_aggr(counter); | ||
355 | perf_evsel__close_fd(counter, cpus->nr, threads->nr); | ||
356 | } | ||
357 | } | ||
367 | 358 | ||
368 | return WEXITSTATUS(status); | 359 | return WEXITSTATUS(status); |
369 | } | 360 | } |
370 | 361 | ||
371 | static void print_noise(int counter, double avg) | 362 | static void print_noise(struct perf_evsel *evsel, double avg) |
372 | { | 363 | { |
364 | struct perf_stat *ps; | ||
365 | |||
373 | if (run_count == 1) | 366 | if (run_count == 1) |
374 | return; | 367 | return; |
375 | 368 | ||
369 | ps = evsel->priv; | ||
376 | fprintf(stderr, " ( +- %7.3f%% )", | 370 | fprintf(stderr, " ( +- %7.3f%% )", |
377 | 100 * stddev_stats(&event_res_stats[counter][0]) / avg); | 371 | 100 * stddev_stats(&ps->res_stats[0]) / avg); |
378 | } | 372 | } |
379 | 373 | ||
380 | static void nsec_printout(int counter, double avg) | 374 | static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) |
381 | { | 375 | { |
382 | double msecs = avg / 1e6; | 376 | double msecs = avg / 1e6; |
377 | char cpustr[16] = { '\0', }; | ||
378 | const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s"; | ||
383 | 379 | ||
384 | fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter)); | 380 | if (no_aggr) |
381 | sprintf(cpustr, "CPU%*d%s", | ||
382 | csv_output ? 0 : -4, | ||
383 | cpus->map[cpu], csv_sep); | ||
384 | |||
385 | fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel)); | ||
386 | |||
387 | if (csv_output) | ||
388 | return; | ||
385 | 389 | ||
386 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { | 390 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
387 | fprintf(stderr, " # %10.3f CPUs ", | 391 | fprintf(stderr, " # %10.3f CPUs ", |
388 | avg / avg_stats(&walltime_nsecs_stats)); | 392 | avg / avg_stats(&walltime_nsecs_stats)); |
389 | } | ||
390 | } | 393 | } |
391 | 394 | ||
392 | static void abs_printout(int counter, double avg) | 395 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) |
393 | { | 396 | { |
394 | double total, ratio = 0.0; | 397 | double total, ratio = 0.0; |
398 | char cpustr[16] = { '\0', }; | ||
399 | const char *fmt; | ||
400 | |||
401 | if (csv_output) | ||
402 | fmt = "%s%.0f%s%s"; | ||
403 | else if (big_num) | ||
404 | fmt = "%s%'18.0f%s%-24s"; | ||
405 | else | ||
406 | fmt = "%s%18.0f%s%-24s"; | ||
395 | 407 | ||
396 | if (big_num) | 408 | if (no_aggr) |
397 | fprintf(stderr, " %'18.0f %-24s", avg, event_name(counter)); | 409 | sprintf(cpustr, "CPU%*d%s", |
410 | csv_output ? 0 : -4, | ||
411 | cpus->map[cpu], csv_sep); | ||
398 | else | 412 | else |
399 | fprintf(stderr, " %18.0f %-24s", avg, event_name(counter)); | 413 | cpu = 0; |
414 | |||
415 | fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel)); | ||
400 | 416 | ||
401 | if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { | 417 | if (csv_output) |
402 | total = avg_stats(&runtime_cycles_stats); | 418 | return; |
419 | |||
420 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | ||
421 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
403 | 422 | ||
404 | if (total) | 423 | if (total) |
405 | ratio = avg / total; | 424 | ratio = avg / total; |
406 | 425 | ||
407 | fprintf(stderr, " # %10.3f IPC ", ratio); | 426 | fprintf(stderr, " # %10.3f IPC ", ratio); |
408 | } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && | 427 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && |
409 | runtime_branches_stats.n != 0) { | 428 | runtime_branches_stats[cpu].n != 0) { |
410 | total = avg_stats(&runtime_branches_stats); | 429 | total = avg_stats(&runtime_branches_stats[cpu]); |
411 | 430 | ||
412 | if (total) | 431 | if (total) |
413 | ratio = avg * 100 / total; | 432 | ratio = avg * 100 / total; |
414 | 433 | ||
415 | fprintf(stderr, " # %10.3f %% ", ratio); | 434 | fprintf(stderr, " # %10.3f %% ", ratio); |
416 | 435 | ||
417 | } else if (runtime_nsecs_stats.n != 0) { | 436 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
418 | total = avg_stats(&runtime_nsecs_stats); | 437 | total = avg_stats(&runtime_nsecs_stats[cpu]); |
419 | 438 | ||
420 | if (total) | 439 | if (total) |
421 | ratio = 1000.0 * avg / total; | 440 | ratio = 1000.0 * avg / total; |
@@ -426,30 +445,38 @@ static void abs_printout(int counter, double avg) | |||
426 | 445 | ||
427 | /* | 446 | /* |
428 | * Print out the results of a single counter: | 447 | * Print out the results of a single counter: |
448 | * aggregated counts in system-wide mode | ||
429 | */ | 449 | */ |
430 | static void print_counter(int counter) | 450 | static void print_counter_aggr(struct perf_evsel *counter) |
431 | { | 451 | { |
432 | double avg = avg_stats(&event_res_stats[counter][0]); | 452 | struct perf_stat *ps = counter->priv; |
433 | int scaled = event_scaled[counter]; | 453 | double avg = avg_stats(&ps->res_stats[0]); |
454 | int scaled = counter->counts->scaled; | ||
434 | 455 | ||
435 | if (scaled == -1) { | 456 | if (scaled == -1) { |
436 | fprintf(stderr, " %18s %-24s\n", | 457 | fprintf(stderr, "%*s%s%-24s\n", |
437 | "<not counted>", event_name(counter)); | 458 | csv_output ? 0 : 18, |
459 | "<not counted>", csv_sep, event_name(counter)); | ||
438 | return; | 460 | return; |
439 | } | 461 | } |
440 | 462 | ||
441 | if (nsec_counter(counter)) | 463 | if (nsec_counter(counter)) |
442 | nsec_printout(counter, avg); | 464 | nsec_printout(-1, counter, avg); |
443 | else | 465 | else |
444 | abs_printout(counter, avg); | 466 | abs_printout(-1, counter, avg); |
467 | |||
468 | if (csv_output) { | ||
469 | fputc('\n', stderr); | ||
470 | return; | ||
471 | } | ||
445 | 472 | ||
446 | print_noise(counter, avg); | 473 | print_noise(counter, avg); |
447 | 474 | ||
448 | if (scaled) { | 475 | if (scaled) { |
449 | double avg_enabled, avg_running; | 476 | double avg_enabled, avg_running; |
450 | 477 | ||
451 | avg_enabled = avg_stats(&event_res_stats[counter][1]); | 478 | avg_enabled = avg_stats(&ps->res_stats[1]); |
452 | avg_running = avg_stats(&event_res_stats[counter][2]); | 479 | avg_running = avg_stats(&ps->res_stats[2]); |
453 | 480 | ||
454 | fprintf(stderr, " (scaled from %.2f%%)", | 481 | fprintf(stderr, " (scaled from %.2f%%)", |
455 | 100 * avg_running / avg_enabled); | 482 | 100 * avg_running / avg_enabled); |
@@ -458,40 +485,92 @@ static void print_counter(int counter) | |||
458 | fprintf(stderr, "\n"); | 485 | fprintf(stderr, "\n"); |
459 | } | 486 | } |
460 | 487 | ||
488 | /* | ||
489 | * Print out the results of a single counter: | ||
490 | * does not use aggregated count in system-wide | ||
491 | */ | ||
492 | static void print_counter(struct perf_evsel *counter) | ||
493 | { | ||
494 | u64 ena, run, val; | ||
495 | int cpu; | ||
496 | |||
497 | for (cpu = 0; cpu < cpus->nr; cpu++) { | ||
498 | val = counter->counts->cpu[cpu].val; | ||
499 | ena = counter->counts->cpu[cpu].ena; | ||
500 | run = counter->counts->cpu[cpu].run; | ||
501 | if (run == 0 || ena == 0) { | ||
502 | fprintf(stderr, "CPU%*d%s%*s%s%-24s", | ||
503 | csv_output ? 0 : -4, | ||
504 | cpus->map[cpu], csv_sep, | ||
505 | csv_output ? 0 : 18, | ||
506 | "<not counted>", csv_sep, | ||
507 | event_name(counter)); | ||
508 | |||
509 | fprintf(stderr, "\n"); | ||
510 | continue; | ||
511 | } | ||
512 | |||
513 | if (nsec_counter(counter)) | ||
514 | nsec_printout(cpu, counter, val); | ||
515 | else | ||
516 | abs_printout(cpu, counter, val); | ||
517 | |||
518 | if (!csv_output) { | ||
519 | print_noise(counter, 1.0); | ||
520 | |||
521 | if (run != ena) { | ||
522 | fprintf(stderr, " (scaled from %.2f%%)", | ||
523 | 100.0 * run / ena); | ||
524 | } | ||
525 | } | ||
526 | fprintf(stderr, "\n"); | ||
527 | } | ||
528 | } | ||
529 | |||
461 | static void print_stat(int argc, const char **argv) | 530 | static void print_stat(int argc, const char **argv) |
462 | { | 531 | { |
463 | int i, counter; | 532 | struct perf_evsel *counter; |
533 | int i; | ||
464 | 534 | ||
465 | fflush(stdout); | 535 | fflush(stdout); |
466 | 536 | ||
467 | fprintf(stderr, "\n"); | 537 | if (!csv_output) { |
468 | fprintf(stderr, " Performance counter stats for "); | 538 | fprintf(stderr, "\n"); |
469 | if(target_pid == -1 && target_tid == -1) { | 539 | fprintf(stderr, " Performance counter stats for "); |
470 | fprintf(stderr, "\'%s", argv[0]); | 540 | if(target_pid == -1 && target_tid == -1) { |
471 | for (i = 1; i < argc; i++) | 541 | fprintf(stderr, "\'%s", argv[0]); |
472 | fprintf(stderr, " %s", argv[i]); | 542 | for (i = 1; i < argc; i++) |
473 | } else if (target_pid != -1) | 543 | fprintf(stderr, " %s", argv[i]); |
474 | fprintf(stderr, "process id \'%d", target_pid); | 544 | } else if (target_pid != -1) |
475 | else | 545 | fprintf(stderr, "process id \'%d", target_pid); |
476 | fprintf(stderr, "thread id \'%d", target_tid); | 546 | else |
477 | 547 | fprintf(stderr, "thread id \'%d", target_tid); | |
478 | fprintf(stderr, "\'"); | 548 | |
479 | if (run_count > 1) | 549 | fprintf(stderr, "\'"); |
480 | fprintf(stderr, " (%d runs)", run_count); | 550 | if (run_count > 1) |
481 | fprintf(stderr, ":\n\n"); | 551 | fprintf(stderr, " (%d runs)", run_count); |
552 | fprintf(stderr, ":\n\n"); | ||
553 | } | ||
482 | 554 | ||
483 | for (counter = 0; counter < nr_counters; counter++) | 555 | if (no_aggr) { |
484 | print_counter(counter); | 556 | list_for_each_entry(counter, &evsel_list, node) |
557 | print_counter(counter); | ||
558 | } else { | ||
559 | list_for_each_entry(counter, &evsel_list, node) | ||
560 | print_counter_aggr(counter); | ||
561 | } | ||
485 | 562 | ||
486 | fprintf(stderr, "\n"); | 563 | if (!csv_output) { |
487 | fprintf(stderr, " %18.9f seconds time elapsed", | 564 | fprintf(stderr, "\n"); |
488 | avg_stats(&walltime_nsecs_stats)/1e9); | 565 | fprintf(stderr, " %18.9f seconds time elapsed", |
489 | if (run_count > 1) { | 566 | avg_stats(&walltime_nsecs_stats)/1e9); |
490 | fprintf(stderr, " ( +- %7.3f%% )", | 567 | if (run_count > 1) { |
568 | fprintf(stderr, " ( +- %7.3f%% )", | ||
491 | 100*stddev_stats(&walltime_nsecs_stats) / | 569 | 100*stddev_stats(&walltime_nsecs_stats) / |
492 | avg_stats(&walltime_nsecs_stats)); | 570 | avg_stats(&walltime_nsecs_stats)); |
571 | } | ||
572 | fprintf(stderr, "\n\n"); | ||
493 | } | 573 | } |
494 | fprintf(stderr, "\n\n"); | ||
495 | } | 574 | } |
496 | 575 | ||
497 | static volatile int signr = -1; | 576 | static volatile int signr = -1; |
@@ -521,6 +600,13 @@ static const char * const stat_usage[] = { | |||
521 | NULL | 600 | NULL |
522 | }; | 601 | }; |
523 | 602 | ||
603 | static int stat__set_big_num(const struct option *opt __used, | ||
604 | const char *s __used, int unset) | ||
605 | { | ||
606 | big_num_opt = unset ? 0 : 1; | ||
607 | return 0; | ||
608 | } | ||
609 | |||
524 | static const struct option options[] = { | 610 | static const struct option options[] = { |
525 | OPT_CALLBACK('e', "event", NULL, "event", | 611 | OPT_CALLBACK('e', "event", NULL, "event", |
526 | "event selector. use 'perf list' to list available events", | 612 | "event selector. use 'perf list' to list available events", |
@@ -541,64 +627,96 @@ static const struct option options[] = { | |||
541 | "repeat command and print average + stddev (max: 100)"), | 627 | "repeat command and print average + stddev (max: 100)"), |
542 | OPT_BOOLEAN('n', "null", &null_run, | 628 | OPT_BOOLEAN('n', "null", &null_run, |
543 | "null run - dont start any counters"), | 629 | "null run - dont start any counters"), |
544 | OPT_BOOLEAN('B', "big-num", &big_num, | 630 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, |
545 | "print large numbers with thousands\' separators"), | 631 | "print large numbers with thousands\' separators", |
632 | stat__set_big_num), | ||
546 | OPT_STRING('C', "cpu", &cpu_list, "cpu", | 633 | OPT_STRING('C', "cpu", &cpu_list, "cpu", |
547 | "list of cpus to monitor in system-wide"), | 634 | "list of cpus to monitor in system-wide"), |
635 | OPT_BOOLEAN('A', "no-aggr", &no_aggr, | ||
636 | "disable CPU count aggregation"), | ||
637 | OPT_STRING('x', "field-separator", &csv_sep, "separator", | ||
638 | "print counts with custom separator"), | ||
548 | OPT_END() | 639 | OPT_END() |
549 | }; | 640 | }; |
550 | 641 | ||
551 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 642 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
552 | { | 643 | { |
553 | int status; | 644 | struct perf_evsel *pos; |
554 | int i,j; | 645 | int status = -ENOMEM; |
555 | 646 | ||
556 | setlocale(LC_ALL, ""); | 647 | setlocale(LC_ALL, ""); |
557 | 648 | ||
558 | argc = parse_options(argc, argv, options, stat_usage, | 649 | argc = parse_options(argc, argv, options, stat_usage, |
559 | PARSE_OPT_STOP_AT_NON_OPTION); | 650 | PARSE_OPT_STOP_AT_NON_OPTION); |
651 | |||
652 | if (csv_sep) | ||
653 | csv_output = true; | ||
654 | else | ||
655 | csv_sep = DEFAULT_SEPARATOR; | ||
656 | |||
657 | /* | ||
658 | * let the spreadsheet do the pretty-printing | ||
659 | */ | ||
660 | if (csv_output) { | ||
661 | /* User explicitely passed -B? */ | ||
662 | if (big_num_opt == 1) { | ||
663 | fprintf(stderr, "-B option not supported with -x\n"); | ||
664 | usage_with_options(stat_usage, options); | ||
665 | } else /* Nope, so disable big number formatting */ | ||
666 | big_num = false; | ||
667 | } else if (big_num_opt == 0) /* User passed --no-big-num */ | ||
668 | big_num = false; | ||
669 | |||
560 | if (!argc && target_pid == -1 && target_tid == -1) | 670 | if (!argc && target_pid == -1 && target_tid == -1) |
561 | usage_with_options(stat_usage, options); | 671 | usage_with_options(stat_usage, options); |
562 | if (run_count <= 0) | 672 | if (run_count <= 0) |
563 | usage_with_options(stat_usage, options); | 673 | usage_with_options(stat_usage, options); |
564 | 674 | ||
675 | /* no_aggr is for system-wide only */ | ||
676 | if (no_aggr && !system_wide) | ||
677 | usage_with_options(stat_usage, options); | ||
678 | |||
565 | /* Set attrs and nr_counters if no event is selected and !null_run */ | 679 | /* Set attrs and nr_counters if no event is selected and !null_run */ |
566 | if (!null_run && !nr_counters) { | 680 | if (!null_run && !nr_counters) { |
567 | memcpy(attrs, default_attrs, sizeof(default_attrs)); | 681 | size_t c; |
682 | |||
568 | nr_counters = ARRAY_SIZE(default_attrs); | 683 | nr_counters = ARRAY_SIZE(default_attrs); |
684 | |||
685 | for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { | ||
686 | pos = perf_evsel__new(default_attrs[c].type, | ||
687 | default_attrs[c].config, | ||
688 | nr_counters); | ||
689 | if (pos == NULL) | ||
690 | goto out; | ||
691 | list_add(&pos->node, &evsel_list); | ||
692 | } | ||
569 | } | 693 | } |
570 | 694 | ||
571 | if (system_wide) | 695 | if (target_pid != -1) |
572 | nr_cpus = read_cpu_map(cpu_list); | 696 | target_tid = target_pid; |
573 | else | ||
574 | nr_cpus = 1; | ||
575 | 697 | ||
576 | if (nr_cpus < 1) | 698 | threads = thread_map__new(target_pid, target_tid); |
699 | if (threads == NULL) { | ||
700 | pr_err("Problems finding threads of monitor\n"); | ||
577 | usage_with_options(stat_usage, options); | 701 | usage_with_options(stat_usage, options); |
702 | } | ||
578 | 703 | ||
579 | if (target_pid != -1) { | 704 | if (system_wide) |
580 | target_tid = target_pid; | 705 | cpus = cpu_map__new(cpu_list); |
581 | thread_num = find_all_tid(target_pid, &all_tids); | 706 | else |
582 | if (thread_num <= 0) { | 707 | cpus = cpu_map__dummy_new(); |
583 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
584 | target_pid); | ||
585 | usage_with_options(stat_usage, options); | ||
586 | } | ||
587 | } else { | ||
588 | all_tids=malloc(sizeof(pid_t)); | ||
589 | if (!all_tids) | ||
590 | return -ENOMEM; | ||
591 | 708 | ||
592 | all_tids[0] = target_tid; | 709 | if (cpus == NULL) { |
593 | thread_num = 1; | 710 | perror("failed to parse CPUs map"); |
711 | usage_with_options(stat_usage, options); | ||
712 | return -1; | ||
594 | } | 713 | } |
595 | 714 | ||
596 | for (i = 0; i < MAX_NR_CPUS; i++) { | 715 | list_for_each_entry(pos, &evsel_list, node) { |
597 | for (j = 0; j < MAX_COUNTERS; j++) { | 716 | if (perf_evsel__alloc_stat_priv(pos) < 0 || |
598 | fd[i][j] = malloc(sizeof(int)*thread_num); | 717 | perf_evsel__alloc_counts(pos, cpus->nr) < 0 || |
599 | if (!fd[i][j]) | 718 | perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) |
600 | return -ENOMEM; | 719 | goto out_free_fd; |
601 | } | ||
602 | } | 720 | } |
603 | 721 | ||
604 | /* | 722 | /* |
@@ -621,6 +739,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
621 | 739 | ||
622 | if (status != -1) | 740 | if (status != -1) |
623 | print_stat(argc, argv); | 741 | print_stat(argc, argv); |
624 | 742 | out_free_fd: | |
743 | list_for_each_entry(pos, &evsel_list, node) | ||
744 | perf_evsel__free_stat_priv(pos); | ||
745 | out: | ||
746 | thread_map__delete(threads); | ||
747 | threads = NULL; | ||
625 | return status; | 748 | return status; |
626 | } | 749 | } |