diff options
author | David S. Miller <davem@davemloft.net> | 2009-09-11 23:35:13 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-09-11 23:35:13 -0400 |
commit | cabc5c0f7fa1342049042d6e147db5a73773955b (patch) | |
tree | 2be09ae1777d580c7dfe05d6d5b76e57281ec447 /tools/perf/builtin-stat.c | |
parent | b73d884756303316ead4cd7dad51236b2a515a1a (diff) | |
parent | 86d710146fb9975f04c505ec78caa43d227c1018 (diff) |
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Conflicts:
arch/sparc/Kconfig
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 239 |
1 files changed, 94 insertions, 145 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b4b06c7903e1..61b828236c11 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -42,6 +42,8 @@ | |||
42 | #include "util/util.h" | 42 | #include "util/util.h" |
43 | #include "util/parse-options.h" | 43 | #include "util/parse-options.h" |
44 | #include "util/parse-events.h" | 44 | #include "util/parse-events.h" |
45 | #include "util/event.h" | ||
46 | #include "util/debug.h" | ||
45 | 47 | ||
46 | #include <sys/prctl.h> | 48 | #include <sys/prctl.h> |
47 | #include <math.h> | 49 | #include <math.h> |
@@ -60,10 +62,7 @@ static struct perf_counter_attr default_attrs[] = { | |||
60 | 62 | ||
61 | }; | 63 | }; |
62 | 64 | ||
63 | #define MAX_RUN 100 | ||
64 | |||
65 | static int system_wide = 0; | 65 | static int system_wide = 0; |
66 | static int verbose = 0; | ||
67 | static unsigned int nr_cpus = 0; | 66 | static unsigned int nr_cpus = 0; |
68 | static int run_idx = 0; | 67 | static int run_idx = 0; |
69 | 68 | ||
@@ -75,26 +74,56 @@ static int null_run = 0; | |||
75 | 74 | ||
76 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 75 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; |
77 | 76 | ||
78 | static u64 runtime_nsecs[MAX_RUN]; | 77 | static int event_scaled[MAX_COUNTERS]; |
79 | static u64 walltime_nsecs[MAX_RUN]; | ||
80 | static u64 runtime_cycles[MAX_RUN]; | ||
81 | 78 | ||
82 | static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; | 79 | struct stats |
83 | static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; | 80 | { |
81 | double n, mean, M2; | ||
82 | }; | ||
84 | 83 | ||
85 | static u64 event_res_avg[MAX_COUNTERS][3]; | 84 | static void update_stats(struct stats *stats, u64 val) |
86 | static u64 event_res_noise[MAX_COUNTERS][3]; | 85 | { |
86 | double delta; | ||
87 | 87 | ||
88 | static u64 event_scaled_avg[MAX_COUNTERS]; | 88 | stats->n++; |
89 | delta = val - stats->mean; | ||
90 | stats->mean += delta / stats->n; | ||
91 | stats->M2 += delta*(val - stats->mean); | ||
92 | } | ||
89 | 93 | ||
90 | static u64 runtime_nsecs_avg; | 94 | static double avg_stats(struct stats *stats) |
91 | static u64 runtime_nsecs_noise; | 95 | { |
96 | return stats->mean; | ||
97 | } | ||
92 | 98 | ||
93 | static u64 walltime_nsecs_avg; | 99 | /* |
94 | static u64 walltime_nsecs_noise; | 100 | * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance |
101 | * | ||
102 | *       (\Sum n_i^2) - ((\Sum n_i)^2)/n | |
103 | * s^2 = ------------------------------- | |
104 | *                  n - 1 | |
105 | * | ||
106 | * http://en.wikipedia.org/wiki/Stddev | ||
107 | * | ||
108 | * The std dev of the mean is related to the std dev by: | ||
109 | * | ||
110 | *             s | |
111 | * s_mean = ------- | |
112 | *          sqrt(n) | |
113 | * | ||
114 | */ | ||
115 | static double stddev_stats(struct stats *stats) | ||
116 | { | ||
117 | double variance = stats->M2 / (stats->n - 1); | ||
118 | double variance_mean = variance / stats->n; | ||
119 | |||
120 | return sqrt(variance_mean); | ||
121 | } | ||
95 | 122 | ||
96 | static u64 runtime_cycles_avg; | 123 | struct stats event_res_stats[MAX_COUNTERS][3]; |
97 | static u64 runtime_cycles_noise; | 124 | struct stats runtime_nsecs_stats; |
125 | struct stats walltime_nsecs_stats; | ||
126 | struct stats runtime_cycles_stats; | ||
98 | 127 | ||
99 | #define MATCH_EVENT(t, c, counter) \ | 128 | #define MATCH_EVENT(t, c, counter) \ |
100 | (attrs[counter].type == PERF_TYPE_##t && \ | 129 | (attrs[counter].type == PERF_TYPE_##t && \ |
@@ -149,12 +178,11 @@ static inline int nsec_counter(int counter) | |||
149 | */ | 178 | */ |
150 | static void read_counter(int counter) | 179 | static void read_counter(int counter) |
151 | { | 180 | { |
152 | u64 *count, single_count[3]; | 181 | u64 count[3], single_count[3]; |
153 | unsigned int cpu; | 182 | unsigned int cpu; |
154 | size_t res, nv; | 183 | size_t res, nv; |
155 | int scaled; | 184 | int scaled; |
156 | 185 | int i; | |
157 | count = event_res[run_idx][counter]; | ||
158 | 186 | ||
159 | count[0] = count[1] = count[2] = 0; | 187 | count[0] = count[1] = count[2] = 0; |
160 | 188 | ||
@@ -179,24 +207,33 @@ static void read_counter(int counter) | |||
179 | scaled = 0; | 207 | scaled = 0; |
180 | if (scale) { | 208 | if (scale) { |
181 | if (count[2] == 0) { | 209 | if (count[2] == 0) { |
182 | event_scaled[run_idx][counter] = -1; | 210 | event_scaled[counter] = -1; |
183 | count[0] = 0; | 211 | count[0] = 0; |
184 | return; | 212 | return; |
185 | } | 213 | } |
186 | 214 | ||
187 | if (count[2] < count[1]) { | 215 | if (count[2] < count[1]) { |
188 | event_scaled[run_idx][counter] = 1; | 216 | event_scaled[counter] = 1; |
189 | count[0] = (unsigned long long) | 217 | count[0] = (unsigned long long) |
190 | ((double)count[0] * count[1] / count[2] + 0.5); | 218 | ((double)count[0] * count[1] / count[2] + 0.5); |
191 | } | 219 | } |
192 | } | 220 | } |
221 | |||
222 | for (i = 0; i < 3; i++) | ||
223 | update_stats(&event_res_stats[counter][i], count[i]); | ||
224 | |||
225 | if (verbose) { | ||
226 | fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), | ||
227 | count[0], count[1], count[2]); | ||
228 | } | ||
229 | |||
193 | /* | 230 | /* |
194 | * Save the full runtime - to allow normalization during printout: | 231 | * Save the full runtime - to allow normalization during printout: |
195 | */ | 232 | */ |
196 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 233 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) |
197 | runtime_nsecs[run_idx] = count[0]; | 234 | update_stats(&runtime_nsecs_stats, count[0]); |
198 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | 235 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) |
199 | runtime_cycles[run_idx] = count[0]; | 236 | update_stats(&runtime_cycles_stats, count[0]); |
200 | } | 237 | } |
201 | 238 | ||
202 | static int run_perf_stat(int argc __used, const char **argv) | 239 | static int run_perf_stat(int argc __used, const char **argv) |
@@ -270,7 +307,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
270 | 307 | ||
271 | t1 = rdclock(); | 308 | t1 = rdclock(); |
272 | 309 | ||
273 | walltime_nsecs[run_idx] = t1 - t0; | 310 | update_stats(&walltime_nsecs_stats, t1 - t0); |
274 | 311 | ||
275 | for (counter = 0; counter < nr_counters; counter++) | 312 | for (counter = 0; counter < nr_counters; counter++) |
276 | read_counter(counter); | 313 | read_counter(counter); |
@@ -278,42 +315,38 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
278 | return WEXITSTATUS(status); | 315 | return WEXITSTATUS(status); |
279 | } | 316 | } |
280 | 317 | ||
281 | static void print_noise(u64 *count, u64 *noise) | 318 | static void print_noise(int counter, double avg) |
282 | { | 319 | { |
283 | if (run_count > 1) | 320 | if (run_count == 1) |
284 | fprintf(stderr, " ( +- %7.3f%% )", | 321 | return; |
285 | (double)noise[0]/(count[0]+1)*100.0); | 322 | |
323 | fprintf(stderr, " ( +- %7.3f%% )", | ||
324 | 100 * stddev_stats(&event_res_stats[counter][0]) / avg); | ||
286 | } | 325 | } |
287 | 326 | ||
288 | static void nsec_printout(int counter, u64 *count, u64 *noise) | 327 | static void nsec_printout(int counter, double avg) |
289 | { | 328 | { |
290 | double msecs = (double)count[0] / 1000000; | 329 | double msecs = avg / 1e6; |
291 | 330 | ||
292 | fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); | 331 | fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); |
293 | 332 | ||
294 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { | 333 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { |
295 | if (walltime_nsecs_avg) | 334 | fprintf(stderr, " # %10.3f CPUs ", |
296 | fprintf(stderr, " # %10.3f CPUs ", | 335 | avg / avg_stats(&walltime_nsecs_stats)); |
297 | (double)count[0] / (double)walltime_nsecs_avg); | ||
298 | } | 336 | } |
299 | print_noise(count, noise); | ||
300 | } | 337 | } |
301 | 338 | ||
302 | static void abs_printout(int counter, u64 *count, u64 *noise) | 339 | static void abs_printout(int counter, double avg) |
303 | { | 340 | { |
304 | fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); | 341 | fprintf(stderr, " %14.0f %-24s", avg, event_name(counter)); |
305 | 342 | ||
306 | if (runtime_cycles_avg && | 343 | if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { |
307 | MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { | ||
308 | fprintf(stderr, " # %10.3f IPC ", | 344 | fprintf(stderr, " # %10.3f IPC ", |
309 | (double)count[0] / (double)runtime_cycles_avg); | 345 | avg / avg_stats(&runtime_cycles_stats)); |
310 | } else { | 346 | } else { |
311 | if (runtime_nsecs_avg) { | 347 | fprintf(stderr, " # %10.3f M/sec", |
312 | fprintf(stderr, " # %10.3f M/sec", | 348 | 1000.0 * avg / avg_stats(&runtime_nsecs_stats)); |
313 | (double)count[0]/runtime_nsecs_avg*1000.0); | ||
314 | } | ||
315 | } | 349 | } |
316 | print_noise(count, noise); | ||
317 | } | 350 | } |
318 | 351 | ||
319 | /* | 352 | /* |
@@ -321,12 +354,8 @@ static void abs_printout(int counter, u64 *count, u64 *noise) | |||
321 | */ | 354 | */ |
322 | static void print_counter(int counter) | 355 | static void print_counter(int counter) |
323 | { | 356 | { |
324 | u64 *count, *noise; | 357 | double avg = avg_stats(&event_res_stats[counter][0]); |
325 | int scaled; | 358 | int scaled = event_scaled[counter]; |
326 | |||
327 | count = event_res_avg[counter]; | ||
328 | noise = event_res_noise[counter]; | ||
329 | scaled = event_scaled_avg[counter]; | ||
330 | 359 | ||
331 | if (scaled == -1) { | 360 | if (scaled == -1) { |
332 | fprintf(stderr, " %14s %-24s\n", | 361 | fprintf(stderr, " %14s %-24s\n", |
@@ -335,110 +364,29 @@ static void print_counter(int counter) | |||
335 | } | 364 | } |
336 | 365 | ||
337 | if (nsec_counter(counter)) | 366 | if (nsec_counter(counter)) |
338 | nsec_printout(counter, count, noise); | 367 | nsec_printout(counter, avg); |
339 | else | 368 | else |
340 | abs_printout(counter, count, noise); | 369 | abs_printout(counter, avg); |
341 | |||
342 | if (scaled) | ||
343 | fprintf(stderr, " (scaled from %.2f%%)", | ||
344 | (double) count[2] / count[1] * 100); | ||
345 | |||
346 | fprintf(stderr, "\n"); | ||
347 | } | ||
348 | 370 | ||
349 | /* | 371 | print_noise(counter, avg); |
350 | * normalize_noise noise values down to stddev: | ||
351 | */ | ||
352 | static void normalize_noise(u64 *val) | ||
353 | { | ||
354 | double res; | ||
355 | 372 | ||
356 | res = (double)*val / (run_count * sqrt((double)run_count)); | 373 | if (scaled) { |
374 | double avg_enabled, avg_running; | ||
357 | 375 | ||
358 | *val = (u64)res; | 376 | avg_enabled = avg_stats(&event_res_stats[counter][1]); |
359 | } | 377 | avg_running = avg_stats(&event_res_stats[counter][2]); |
360 | 378 | ||
361 | static void update_avg(const char *name, int idx, u64 *avg, u64 *val) | 379 | fprintf(stderr, " (scaled from %.2f%%)", |
362 | { | 380 | 100 * avg_running / avg_enabled); |
363 | *avg += *val; | ||
364 | |||
365 | if (verbose > 1) | ||
366 | fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); | ||
367 | } | ||
368 | /* | ||
369 | * Calculate the averages and noises: | ||
370 | */ | ||
371 | static void calc_avg(void) | ||
372 | { | ||
373 | int i, j; | ||
374 | |||
375 | if (verbose > 1) | ||
376 | fprintf(stderr, "\n"); | ||
377 | |||
378 | for (i = 0; i < run_count; i++) { | ||
379 | update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); | ||
380 | update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); | ||
381 | update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); | ||
382 | |||
383 | for (j = 0; j < nr_counters; j++) { | ||
384 | update_avg("counter/0", j, | ||
385 | event_res_avg[j]+0, event_res[i][j]+0); | ||
386 | update_avg("counter/1", j, | ||
387 | event_res_avg[j]+1, event_res[i][j]+1); | ||
388 | update_avg("counter/2", j, | ||
389 | event_res_avg[j]+2, event_res[i][j]+2); | ||
390 | if (event_scaled[i][j] != (u64)-1) | ||
391 | update_avg("scaled", j, | ||
392 | event_scaled_avg + j, event_scaled[i]+j); | ||
393 | else | ||
394 | event_scaled_avg[j] = -1; | ||
395 | } | ||
396 | } | ||
397 | runtime_nsecs_avg /= run_count; | ||
398 | walltime_nsecs_avg /= run_count; | ||
399 | runtime_cycles_avg /= run_count; | ||
400 | |||
401 | for (j = 0; j < nr_counters; j++) { | ||
402 | event_res_avg[j][0] /= run_count; | ||
403 | event_res_avg[j][1] /= run_count; | ||
404 | event_res_avg[j][2] /= run_count; | ||
405 | } | ||
406 | |||
407 | for (i = 0; i < run_count; i++) { | ||
408 | runtime_nsecs_noise += | ||
409 | abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); | ||
410 | walltime_nsecs_noise += | ||
411 | abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); | ||
412 | runtime_cycles_noise += | ||
413 | abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); | ||
414 | |||
415 | for (j = 0; j < nr_counters; j++) { | ||
416 | event_res_noise[j][0] += | ||
417 | abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); | ||
418 | event_res_noise[j][1] += | ||
419 | abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); | ||
420 | event_res_noise[j][2] += | ||
421 | abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); | ||
422 | } | ||
423 | } | 381 | } |
424 | 382 | ||
425 | normalize_noise(&runtime_nsecs_noise); | 383 | fprintf(stderr, "\n"); |
426 | normalize_noise(&walltime_nsecs_noise); | ||
427 | normalize_noise(&runtime_cycles_noise); | ||
428 | |||
429 | for (j = 0; j < nr_counters; j++) { | ||
430 | normalize_noise(&event_res_noise[j][0]); | ||
431 | normalize_noise(&event_res_noise[j][1]); | ||
432 | normalize_noise(&event_res_noise[j][2]); | ||
433 | } | ||
434 | } | 384 | } |
435 | 385 | ||
436 | static void print_stat(int argc, const char **argv) | 386 | static void print_stat(int argc, const char **argv) |
437 | { | 387 | { |
438 | int i, counter; | 388 | int i, counter; |
439 | 389 | ||
440 | calc_avg(); | ||
441 | |||
442 | fflush(stdout); | 390 | fflush(stdout); |
443 | 391 | ||
444 | fprintf(stderr, "\n"); | 392 | fprintf(stderr, "\n"); |
@@ -457,10 +405,11 @@ static void print_stat(int argc, const char **argv) | |||
457 | 405 | ||
458 | fprintf(stderr, "\n"); | 406 | fprintf(stderr, "\n"); |
459 | fprintf(stderr, " %14.9f seconds time elapsed", | 407 | fprintf(stderr, " %14.9f seconds time elapsed", |
460 | (double)walltime_nsecs_avg/1e9); | 408 | avg_stats(&walltime_nsecs_stats)/1e9); |
461 | if (run_count > 1) { | 409 | if (run_count > 1) { |
462 | fprintf(stderr, " ( +- %7.3f%% )", | 410 | fprintf(stderr, " ( +- %7.3f%% )", |
463 | 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg); | 411 | 100*stddev_stats(&walltime_nsecs_stats) / |
412 | avg_stats(&walltime_nsecs_stats)); | ||
464 | } | 413 | } |
465 | fprintf(stderr, "\n\n"); | 414 | fprintf(stderr, "\n\n"); |
466 | } | 415 | } |
@@ -515,7 +464,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
515 | PARSE_OPT_STOP_AT_NON_OPTION); | 464 | PARSE_OPT_STOP_AT_NON_OPTION); |
516 | if (!argc) | 465 | if (!argc) |
517 | usage_with_options(stat_usage, options); | 466 | usage_with_options(stat_usage, options); |
518 | if (run_count <= 0 || run_count > MAX_RUN) | 467 | if (run_count <= 0) |
519 | usage_with_options(stat_usage, options); | 468 | usage_with_options(stat_usage, options); |
520 | 469 | ||
521 | /* Set attrs and nr_counters if no event is selected and !null_run */ | 470 | /* Set attrs and nr_counters if no event is selected and !null_run */ |