aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-06-13 08:57:28 -0400
committerIngo Molnar <mingo@elte.hu>2009-06-13 09:18:57 -0400
commit42202dd56c717f173cd0bf2390249e1bf5cf210b (patch)
tree3c0e09eb6a9ea978a476801f40d7070fedcf8359 /tools
parent44175b6f397a6724121eeaf0f072e2c912a46614 (diff)
perf stat: Add feature to run and measure a command multiple times
Add the --repeat <n> feature to perf stat, which repeats a given command up to a 100 times, collects the stats and calculates an average and a stddev. For example, the following oneliner 'perf stat' command runs hackbench 5 times and prints a tabulated result of all metrics, with averages and noise levels (in percentage) printed: aldebaran:~/linux/linux/tools/perf> ./perf stat --repeat 5 ~/hackbench 10 Time: 0.117 Time: 0.108 Time: 0.089 Time: 0.088 Time: 0.100 Performance counter stats for '/home/mingo/hackbench 10' (5 runs): 1243.989586 task-clock-msecs # 10.460 CPUs ( +- 4.720% ) 47706 context-switches # 0.038 M/sec ( +- 19.706% ) 387 CPU-migrations # 0.000 M/sec ( +- 3.608% ) 17793 page-faults # 0.014 M/sec ( +- 0.354% ) 3770941606 cycles # 3031.329 M/sec ( +- 4.621% ) 1566372416 instructions # 0.415 IPC ( +- 2.703% ) 16783421 cache-references # 13.492 M/sec ( +- 5.202% ) 7128590 cache-misses # 5.730 M/sec ( +- 7.420% ) 0.118924455 seconds time elapsed. The goal of this feature is to allow the reliance on these accurate statistics and to know how many times a command has to be repeated for the noise to go down to an acceptable level. (The -v option can be used to see a line printed out as each run progresses.) Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/builtin-stat.c259
1 files changed, 194 insertions, 65 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c12804853eab..9eb42b1ae784 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -43,6 +43,7 @@
43#include "util/parse-events.h" 43#include "util/parse-events.h"
44 44
45#include <sys/prctl.h> 45#include <sys/prctl.h>
46#include <math.h>
46 47
47static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { 48static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
48 49
@@ -79,12 +80,34 @@ static const unsigned int default_count[] = {
79 10000, 80 10000,
80}; 81};
81 82
82static __u64 event_res[MAX_COUNTERS][3]; 83#define MAX_RUN 100
83static __u64 event_scaled[MAX_COUNTERS];
84 84
85static __u64 runtime_nsecs; 85static int run_count = 1;
86static __u64 walltime_nsecs; 86static int run_idx = 0;
87static __u64 runtime_cycles; 87
88static __u64 event_res[MAX_RUN][MAX_COUNTERS][3];
89static __u64 event_scaled[MAX_RUN][MAX_COUNTERS];
90
91//static __u64 event_hist[MAX_RUN][MAX_COUNTERS][3];
92
93
94static __u64 runtime_nsecs[MAX_RUN];
95static __u64 walltime_nsecs[MAX_RUN];
96static __u64 runtime_cycles[MAX_RUN];
97
98static __u64 event_res_avg[MAX_COUNTERS][3];
99static __u64 event_res_noise[MAX_COUNTERS][3];
100
101static __u64 event_scaled_avg[MAX_COUNTERS];
102
103static __u64 runtime_nsecs_avg;
104static __u64 runtime_nsecs_noise;
105
106static __u64 walltime_nsecs_avg;
107static __u64 walltime_nsecs_noise;
108
109static __u64 runtime_cycles_avg;
110static __u64 runtime_cycles_noise;
88 111
89static void create_perf_stat_counter(int counter) 112static void create_perf_stat_counter(int counter)
90{ 113{
@@ -140,7 +163,7 @@ static void read_counter(int counter)
140 int cpu, nv; 163 int cpu, nv;
141 int scaled; 164 int scaled;
142 165
143 count = event_res[counter]; 166 count = event_res[run_idx][counter];
144 167
145 count[0] = count[1] = count[2] = 0; 168 count[0] = count[1] = count[2] = 0;
146 169
@@ -151,6 +174,8 @@ static void read_counter(int counter)
151 174
152 res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); 175 res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
153 assert(res == nv * sizeof(__u64)); 176 assert(res == nv * sizeof(__u64));
177 close(fd[cpu][counter]);
178 fd[cpu][counter] = -1;
154 179
155 count[0] += single_count[0]; 180 count[0] += single_count[0];
156 if (scale) { 181 if (scale) {
@@ -162,13 +187,13 @@ static void read_counter(int counter)
162 scaled = 0; 187 scaled = 0;
163 if (scale) { 188 if (scale) {
164 if (count[2] == 0) { 189 if (count[2] == 0) {
165 event_scaled[counter] = -1; 190 event_scaled[run_idx][counter] = -1;
166 count[0] = 0; 191 count[0] = 0;
167 return; 192 return;
168 } 193 }
169 194
170 if (count[2] < count[1]) { 195 if (count[2] < count[1]) {
171 event_scaled[counter] = 1; 196 event_scaled[run_idx][counter] = 1;
172 count[0] = (unsigned long long) 197 count[0] = (unsigned long long)
173 ((double)count[0] * count[1] / count[2] + 0.5); 198 ((double)count[0] * count[1] / count[2] + 0.5);
174 } 199 }
@@ -178,13 +203,62 @@ static void read_counter(int counter)
178 */ 203 */
179 if (attrs[counter].type == PERF_TYPE_SOFTWARE && 204 if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
180 attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) 205 attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
181 runtime_nsecs = count[0]; 206 runtime_nsecs[run_idx] = count[0];
182 if (attrs[counter].type == PERF_TYPE_HARDWARE && 207 if (attrs[counter].type == PERF_TYPE_HARDWARE &&
183 attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) 208 attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
184 runtime_cycles = count[0]; 209 runtime_cycles[run_idx] = count[0];
185} 210}
186 211
187static void nsec_printout(int counter, __u64 *count) 212static int run_perf_stat(int argc, const char **argv)
213{
214 unsigned long long t0, t1;
215 int status = 0;
216 int counter;
217 int pid;
218
219 if (!system_wide)
220 nr_cpus = 1;
221
222 for (counter = 0; counter < nr_counters; counter++)
223 create_perf_stat_counter(counter);
224
225 /*
226 * Enable counters and exec the command:
227 */
228 t0 = rdclock();
229 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
230
231 if ((pid = fork()) < 0)
232 perror("failed to fork");
233
234 if (!pid) {
235 if (execvp(argv[0], (char **)argv)) {
236 perror(argv[0]);
237 exit(-1);
238 }
239 }
240
241 wait(&status);
242
243 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
244 t1 = rdclock();
245
246 walltime_nsecs[run_idx] = t1 - t0;
247
248 for (counter = 0; counter < nr_counters; counter++)
249 read_counter(counter);
250
251 return WEXITSTATUS(status);
252}
253
254static void print_noise(__u64 *count, __u64 *noise)
255{
256 if (run_count > 1)
257 fprintf(stderr, " ( +- %7.3f%% )",
258 (double)noise[0]/(count[0]+1)*100.0);
259}
260
261static void nsec_printout(int counter, __u64 *count, __u64 *noise)
188{ 262{
189 double msecs = (double)count[0] / 1000000; 263 double msecs = (double)count[0] / 1000000;
190 264
@@ -193,29 +267,30 @@ static void nsec_printout(int counter, __u64 *count)
193 if (attrs[counter].type == PERF_TYPE_SOFTWARE && 267 if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
194 attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { 268 attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
195 269
196 if (walltime_nsecs) 270 if (walltime_nsecs_avg)
197 fprintf(stderr, " # %10.3f CPUs", 271 fprintf(stderr, " # %10.3f CPUs ",
198 (double)count[0] / (double)walltime_nsecs); 272 (double)count[0] / (double)walltime_nsecs_avg);
199 } 273 }
274 print_noise(count, noise);
200} 275}
201 276
202static void abs_printout(int counter, __u64 *count) 277static void abs_printout(int counter, __u64 *count, __u64 *noise)
203{ 278{
204 fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); 279 fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter));
205 280
206 if (runtime_cycles && 281 if (runtime_cycles_avg &&
207 attrs[counter].type == PERF_TYPE_HARDWARE && 282 attrs[counter].type == PERF_TYPE_HARDWARE &&
208 attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { 283 attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
209 284
210 fprintf(stderr, " # %10.3f IPC", 285 fprintf(stderr, " # %10.3f IPC ",
211 (double)count[0] / (double)runtime_cycles); 286 (double)count[0] / (double)runtime_cycles_avg);
212 287 } else {
213 return; 288 if (runtime_nsecs_avg) {
289 fprintf(stderr, " # %10.3f M/sec",
290 (double)count[0]/runtime_nsecs_avg*1000.0);
291 }
214 } 292 }
215 293 print_noise(count, noise);
216 if (runtime_nsecs)
217 fprintf(stderr, " # %10.3f M/sec",
218 (double)count[0]/runtime_nsecs*1000.0);
219} 294}
220 295
221/* 296/*
@@ -223,11 +298,12 @@ static void abs_printout(int counter, __u64 *count)
223 */ 298 */
224static void print_counter(int counter) 299static void print_counter(int counter)
225{ 300{
226 __u64 *count; 301 __u64 *count, *noise;
227 int scaled; 302 int scaled;
228 303
229 count = event_res[counter]; 304 count = event_res_avg[counter];
230 scaled = event_scaled[counter]; 305 noise = event_res_noise[counter];
306 scaled = event_scaled_avg[counter];
231 307
232 if (scaled == -1) { 308 if (scaled == -1) {
233 fprintf(stderr, " %14s %-20s\n", 309 fprintf(stderr, " %14s %-20s\n",
@@ -236,9 +312,9 @@ static void print_counter(int counter)
236 } 312 }
237 313
238 if (nsec_counter(counter)) 314 if (nsec_counter(counter))
239 nsec_printout(counter, count); 315 nsec_printout(counter, count, noise);
240 else 316 else
241 abs_printout(counter, count); 317 abs_printout(counter, count, noise);
242 318
243 if (scaled) 319 if (scaled)
244 fprintf(stderr, " (scaled from %.2f%%)", 320 fprintf(stderr, " (scaled from %.2f%%)",
@@ -247,43 +323,83 @@ static void print_counter(int counter)
247 fprintf(stderr, "\n"); 323 fprintf(stderr, "\n");
248} 324}
249 325
250static int do_perf_stat(int argc, const char **argv) 326/*
327 * Normalize noise values down to stddev:
328 */
329static void normalize(__u64 *val)
251{ 330{
252 unsigned long long t0, t1; 331 double res;
253 int counter;
254 int status;
255 int pid;
256 int i;
257
258 if (!system_wide)
259 nr_cpus = 1;
260 332
261 for (counter = 0; counter < nr_counters; counter++) 333 res = (double)*val / (run_count * sqrt((double)run_count));
262 create_perf_stat_counter(counter);
263 334
264 /* 335 *val = (__u64)res;
265 * Enable counters and exec the command: 336}
266 */
267 t0 = rdclock();
268 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
269 337
270 if ((pid = fork()) < 0) 338/*
271 perror("failed to fork"); 339 * Calculate the averages and noises:
340 */
341static void calc_avg(void)
342{
343 int i, j;
344
345 for (i = 0; i < run_count; i++) {
346 runtime_nsecs_avg += runtime_nsecs[i];
347 walltime_nsecs_avg += walltime_nsecs[i];
348 runtime_cycles_avg += runtime_cycles[i];
349
350 for (j = 0; j < nr_counters; j++) {
351 event_res_avg[j][0] += event_res[i][j][0];
352 event_res_avg[j][1] += event_res[i][j][1];
353 event_res_avg[j][2] += event_res[i][j][2];
354 event_scaled_avg[j] += event_scaled[i][j];
355 }
356 }
357 runtime_nsecs_avg /= run_count;
358 walltime_nsecs_avg /= run_count;
359 runtime_cycles_avg /= run_count;
360
361 for (j = 0; j < nr_counters; j++) {
362 event_res_avg[j][0] /= run_count;
363 event_res_avg[j][1] /= run_count;
364 event_res_avg[j][2] /= run_count;
365 }
272 366
273 if (!pid) { 367 for (i = 0; i < run_count; i++) {
274 if (execvp(argv[0], (char **)argv)) { 368 runtime_nsecs_noise +=
275 perror(argv[0]); 369 abs((__s64)(runtime_nsecs[i] - runtime_nsecs_avg));
276 exit(-1); 370 walltime_nsecs_noise +=
371 abs((__s64)(walltime_nsecs[i] - walltime_nsecs_avg));
372 runtime_cycles_noise +=
373 abs((__s64)(runtime_cycles[i] - runtime_cycles_avg));
374
375 for (j = 0; j < nr_counters; j++) {
376 event_res_noise[j][0] +=
377 abs((__s64)(event_res[i][j][0] - event_res_avg[j][0]));
378 event_res_noise[j][1] +=
379 abs((__s64)(event_res[i][j][1] - event_res_avg[j][1]));
380 event_res_noise[j][2] +=
381 abs((__s64)(event_res[i][j][2] - event_res_avg[j][2]));
277 } 382 }
278 } 383 }
279 384
280 while (wait(&status) >= 0) 385 normalize(&runtime_nsecs_noise);
281 ; 386 normalize(&walltime_nsecs_noise);
387 normalize(&runtime_cycles_noise);
282 388
283 prctl(PR_TASK_PERF_COUNTERS_DISABLE); 389 for (j = 0; j < nr_counters; j++) {
284 t1 = rdclock(); 390 normalize(&event_res_noise[j][0]);
391 normalize(&event_res_noise[j][1]);
392 normalize(&event_res_noise[j][2]);
393 }
394}
395
396static void print_stat(int argc, const char **argv)
397{
398 int i, counter;
399
400 calc_avg();
285 401
286 walltime_nsecs = t1 - t0; 402 run_idx = 0;
287 403
288 fflush(stdout); 404 fflush(stdout);
289 405
@@ -293,21 +409,19 @@ static int do_perf_stat(int argc, const char **argv)
293 for (i = 1; i < argc; i++) 409 for (i = 1; i < argc; i++)
294 fprintf(stderr, " %s", argv[i]); 410 fprintf(stderr, " %s", argv[i]);
295 411
296 fprintf(stderr, "\':\n"); 412 fprintf(stderr, "\'");
297 fprintf(stderr, "\n"); 413 if (run_count > 1)
298 414 fprintf(stderr, " (%d runs)", run_count);
299 for (counter = 0; counter < nr_counters; counter++) 415 fprintf(stderr, ":\n\n");
300 read_counter(counter);
301 416
302 for (counter = 0; counter < nr_counters; counter++) 417 for (counter = 0; counter < nr_counters; counter++)
303 print_counter(counter); 418 print_counter(counter);
304 419
305 420
306 fprintf(stderr, "\n"); 421 fprintf(stderr, "\n");
307 fprintf(stderr, " %14.9f seconds time elapsed.\n", (double)(t1-t0)/1e9); 422 fprintf(stderr, " %14.9f seconds time elapsed.\n",
423 (double)walltime_nsecs_avg/1e9);
308 fprintf(stderr, "\n"); 424 fprintf(stderr, "\n");
309
310 return 0;
311} 425}
312 426
313static volatile int signr = -1; 427static volatile int signr = -1;
@@ -345,11 +459,15 @@ static const struct option options[] = {
345 "scale/normalize counters"), 459 "scale/normalize counters"),
346 OPT_BOOLEAN('v', "verbose", &verbose, 460 OPT_BOOLEAN('v', "verbose", &verbose,
347 "be more verbose (show counter open errors, etc)"), 461 "be more verbose (show counter open errors, etc)"),
462 OPT_INTEGER('r', "repeat", &run_count,
463 "repeat command and print average + stddev (max: 100)"),
348 OPT_END() 464 OPT_END()
349}; 465};
350 466
351int cmd_stat(int argc, const char **argv, const char *prefix) 467int cmd_stat(int argc, const char **argv, const char *prefix)
352{ 468{
469 int status;
470
353 page_size = sysconf(_SC_PAGE_SIZE); 471 page_size = sysconf(_SC_PAGE_SIZE);
354 472
355 memcpy(attrs, default_attrs, sizeof(attrs)); 473 memcpy(attrs, default_attrs, sizeof(attrs));
@@ -357,6 +475,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
357 argc = parse_options(argc, argv, options, stat_usage, 0); 475 argc = parse_options(argc, argv, options, stat_usage, 0);
358 if (!argc) 476 if (!argc)
359 usage_with_options(stat_usage, options); 477 usage_with_options(stat_usage, options);
478 if (run_count <= 0 || run_count > MAX_RUN)
479 usage_with_options(stat_usage, options);
360 480
361 if (!nr_counters) 481 if (!nr_counters)
362 nr_counters = 8; 482 nr_counters = 8;
@@ -376,5 +496,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
376 signal(SIGALRM, skip_signal); 496 signal(SIGALRM, skip_signal);
377 signal(SIGABRT, skip_signal); 497 signal(SIGABRT, skip_signal);
378 498
379 return do_perf_stat(argc, argv); 499 status = 0;
500 for (run_idx = 0; run_idx < run_count; run_idx++) {
501 if (run_count != 1 && verbose)
502 fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1);
503 status = run_perf_stat(argc, argv);
504 }
505
506 print_stat(argc, argv);
507
508 return status;
380} 509}