summaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2013-08-02 20:41:11 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2013-08-07 16:35:29 -0400
commit411916880ff4061ac0491a154f10af4d49a0c61a (patch)
treebea40d54fa4765c3cb1a65c76394e4510a9e3652 /tools/perf/builtin-stat.c
parente2407bef968d64a28465561832686636d3380bf9 (diff)
perf stat: Add support for an initial delay option (--delay / -D)
When measuring workloads, the startup phase -- doing page faults, dynamic linking, opening files -- is often very different from the rest of the workload. Especially with smaller kernels and when using counter multiplexing, this can give significant measurement errors. Multiplexing assumes that the workload is mostly the same over longer periods. But at startup there is typically some spike of activity which is relatively short. If many groups are being multiplexed, the one group that sees the spike, and which is then scaled up over the time needed to run all groups, may show a significant error. Also, in general it's often not useful to measure the startup, because it is so different from the rest. One way around this is to use interval mode and discard the first sample, but this can be awkward because interval mode doesn't support intervals of less than 100ms, and also a useful interval is not necessarily the same as a useful startup delay. This patch adds a new --delay / -D option (an initial delay) to skip measuring the startup phase. The time is specified in ms. Here's a simple example: perf stat -e page-faults bash -c 'for i in $(seq 100000) ; do true ; done' ... 3,721 page-faults ... If we just wait 20 ms the number of page faults is 1/3 less: perf stat -D 20 -e page-faults bash -c 'for i in $(seq 100000) ; do true ; done' ... 2,823 page-faults ... So we filtered out most of the startup noise from bash. Signed-off-by: Andi Kleen <ak@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1375490473-1503-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c22
1 files changed, 21 insertions, 1 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7ff4a1..2e637e4c951d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -100,6 +100,7 @@ static const char *pre_cmd = NULL;
100static const char *post_cmd = NULL; 100static const char *post_cmd = NULL;
101static bool sync_run = false; 101static bool sync_run = false;
102static unsigned int interval = 0; 102static unsigned int interval = 0;
103static unsigned int initial_delay = 0;
103static bool forever = false; 104static bool forever = false;
104static struct timespec ref_time; 105static struct timespec ref_time;
105static struct cpu_map *aggr_map; 106static struct cpu_map *aggr_map;
@@ -254,7 +255,8 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
254 if (!perf_target__has_task(&target) && 255 if (!perf_target__has_task(&target) &&
255 perf_evsel__is_group_leader(evsel)) { 256 perf_evsel__is_group_leader(evsel)) {
256 attr->disabled = 1; 257 attr->disabled = 1;
257 attr->enable_on_exec = 1; 258 if (!initial_delay)
259 attr->enable_on_exec = 1;
258 } 260 }
259 261
260 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 262 return perf_evsel__open_per_thread(evsel, evsel_list->threads);
@@ -416,6 +418,20 @@ static void print_interval(void)
416 } 418 }
417} 419}
418 420
421static void handle_initial_delay(void)
422{
423 struct perf_evsel *counter;
424
425 if (initial_delay) {
426 const int ncpus = cpu_map__nr(evsel_list->cpus),
427 nthreads = thread_map__nr(evsel_list->threads);
428
429 usleep(initial_delay * 1000);
430 list_for_each_entry(counter, &evsel_list->entries, node)
431 perf_evsel__enable(counter, ncpus, nthreads);
432 }
433}
434
419static int __run_perf_stat(int argc, const char **argv) 435static int __run_perf_stat(int argc, const char **argv)
420{ 436{
421 char msg[512]; 437 char msg[512];
@@ -486,6 +502,7 @@ static int __run_perf_stat(int argc, const char **argv)
486 502
487 if (forks) { 503 if (forks) {
488 perf_evlist__start_workload(evsel_list); 504 perf_evlist__start_workload(evsel_list);
505 handle_initial_delay();
489 506
490 if (interval) { 507 if (interval) {
491 while (!waitpid(child_pid, &status, WNOHANG)) { 508 while (!waitpid(child_pid, &status, WNOHANG)) {
@@ -497,6 +514,7 @@ static int __run_perf_stat(int argc, const char **argv)
497 if (WIFSIGNALED(status)) 514 if (WIFSIGNALED(status))
498 psignal(WTERMSIG(status), argv[0]); 515 psignal(WTERMSIG(status), argv[0]);
499 } else { 516 } else {
517 handle_initial_delay();
500 while (!done) { 518 while (!done) {
501 nanosleep(&ts, NULL); 519 nanosleep(&ts, NULL);
502 if (interval) 520 if (interval)
@@ -1419,6 +1437,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1419 "aggregate counts per processor socket", AGGR_SOCKET), 1437 "aggregate counts per processor socket", AGGR_SOCKET),
1420 OPT_SET_UINT(0, "per-core", &aggr_mode, 1438 OPT_SET_UINT(0, "per-core", &aggr_mode,
1421 "aggregate counts per physical processor core", AGGR_CORE), 1439 "aggregate counts per physical processor core", AGGR_CORE),
1440 OPT_UINTEGER('D', "delay", &initial_delay,
1441 "ms to wait before starting measurement after program start"),
1422 OPT_END() 1442 OPT_END()
1423 }; 1443 };
1424 const char * const stat_usage[] = { 1444 const char * const stat_usage[] = {