diff options
author | Jiri Olsa <jolsa@kernel.org> | 2018-06-05 08:13:13 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2018-06-06 11:52:04 -0400 |
commit | 0ce2da1483967c75a0e031af152e0fca4110d376 (patch) | |
tree | 0daedfff0fe4ff44ee3846d1f143956ccaa53a9f /tools/perf | |
parent | f92da71280fb8da3a7c489e08a096f0b8715f939 (diff) |
perf stat: Display user and system time
Adding the support to read rusage data once the workload is finished and
display the system/user time values:
$ perf stat --null perf bench sched pipe
...
Performance counter stats for 'perf bench sched pipe':
5.342599256 seconds time elapsed
2.544434000 seconds user
4.549691000 seconds sys
It works only in non -r mode and only for workload target.
So as of now, for workload targets, we display 3 types of timings. The
time we meassure in perf stat from enable to disable+period:
5.342599256 seconds time elapsed
The time spent in user and system lands, displayed only for workload
session/target:
2.544434000 seconds user
4.549691000 seconds sys
Those times are the very same displayed by 'time' tool. They are
returned by wait4 call via the getrusage struct interface.
Committer notes:
Had to rename some variables to avoid this on older systems such as
centos:6:
builtin-stat.c: In function 'print_footer':
builtin-stat.c:1831: warning: declaration of 'stime' shadows a global declaration
/usr/include/time.h:297: warning: shadowed declaration is here
Committer testing:
# perf stat --null time perf bench sched pipe
# Running 'sched/pipe' benchmark:
# Executed 1000000 pipe operations between two processes
Total time: 5.526 [sec]
5.526534 usecs/op
180945 ops/sec
1.00user 6.25system 0:05.52elapsed 131%CPU (0avgtext+0avgdata 8056maxresident)k
0inputs+0outputs (0major+606minor)pagefaults 0swaps
Performance counter stats for 'time perf bench sched pipe':
5.530978744 seconds time elapsed
1.004037000 seconds user
6.259937000 seconds sys
#
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180605121313.31337-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 40 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 28 |
2 files changed, 56 insertions, 12 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 3a822f308e6d..5dfe102fb5b5 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -310,20 +310,38 @@ Users who wants to get the actual value can apply --no-metric-only. | |||
310 | EXAMPLES | 310 | EXAMPLES |
311 | -------- | 311 | -------- |
312 | 312 | ||
313 | $ perf stat -- make -j | 313 | $ perf stat -- make |
314 | 314 | ||
315 | Performance counter stats for 'make -j': | 315 | Performance counter stats for 'make': |
316 | 316 | ||
317 | 8117.370256 task clock ticks # 11.281 CPU utilization factor | 317 | 83723.452481 task-clock:u (msec) # 1.004 CPUs utilized |
318 | 678 context switches # 0.000 M/sec | 318 | 0 context-switches:u # 0.000 K/sec |
319 | 133 CPU migrations # 0.000 M/sec | 319 | 0 cpu-migrations:u # 0.000 K/sec |
320 | 235724 pagefaults # 0.029 M/sec | 320 | 3,228,188 page-faults:u # 0.039 M/sec |
321 | 24821162526 CPU cycles # 3057.784 M/sec | 321 | 229,570,665,834 cycles:u # 2.742 GHz |
322 | 18687303457 instructions # 2302.138 M/sec | 322 | 313,163,853,778 instructions:u # 1.36 insn per cycle |
323 | 172158895 cache references # 21.209 M/sec | 323 | 69,704,684,856 branches:u # 832.559 M/sec |
324 | 27075259 cache misses # 3.335 M/sec | 324 | 2,078,861,393 branch-misses:u # 2.98% of all branches |
325 | 325 | ||
326 | Wall-clock time elapsed: 719.554352 msecs | 326 | 83.409183620 seconds time elapsed |
327 | |||
328 | 74.684747000 seconds user | ||
329 | 8.739217000 seconds sys | ||
330 | |||
331 | TIMINGS | ||
332 | ------- | ||
333 | As displayed in the example above we can display 3 types of timings. | ||
334 | We always display the time the counters were enabled/alive: | ||
335 | |||
336 | 83.409183620 seconds time elapsed | ||
337 | |||
338 | For workload sessions we also display time the workloads spent in | ||
339 | user/system lands: | ||
340 | |||
341 | 74.684747000 seconds user | ||
342 | 8.739217000 seconds sys | ||
343 | |||
344 | Those times are the very same as displayed by the 'time' tool. | ||
327 | 345 | ||
328 | CSV FORMAT | 346 | CSV FORMAT |
329 | ---------- | 347 | ---------- |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a4f662a462c6..096ccb25c11f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -80,6 +80,9 @@ | |||
80 | #include <sys/stat.h> | 80 | #include <sys/stat.h> |
81 | #include <sys/wait.h> | 81 | #include <sys/wait.h> |
82 | #include <unistd.h> | 82 | #include <unistd.h> |
83 | #include <sys/time.h> | ||
84 | #include <sys/resource.h> | ||
85 | #include <sys/wait.h> | ||
83 | 86 | ||
84 | #include "sane_ctype.h" | 87 | #include "sane_ctype.h" |
85 | 88 | ||
@@ -175,6 +178,8 @@ static int output_fd; | |||
175 | static int print_free_counters_hint; | 178 | static int print_free_counters_hint; |
176 | static int print_mixed_hw_group_error; | 179 | static int print_mixed_hw_group_error; |
177 | static u64 *walltime_run; | 180 | static u64 *walltime_run; |
181 | static bool ru_display = false; | ||
182 | static struct rusage ru_data; | ||
178 | 183 | ||
179 | struct perf_stat { | 184 | struct perf_stat { |
180 | bool record; | 185 | bool record; |
@@ -726,7 +731,7 @@ try_again: | |||
726 | break; | 731 | break; |
727 | } | 732 | } |
728 | } | 733 | } |
729 | waitpid(child_pid, &status, 0); | 734 | wait4(child_pid, &status, 0, &ru_data); |
730 | 735 | ||
731 | if (workload_exec_errno) { | 736 | if (workload_exec_errno) { |
732 | const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); | 737 | const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); |
@@ -1804,6 +1809,11 @@ static void print_table(FILE *output, int precision, double avg) | |||
1804 | fprintf(output, "\n%*s# Final result:\n", indent, ""); | 1809 | fprintf(output, "\n%*s# Final result:\n", indent, ""); |
1805 | } | 1810 | } |
1806 | 1811 | ||
1812 | static double timeval2double(struct timeval *t) | ||
1813 | { | ||
1814 | return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC; | ||
1815 | } | ||
1816 | |||
1807 | static void print_footer(void) | 1817 | static void print_footer(void) |
1808 | { | 1818 | { |
1809 | double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; | 1819 | double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; |
@@ -1815,6 +1825,15 @@ static void print_footer(void) | |||
1815 | 1825 | ||
1816 | if (run_count == 1) { | 1826 | if (run_count == 1) { |
1817 | fprintf(output, " %17.9f seconds time elapsed", avg); | 1827 | fprintf(output, " %17.9f seconds time elapsed", avg); |
1828 | |||
1829 | if (ru_display) { | ||
1830 | double ru_utime = timeval2double(&ru_data.ru_utime); | ||
1831 | double ru_stime = timeval2double(&ru_data.ru_stime); | ||
1832 | |||
1833 | fprintf(output, "\n\n"); | ||
1834 | fprintf(output, " %17.9f seconds user\n", ru_utime); | ||
1835 | fprintf(output, " %17.9f seconds sys\n", ru_stime); | ||
1836 | } | ||
1818 | } else { | 1837 | } else { |
1819 | double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; | 1838 | double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; |
1820 | /* | 1839 | /* |
@@ -2950,6 +2969,13 @@ int cmd_stat(int argc, const char **argv) | |||
2950 | 2969 | ||
2951 | setup_system_wide(argc); | 2970 | setup_system_wide(argc); |
2952 | 2971 | ||
2972 | /* | ||
2973 | * Display user/system times only for single | ||
2974 | * run and when there's specified tracee. | ||
2975 | */ | ||
2976 | if ((run_count == 1) && target__none(&target)) | ||
2977 | ru_display = true; | ||
2978 | |||
2953 | if (run_count < 0) { | 2979 | if (run_count < 0) { |
2954 | pr_err("Run count must be a positive number\n"); | 2980 | pr_err("Run count must be a positive number\n"); |
2955 | parse_options_usage(stat_usage, stat_options, "r", 1); | 2981 | parse_options_usage(stat_usage, stat_options, "r", 1); |