aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorJiri Olsa <jolsa@kernel.org>2018-06-05 08:13:13 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2018-06-06 11:52:04 -0400
commit0ce2da1483967c75a0e031af152e0fca4110d376 (patch)
tree0daedfff0fe4ff44ee3846d1f143956ccaa53a9f /tools/perf
parentf92da71280fb8da3a7c489e08a096f0b8715f939 (diff)
perf stat: Display user and system time
Adding the support to read rusage data once the workload is finished and display the system/user time values: $ perf stat --null perf bench sched pipe ... Performance counter stats for 'perf bench sched pipe': 5.342599256 seconds time elapsed 2.544434000 seconds user 4.549691000 seconds sys It works only in non -r mode and only for workload target. So as of now, for workload targets, we display 3 types of timings. The time we meassure in perf stat from enable to disable+period: 5.342599256 seconds time elapsed The time spent in user and system lands, displayed only for workload session/target: 2.544434000 seconds user 4.549691000 seconds sys Those times are the very same displayed by 'time' tool. They are returned by wait4 call via the getrusage struct interface. Committer notes: Had to rename some variables to avoid this on older systems such as centos:6: builtin-stat.c: In function 'print_footer': builtin-stat.c:1831: warning: declaration of 'stime' shadows a global declaration /usr/include/time.h:297: warning: shadowed declaration is here Committer testing: # perf stat --null time perf bench sched pipe # Running 'sched/pipe' benchmark: # Executed 1000000 pipe operations between two processes Total time: 5.526 [sec] 5.526534 usecs/op 180945 ops/sec 1.00user 6.25system 0:05.52elapsed 131%CPU (0avgtext+0avgdata 8056maxresident)k 0inputs+0outputs (0major+606minor)pagefaults 0swaps Performance counter stats for 'time perf bench sched pipe': 5.530978744 seconds time elapsed 1.004037000 seconds user 6.259937000 seconds sys # Suggested-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/20180605121313.31337-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-stat.txt40
-rw-r--r--tools/perf/builtin-stat.c28
2 files changed, 56 insertions, 12 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 3a822f308e6d..5dfe102fb5b5 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -310,20 +310,38 @@ Users who wants to get the actual value can apply --no-metric-only.
310EXAMPLES 310EXAMPLES
311-------- 311--------
312 312
313$ perf stat -- make -j 313$ perf stat -- make
314 314
315 Performance counter stats for 'make -j': 315 Performance counter stats for 'make':
316 316
317 8117.370256 task clock ticks # 11.281 CPU utilization factor 317 83723.452481 task-clock:u (msec) # 1.004 CPUs utilized
318 678 context switches # 0.000 M/sec 318 0 context-switches:u # 0.000 K/sec
319 133 CPU migrations # 0.000 M/sec 319 0 cpu-migrations:u # 0.000 K/sec
320 235724 pagefaults # 0.029 M/sec 320 3,228,188 page-faults:u # 0.039 M/sec
321 24821162526 CPU cycles # 3057.784 M/sec 321 229,570,665,834 cycles:u # 2.742 GHz
322 18687303457 instructions # 2302.138 M/sec 322 313,163,853,778 instructions:u # 1.36 insn per cycle
323 172158895 cache references # 21.209 M/sec 323 69,704,684,856 branches:u # 832.559 M/sec
324 27075259 cache misses # 3.335 M/sec 324 2,078,861,393 branch-misses:u # 2.98% of all branches
325 325
326 Wall-clock time elapsed: 719.554352 msecs 326 83.409183620 seconds time elapsed
327
328 74.684747000 seconds user
329 8.739217000 seconds sys
330
331TIMINGS
332-------
333As displayed in the example above we can display 3 types of timings.
334We always display the time the counters were enabled/alive:
335
336 83.409183620 seconds time elapsed
337
338For workload sessions we also display time the workloads spent in
339user/system lands:
340
341 74.684747000 seconds user
342 8.739217000 seconds sys
343
344Those times are the very same as displayed by the 'time' tool.
327 345
328CSV FORMAT 346CSV FORMAT
329---------- 347----------
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a4f662a462c6..096ccb25c11f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -80,6 +80,9 @@
80#include <sys/stat.h> 80#include <sys/stat.h>
81#include <sys/wait.h> 81#include <sys/wait.h>
82#include <unistd.h> 82#include <unistd.h>
83#include <sys/time.h>
84#include <sys/resource.h>
85#include <sys/wait.h>
83 86
84#include "sane_ctype.h" 87#include "sane_ctype.h"
85 88
@@ -175,6 +178,8 @@ static int output_fd;
175static int print_free_counters_hint; 178static int print_free_counters_hint;
176static int print_mixed_hw_group_error; 179static int print_mixed_hw_group_error;
177static u64 *walltime_run; 180static u64 *walltime_run;
181static bool ru_display = false;
182static struct rusage ru_data;
178 183
179struct perf_stat { 184struct perf_stat {
180 bool record; 185 bool record;
@@ -726,7 +731,7 @@ try_again:
726 break; 731 break;
727 } 732 }
728 } 733 }
729 waitpid(child_pid, &status, 0); 734 wait4(child_pid, &status, 0, &ru_data);
730 735
731 if (workload_exec_errno) { 736 if (workload_exec_errno) {
732 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 737 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@@ -1804,6 +1809,11 @@ static void print_table(FILE *output, int precision, double avg)
1804 fprintf(output, "\n%*s# Final result:\n", indent, ""); 1809 fprintf(output, "\n%*s# Final result:\n", indent, "");
1805} 1810}
1806 1811
1812static double timeval2double(struct timeval *t)
1813{
1814 return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
1815}
1816
1807static void print_footer(void) 1817static void print_footer(void)
1808{ 1818{
1809 double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; 1819 double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
@@ -1815,6 +1825,15 @@ static void print_footer(void)
1815 1825
1816 if (run_count == 1) { 1826 if (run_count == 1) {
1817 fprintf(output, " %17.9f seconds time elapsed", avg); 1827 fprintf(output, " %17.9f seconds time elapsed", avg);
1828
1829 if (ru_display) {
1830 double ru_utime = timeval2double(&ru_data.ru_utime);
1831 double ru_stime = timeval2double(&ru_data.ru_stime);
1832
1833 fprintf(output, "\n\n");
1834 fprintf(output, " %17.9f seconds user\n", ru_utime);
1835 fprintf(output, " %17.9f seconds sys\n", ru_stime);
1836 }
1818 } else { 1837 } else {
1819 double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; 1838 double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
1820 /* 1839 /*
@@ -2950,6 +2969,13 @@ int cmd_stat(int argc, const char **argv)
2950 2969
2951 setup_system_wide(argc); 2970 setup_system_wide(argc);
2952 2971
2972 /*
2973 * Display user/system times only for single
2974 * run and when there's specified tracee.
2975 */
2976 if ((run_count == 1) && target__none(&target))
2977 ru_display = true;
2978
2953 if (run_count < 0) { 2979 if (run_count < 0) {
2954 pr_err("Run count must be a positive number\n"); 2980 pr_err("Run count must be a positive number\n");
2955 parse_options_usage(stat_usage, stat_options, "r", 1); 2981 parse_options_usage(stat_usage, stat_options, "r", 1);