summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
author Alexey Budankov <alexey.budankov@linux.intel.com> 2019-03-18 13:44:42 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2019-05-15 15:36:49 -0400
commit 504c1ad11691d1a16e92285bb961728a80c06014 (patch)
tree 0ff854f7458dbd9ce2490a9e4e003beda4292245 /tools
parent 61a7773ca88f32ef7e185fdf9fc0d44e8ec18a66 (diff)
perf record: Implement -z,--compression_level[=<n>] option
Implemented -z,--compression_level[=<n>] option that enables compression
of mmaped kernel data buffers content in runtime during perf record
mode collection. Default option value is 1 (fastest compression).

Compression overhead has been measured for serial and AIO streaming
when profiling matrix multiplication workload:

    -------------------------------------------------------------
    | SERIAL                      | AIO-1                       |
----------------------------------------------------------------|
|-z | OVH(x) | ratio(x) size(MiB) | OVH(x) | ratio(x) size(MiB) |
|---------------------------------------------------------------|
| 0 | 1,00   | 1,000    179,424   | 1,00   | 1,000    187,527   |
| 1 | 1,04   | 8,427    181,148   | 1,01   | 8,474    188,562   |
| 2 | 1,07   | 8,055    186,953   | 1,03   | 7,912    191,773   |
| 3 | 1,04   | 8,283    181,908   | 1,03   | 8,220    191,078   |
| 5 | 1,09   | 8,101    187,705   | 1,05   | 7,780    190,065   |
| 8 | 1,05   | 9,217    179,191   | 1,12   | 6,111    193,024   |
-----------------------------------------------------------------

OVH = (Execution time with -z N) / (Execution time with -z 0)

ratio - compression ratio
size  - number of bytes that was compressed

size ~= trace size x ratio

Committer notes:

Testing it I noticed that it failed to disable build id processing when
compression is enabled, and as we'd have to uncompress everything to
look for the PERF_RECORD_{MMAP,SAMPLE,etc} to figure out which build ids
to read from DSOs, we better disable build id processing when
compression is enabled, logging with pr_debug() when doing so:

Original patch:

  # perf record -z2
  ^C[ perf record: Woken up 1 times to write data ]
  0x1746e0 [0x76]: failed to process type: 81 [Invalid argument]
  [ perf record: Captured and wrote 1.568 MB perf.data, compressed (original 0.452 MB, ratio is 3.995) ]
  #

After auto-disabling build id processing when compression is enabled:

  $ perf record -z2 sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.001 MB perf.data, compressed (original 0.001 MB, ratio is 2.292) ]
  $ perf record -v -z2 sleep 1
  Compression enabled, disabling build id collection at the end of the session.
  <SNIP extra -v pr_debug() messages>
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.001 MB perf.data, compressed (original 0.001 MB, ratio is 2.305) ]
  $

Also, with parts of the patch originally after this one moved to just
before this one we get:

  $ perf record -z2 sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.001 MB perf.data, compressed (original 0.001 MB, ratio is 2.371) ]
  $ perf report -D | grep COMPRESS
  0 0x1b8 [0x155]: PERF_RECORD_COMPRESSED: unhandled!
  0 0x30d [0x80]: PERF_RECORD_COMPRESSED: unhandled!
        COMPRESSED events:          2
        COMPRESSED events:          0
  $

I.e. when faced with PERF_RECORD_COMPRESSED that we still have no code
to process, we just show it as not being handled, skip them and
continue, while before we had:

  $ perf report -D | grep COMPRESS
  0x1b8 [0x169]: failed to process type: 81 [Invalid argument]
  Error:
  failed to process sample
  0 0x1b8 [0x169]: PERF_RECORD_COMPRESSED
  $

Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/9ff06518-ae63-a908-e44d-5d9e56dd66d9@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- tools/perf/Documentation/perf-record.txt | 5
-rw-r--r-- tools/perf/builtin-record.c | 30
2 files changed, 35 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 58986f4cc190..27b37624c376 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -478,6 +478,11 @@ Also at some cases executing less output write syscalls with bigger data size
 can take less time than executing more output write syscalls with smaller data
 size thus lowering runtime profiling overhead.
 
+-z::
+--compression-level[=n]::
+Produce compressed trace using specified level n (default: 1 - fastest compression,
+22 - smallest trace)
+
 --all-kernel::
 Configure all used events to run in kernel space.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a0bd9104fae6..861395753c25 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -443,6 +443,25 @@ static int record__mmap_flush_parse(const struct option *opt,
443 return 0; 443 return 0;
444} 444}
445 445
446#ifdef HAVE_ZSTD_SUPPORT
447static unsigned int comp_level_default = 1;
448
449static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
450{
451 struct record_opts *opts = opt->value;
452
453 if (unset) {
454 opts->comp_level = 0;
455 } else {
456 if (str)
457 opts->comp_level = strtol(str, NULL, 0);
458 if (!opts->comp_level)
459 opts->comp_level = comp_level_default;
460 }
461
462 return 0;
463}
464#endif
446static unsigned int comp_level_max = 22; 465static unsigned int comp_level_max = 22;
447 466
448static int record__comp_enabled(struct record *rec) 467static int record__comp_enabled(struct record *rec)
@@ -2200,6 +2219,11 @@ static struct option __record_options[] = {
2200 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 2219 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2201 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 2220 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2202 record__parse_affinity), 2221 record__parse_affinity),
2222#ifdef HAVE_ZSTD_SUPPORT
2223 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2224 "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
2225 record__parse_comp_level),
2226#endif
2203 OPT_END() 2227 OPT_END()
2204}; 2228};
2205 2229
@@ -2259,6 +2283,12 @@ int cmd_record(int argc, const char **argv)
2259 "cgroup monitoring only available in system-wide mode"); 2283 "cgroup monitoring only available in system-wide mode");
2260 2284
2261 } 2285 }
2286
2287 if (rec->opts.comp_level != 0) {
2288 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2289 rec->no_buildid = true;
2290 }
2291
2262 if (rec->opts.record_switch_events && 2292 if (rec->opts.record_switch_events &&
2263 !perf_can_record_switch_events()) { 2293 !perf_can_record_switch_events()) {
2264 ui__error("kernel does not support recording context switch events\n"); 2294 ui__error("kernel does not support recording context switch events\n");