diff options
author | Alexey Budankov <alexey.budankov@linux.intel.com> | 2019-03-18 13:44:42 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2019-05-15 15:36:49 -0400 |
commit | 504c1ad11691d1a16e92285bb961728a80c06014 (patch) | |
tree | 0ff854f7458dbd9ce2490a9e4e003beda4292245 /tools | |
parent | 61a7773ca88f32ef7e185fdf9fc0d44e8ec18a66 (diff) |
perf record: Implement -z,--compression_level[=<n>] option
Implement the -z,--compression-level[=<n>] option, which enables runtime
compression of the content of mmapped kernel data buffers during perf record
collection. The default option value is 1 (fastest compression).
Compression overhead has been measured for serial and AIO streaming when
profiling matrix multiplication workload:
------------------------------------------------------------------
|    |           SERIAL            |            AIO-1            |
|----------------------------------------------------------------|
| -z | OVH(x) | ratio(x) size(MiB) | OVH(x) | ratio(x) size(MiB) |
|----------------------------------------------------------------|
|  0 | 1,00   | 1,000    179,424   | 1,00   | 1,000    187,527   |
|  1 | 1,04   | 8,427    181,148   | 1,01   | 8,474    188,562   |
|  2 | 1,07   | 8,055    186,953   | 1,03   | 7,912    191,773   |
|  3 | 1,04   | 8,283    181,908   | 1,03   | 8,220    191,078   |
|  5 | 1,09   | 8,101    187,705   | 1,05   | 7,780    190,065   |
|  8 | 1,05   | 9,217    179,191   | 1,12   | 6,111    193,024   |
------------------------------------------------------------------
OVH = (Execution time with -z N) / (Execution time with -z 0)
ratio - compression ratio
size - number of bytes that were compressed
size ~= trace size x ratio
Committer notes:
Testing it, I noticed that it failed to disable build id processing when
compression is enabled. Since we'd have to uncompress everything to
look for the PERF_RECORD_{MMAP,SAMPLE,etc} to figure out which build ids
to read from DSOs, we'd better disable build id processing when
compression is enabled, logging with pr_debug() when doing so:
Original patch:
# perf record -z2
^C[ perf record: Woken up 1 times to write data ]
0x1746e0 [0x76]: failed to process type: 81 [Invalid argument]
[ perf record: Captured and wrote 1.568 MB perf.data, compressed (original 0.452 MB, ratio is 3.995) ]
#
After auto-disabling build id processing when compression is enabled:
$ perf record -z2 sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.001 MB perf.data, compressed (original 0.001 MB, ratio is 2.292) ]
$ perf record -v -z2 sleep 1
Compression enabled, disabling build id collection at the end of the session.
<SNIP extra -v pr_debug() messages>
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.001 MB perf.data, compressed (original 0.001 MB, ratio is 2.305) ]
$
Also, with parts of the patch that originally came after this one moved to
just before it, we get:
$ perf record -z2 sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.001 MB perf.data, compressed (original 0.001 MB, ratio is 2.371) ]
$ perf report -D | grep COMPRESS
0 0x1b8 [0x155]: PERF_RECORD_COMPRESSED: unhandled!
0 0x30d [0x80]: PERF_RECORD_COMPRESSED: unhandled!
COMPRESSED events: 2
COMPRESSED events: 0
$
I.e. when faced with PERF_RECORD_COMPRESSED events that we still have no code
to process, we just show them as not being handled, skip them and
continue, while before we had:
$ perf report -D | grep COMPRESS
0x1b8 [0x169]: failed to process type: 81 [Invalid argument]
Error:
failed to process sample
0 0x1b8 [0x169]: PERF_RECORD_COMPRESSED
$
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/9ff06518-ae63-a908-e44d-5d9e56dd66d9@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-record.txt | 5 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 30 |
2 files changed, 35 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 58986f4cc190..27b37624c376 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
@@ -478,6 +478,11 @@ Also at some cases executing less output write syscalls with bigger data size | |||
478 | can take less time than executing more output write syscalls with smaller data | 478 | can take less time than executing more output write syscalls with smaller data |
479 | size thus lowering runtime profiling overhead. | 479 | size thus lowering runtime profiling overhead. |
480 | 480 | ||
481 | -z:: | ||
482 | --compression-level[=n]:: | ||
483 | Produce compressed trace using specified level n (default: 1 - fastest compression, | ||
484 | 22 - smallest trace) | ||
485 | |||
481 | --all-kernel:: | 486 | --all-kernel:: |
482 | Configure all used events to run in kernel space. | 487 | Configure all used events to run in kernel space. |
483 | 488 | ||
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a0bd9104fae6..861395753c25 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -443,6 +443,25 @@ static int record__mmap_flush_parse(const struct option *opt, | |||
443 | return 0; | 443 | return 0; |
444 | } | 444 | } |
445 | 445 | ||
446 | #ifdef HAVE_ZSTD_SUPPORT | ||
447 | static unsigned int comp_level_default = 1; | ||
448 | |||
449 | static int record__parse_comp_level(const struct option *opt, const char *str, int unset) | ||
450 | { | ||
451 | struct record_opts *opts = opt->value; | ||
452 | |||
453 | if (unset) { | ||
454 | opts->comp_level = 0; | ||
455 | } else { | ||
456 | if (str) | ||
457 | opts->comp_level = strtol(str, NULL, 0); | ||
458 | if (!opts->comp_level) | ||
459 | opts->comp_level = comp_level_default; | ||
460 | } | ||
461 | |||
462 | return 0; | ||
463 | } | ||
464 | #endif | ||
446 | static unsigned int comp_level_max = 22; | 465 | static unsigned int comp_level_max = 22; |
447 | 466 | ||
448 | static int record__comp_enabled(struct record *rec) | 467 | static int record__comp_enabled(struct record *rec) |
@@ -2200,6 +2219,11 @@ static struct option __record_options[] = { | |||
2200 | OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", | 2219 | OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", |
2201 | "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", | 2220 | "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", |
2202 | record__parse_affinity), | 2221 | record__parse_affinity), |
2222 | #ifdef HAVE_ZSTD_SUPPORT | ||
2223 | OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, | ||
2224 | "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)", | ||
2225 | record__parse_comp_level), | ||
2226 | #endif | ||
2203 | OPT_END() | 2227 | OPT_END() |
2204 | }; | 2228 | }; |
2205 | 2229 | ||
@@ -2259,6 +2283,12 @@ int cmd_record(int argc, const char **argv) | |||
2259 | "cgroup monitoring only available in system-wide mode"); | 2283 | "cgroup monitoring only available in system-wide mode"); |
2260 | 2284 | ||
2261 | } | 2285 | } |
2286 | |||
2287 | if (rec->opts.comp_level != 0) { | ||
2288 | pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); | ||
2289 | rec->no_buildid = true; | ||
2290 | } | ||
2291 | |||
2262 | if (rec->opts.record_switch_events && | 2292 | if (rec->opts.record_switch_events && |
2263 | !perf_can_record_switch_events()) { | 2293 | !perf_can_record_switch_events()) { |
2264 | ui__error("kernel does not support recording context switch events\n"); | 2294 | ui__error("kernel does not support recording context switch events\n"); |