aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWang Nan <wangnan0@huawei.com>2016-07-14 04:34:47 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2016-07-15 16:27:52 -0400
commit4ea648aec01982d5a57816a95c4665d6081e78f9 (patch)
tree7aabc47adaca3014da0e3e9b53bd645d2b461fcc
parentf06149c0db430d3694d601df126b0944cc0156a6 (diff)
perf record: Add --tail-synthesize option
When working with an overwritable ring buffer there is an inconvenience: if perf dumps data a long time after it starts, non-sample events may be lost, which makes a following 'perf report' unable to identify the proc name and mmap layout. For example: # perf record -m 4 -e raw_syscalls:* -g --overwrite --switch-output \ dd if=/dev/zero of=/dev/null then send SIGUSR2 after dd runs long enough. The resulting perf.data has lost the correct comm and mmap events: # perf script -i perf.data.2016061522374354 perf 24478 [004] 2581325.601789: raw_syscalls:sys_exit: NR 0 = 512 ^^^^ Should be 'dd' 27b2e8 syscall_slow_exit_work+0xfe2000e3 (/lib/modules/4.6.0-rc3+/build/vmlinux) 203cc7 do_syscall_64+0xfe200117 (/lib/modules/4.6.0-rc3+/build/vmlinux) b18d83 return_from_SYSCALL_64+0xfe200000 (/lib/modules/4.6.0-rc3+/build/vmlinux) 7f47c417edf0 [unknown] ([unknown]) ^^^^^^^^^^^^ Fail to unwind This patch provides a '--tail-synthesize' option, which allows perf to collect system status when finalizing an output file. In the resulting output file, the non-sample events reflect system status when dumping data. After this patch: # perf record -m 4 -e raw_syscalls:* -g --overwrite --switch-output --tail-synthesize \ dd if=/dev/zero of=/dev/null # perf script -i perf.data.2016061600544998 dd 27364 [004] 2583244.994464: raw_syscalls:sys_enter: NR 1 (1, ... ^^ Correct comm 203a18 syscall_trace_enter_phase2+0xfe2001a8 ([kernel.kallsyms]) 203aa5 syscall_trace_enter+0xfe200055 ([kernel.kallsyms]) 203caa do_syscall_64+0xfe2000fa ([kernel.kallsyms]) b18d83 return_from_SYSCALL_64+0xfe200000 ([kernel.kallsyms]) d8e50 __GI___libc_write+0xffff01d9639f4010 (/tmp/oxygen_root-w00229757/lib64/libc-2.18.so) ^^^^^ Correct unwind This option doesn't aim to solve this problem completely. If a process terminates before SIGUSR2, we still lose its COMM and MMAP events. 
For example, we can't unwind correctly from the final perf.data we get from the previous example, because when perf collects the final output file (when we press C-c), 'dd' has already terminated, so its '/proc/<pid>/mmap' becomes empty. However, this is a cheaper choice. To completely solve this problem we need to continuously output non-sample events. To satisfy the requirement of daemonization, we need to merge them periodically. It is possible but requires much more code and cycles. Automatically select --tail-synthesize when --overwrite is provided. Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Nilay Vaish <nilayvaish@gmail.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1468485287-33422-16-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/Documentation/perf-record.txt8
-rw-r--r--tools/perf/builtin-record.c31
-rw-r--r--tools/perf/perf.h1
3 files changed, 34 insertions, 6 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 384c630436f8..69966abf65d1 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -367,6 +367,12 @@ options.
367'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj 367'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
368in config file is set to true. 368in config file is set to true.
369 369
370--tail-synthesize::
371Instead of collecting non-sample events (for example, fork, comm, mmap) at
372the beginning of record, collect them when finalizing an output file.
373The collected non-sample events reflect the status of the system when
374record is finished.
375
370--overwrite:: 376--overwrite::
371Makes all events use an overwritable ring buffer. An overwritable ring 377Makes all events use an overwritable ring buffer. An overwritable ring
372buffer works like a flight recorder: when it gets full, the kernel will 378buffer works like a flight recorder: when it gets full, the kernel will
@@ -381,6 +387,8 @@ those fitting in the ring buffer at that moment.
381'overwrite' attribute can also be set or canceled for an event using 387'overwrite' attribute can also be set or canceled for an event using
382config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'. 388config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'.
383 389
390Implies --tail-synthesize.
391
384SEE ALSO 392SEE ALSO
385-------- 393--------
386linkperf:perf-stat[1], linkperf:perf-list[1] 394linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 39c7486f0607..8f2c16d9275f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -604,13 +604,16 @@ record__finish_output(struct record *rec)
604 return; 604 return;
605} 605}
606 606
607static int record__synthesize_workload(struct record *rec) 607static int record__synthesize_workload(struct record *rec, bool tail)
608{ 608{
609 struct { 609 struct {
610 struct thread_map map; 610 struct thread_map map;
611 struct thread_map_data map_data; 611 struct thread_map_data map_data;
612 } thread_map; 612 } thread_map;
613 613
614 if (rec->opts.tail_synthesize != tail)
615 return 0;
616
614 thread_map.map.nr = 1; 617 thread_map.map.nr = 1;
615 thread_map.map.map[0].pid = rec->evlist->workload.pid; 618 thread_map.map.map[0].pid = rec->evlist->workload.pid;
616 thread_map.map.map[0].comm = NULL; 619 thread_map.map.map[0].comm = NULL;
@@ -621,7 +624,7 @@ static int record__synthesize_workload(struct record *rec)
621 rec->opts.proc_map_timeout); 624 rec->opts.proc_map_timeout);
622} 625}
623 626
624static int record__synthesize(struct record *rec); 627static int record__synthesize(struct record *rec, bool tail);
625 628
626static int 629static int
627record__switch_output(struct record *rec, bool at_exit) 630record__switch_output(struct record *rec, bool at_exit)
@@ -632,6 +635,10 @@ record__switch_output(struct record *rec, bool at_exit)
632 /* Same Size: "2015122520103046"*/ 635 /* Same Size: "2015122520103046"*/
633 char timestamp[] = "InvalidTimestamp"; 636 char timestamp[] = "InvalidTimestamp";
634 637
638 record__synthesize(rec, true);
639 if (target__none(&rec->opts.target))
640 record__synthesize_workload(rec, true);
641
635 rec->samples = 0; 642 rec->samples = 0;
636 record__finish_output(rec); 643 record__finish_output(rec);
637 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 644 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
@@ -654,7 +661,7 @@ record__switch_output(struct record *rec, bool at_exit)
654 661
655 /* Output tracking events */ 662 /* Output tracking events */
656 if (!at_exit) { 663 if (!at_exit) {
657 record__synthesize(rec); 664 record__synthesize(rec, false);
658 665
659 /* 666 /*
660 * In 'perf record --switch-output' without -a, 667 * In 'perf record --switch-output' without -a,
@@ -666,7 +673,7 @@ record__switch_output(struct record *rec, bool at_exit)
666 * perf_event__synthesize_thread_map() for those events. 673 * perf_event__synthesize_thread_map() for those events.
667 */ 674 */
668 if (target__none(&rec->opts.target)) 675 if (target__none(&rec->opts.target))
669 record__synthesize_workload(rec); 676 record__synthesize_workload(rec, false);
670 } 677 }
671 return fd; 678 return fd;
672} 679}
@@ -720,7 +727,7 @@ static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
720 return NULL; 727 return NULL;
721} 728}
722 729
723static int record__synthesize(struct record *rec) 730static int record__synthesize(struct record *rec, bool tail)
724{ 731{
725 struct perf_session *session = rec->session; 732 struct perf_session *session = rec->session;
726 struct machine *machine = &session->machines.host; 733 struct machine *machine = &session->machines.host;
@@ -730,6 +737,9 @@ static int record__synthesize(struct record *rec)
730 int fd = perf_data_file__fd(file); 737 int fd = perf_data_file__fd(file);
731 int err = 0; 738 int err = 0;
732 739
740 if (rec->opts.tail_synthesize != tail)
741 return 0;
742
733 if (file->is_pipe) { 743 if (file->is_pipe) {
734 err = perf_event__synthesize_attrs(tool, session, 744 err = perf_event__synthesize_attrs(tool, session,
735 process_synthesized_event); 745 process_synthesized_event);
@@ -893,7 +903,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
893 903
894 machine = &session->machines.host; 904 machine = &session->machines.host;
895 905
896 err = record__synthesize(rec); 906 err = record__synthesize(rec, false);
897 if (err < 0) 907 if (err < 0)
898 goto out_child; 908 goto out_child;
899 909
@@ -1057,6 +1067,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
1057 if (!quiet) 1067 if (!quiet)
1058 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 1068 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1059 1069
1070 if (target__none(&rec->opts.target))
1071 record__synthesize_workload(rec, true);
1072
1060out_child: 1073out_child:
1061 if (forks) { 1074 if (forks) {
1062 int exit_status; 1075 int exit_status;
@@ -1075,6 +1088,7 @@ out_child:
1075 } else 1088 } else
1076 status = err; 1089 status = err;
1077 1090
1091 record__synthesize(rec, true);
1078 /* this will be recalculated during process_buildids() */ 1092 /* this will be recalculated during process_buildids() */
1079 rec->samples = 0; 1093 rec->samples = 0;
1080 1094
@@ -1399,6 +1413,8 @@ struct option __record_options[] = {
1399 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 1413 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1400 &record.opts.no_inherit_set, 1414 &record.opts.no_inherit_set,
1401 "child tasks do not inherit counters"), 1415 "child tasks do not inherit counters"),
1416 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1417 "synthesize non-sample events at the end of output"),
1402 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 1418 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1403 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), 1419 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1404 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 1420 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
@@ -1610,6 +1626,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1610 } 1626 }
1611 } 1627 }
1612 1628
1629 if (record.opts.overwrite)
1630 record.opts.tail_synthesize = true;
1631
1613 if (rec->evlist->nr_entries == 0 && 1632 if (rec->evlist->nr_entries == 0 &&
1614 perf_evlist__add_default(rec->evlist) < 0) { 1633 perf_evlist__add_default(rec->evlist) < 0) {
1615 pr_err("Not enough memory for event selector list\n"); 1634 pr_err("Not enough memory for event selector list\n");
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 608b42bdb1b6..a7e0f1497244 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -59,6 +59,7 @@ struct record_opts {
59 bool record_switch_events; 59 bool record_switch_events;
60 bool all_kernel; 60 bool all_kernel;
61 bool all_user; 61 bool all_user;
62 bool tail_synthesize;
62 bool overwrite; 63 bool overwrite;
63 unsigned int freq; 64 unsigned int freq;
64 unsigned int mmap_pages; 65 unsigned int mmap_pages;