aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
author Andi Kleen <ak@linux.intel.com> 2016-03-28 13:45:38 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2016-03-30 10:14:09 -0400
commit d1706b39f0af6901ab2a5e2ebb210b53c1a5bdc7 (patch)
tree 4bee0f637581706387688651a57fb867c7f6e559 /tools
parent f7380c12ec6cfd69f274ba6181cd01c764f877bb (diff)
perf tools: Add support for skipping itrace instructions
When using 'perf script' to look at PT traces it is often useful to ignore the initialization code at the beginning. On larger traces, which may have many millions of instructions in initialization code, doing that in a pipeline can be very slow, with perf script spending a lot of CPU time calling printf and writing data.

This patch adds an extension to the --itrace argument that skips 'n' events (instructions, branches or transactions) at the beginning. This is much more efficient.

v2:
- Add support for BTS (Adrian Hunter)
- Document in itrace.txt
- Fix branch check
- Check transactions and instructions too

Committer note: To test intel_pt one needs to make sure VT-x isn't active, i.e. stopping KVM guests on the test machine, as described by Andi Kleen at http://lkml.kernel.org/r/20160301234953.GD23621@tassilo.jf.intel.com

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1459187142-20035-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- tools/perf/Documentation/intel-pt.txt 7
-rw-r--r-- tools/perf/Documentation/itrace.txt 8
-rw-r--r-- tools/perf/util/auxtrace.c 7
-rw-r--r-- tools/perf/util/auxtrace.h 2
-rw-r--r-- tools/perf/util/intel-bts.c 5
-rw-r--r-- tools/perf/util/intel-pt.c 22
6 files changed, 49 insertions, 2 deletions
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index be764f9ec769..c6c8318e38a2 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -672,6 +672,7 @@ The letters are:
672 d create a debug log 672 d create a debug log
673 g synthesize a call chain (use with i or x) 673 g synthesize a call chain (use with i or x)
674 l synthesize last branch entries (use with i or x) 674 l synthesize last branch entries (use with i or x)
675 s skip initial number of events
675 676
676"Instructions" events look like they were recorded by "perf record -e 677"Instructions" events look like they were recorded by "perf record -e
677instructions". 678instructions".
@@ -730,6 +731,12 @@ from one sample to the next.
730 731
731To disable trace decoding entirely, use the option --no-itrace. 732To disable trace decoding entirely, use the option --no-itrace.
732 733
734 It is also possible to skip a number of the generated events (instructions, branches,
735 transactions) at the beginning of the trace. This is useful to ignore initialization code.
736
737 --itrace=i0nss1000000
738
739 skips the first million instructions ('i0ns' sets an instructions period of 0 ns; 's1000000' is the skip count).
733 740
734dump option 741dump option
735----------- 742-----------
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 65453f4c7006..e2a4c5e0dbe5 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -7,6 +7,7 @@
7 d create a debug log 7 d create a debug log
8 g synthesize a call chain (use with i or x) 8 g synthesize a call chain (use with i or x)
9 l synthesize last branch entries (use with i or x) 9 l synthesize last branch entries (use with i or x)
10 s skip initial number of events
10 11
11 The default is all events i.e. the same as --itrace=ibxe 12 The default is all events i.e. the same as --itrace=ibxe
12 13
@@ -24,3 +25,10 @@
24 25
25 Also the number of last branch entries (default 64, max. 1024) for 26 Also the number of last branch entries (default 64, max. 1024) for
26 instructions or transactions events can be specified. 27 instructions or transactions events can be specified.
28
29 It is also possible to skip a number of the generated events (instructions, branches,
30 transactions) at the beginning of the trace. This is useful to ignore initialization code.
31
32 --itrace=i0nss1000000
33
34 skips the first million instructions ('i0ns' sets an instructions period of 0 ns; 's1000000' is the skip count).
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index ec164fe70718..c9169011e55e 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -940,6 +940,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
940 synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; 940 synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
941 synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; 941 synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
942 synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; 942 synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
943 synth_opts->initial_skip = 0;
943} 944}
944 945
945/* 946/*
@@ -1064,6 +1065,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
1064 synth_opts->last_branch_sz = val; 1065 synth_opts->last_branch_sz = val;
1065 } 1066 }
1066 break; 1067 break;
1068 case 's':
1069 synth_opts->initial_skip = strtoul(p, &endptr, 10);
1070 if (p == endptr)
1071 goto out_err;
1072 p = endptr;
1073 break;
1067 case ' ': 1074 case ' ':
1068 case ',': 1075 case ',':
1069 break; 1076 break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 57ff31ecb8e4..767989e0e312 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -68,6 +68,7 @@ enum itrace_period_type {
68 * @last_branch_sz: branch context size 68 * @last_branch_sz: branch context size
69 * @period: 'instructions' events period 69 * @period: 'instructions' events period
70 * @period_type: 'instructions' events period type 70 * @period_type: 'instructions' events period type
71 * @initial_skip: skip N events at the beginning.
71 */ 72 */
72struct itrace_synth_opts { 73struct itrace_synth_opts {
73 bool set; 74 bool set;
@@ -86,6 +87,7 @@ struct itrace_synth_opts {
86 unsigned int last_branch_sz; 87 unsigned int last_branch_sz;
87 unsigned long long period; 88 unsigned long long period;
88 enum itrace_period_type period_type; 89 enum itrace_period_type period_type;
90 unsigned long initial_skip;
89}; 91};
90 92
91/** 93/**
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index abf1366e2a24..9df996085563 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -66,6 +66,7 @@ struct intel_bts {
66 u64 branches_id; 66 u64 branches_id;
67 size_t branches_event_size; 67 size_t branches_event_size;
68 bool synth_needs_swap; 68 bool synth_needs_swap;
69 unsigned long num_events;
69}; 70};
70 71
71struct intel_bts_queue { 72struct intel_bts_queue {
@@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
275 union perf_event event; 276 union perf_event event;
276 struct perf_sample sample = { .ip = 0, }; 277 struct perf_sample sample = { .ip = 0, };
277 278
279 if (bts->synth_opts.initial_skip &&
280 bts->num_events++ <= bts->synth_opts.initial_skip)
281 return 0;
282
278 event.sample.header.type = PERF_RECORD_SAMPLE; 283 event.sample.header.type = PERF_RECORD_SAMPLE;
279 event.sample.header.misc = PERF_RECORD_MISC_USER; 284 event.sample.header.misc = PERF_RECORD_MISC_USER;
280 event.sample.header.size = sizeof(struct perf_event_header); 285 event.sample.header.size = sizeof(struct perf_event_header);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 407f11b97c8d..ddec87f6e616 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -100,6 +100,8 @@ struct intel_pt {
100 u64 cyc_bit; 100 u64 cyc_bit;
101 u64 noretcomp_bit; 101 u64 noretcomp_bit;
102 unsigned max_non_turbo_ratio; 102 unsigned max_non_turbo_ratio;
103
104 unsigned long num_events;
103}; 105};
104 106
105enum switch_state { 107enum switch_state {
@@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
972 if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) 974 if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
973 return 0; 975 return 0;
974 976
977 if (pt->synth_opts.initial_skip &&
978 pt->num_events++ < pt->synth_opts.initial_skip)
979 return 0;
980
975 event->sample.header.type = PERF_RECORD_SAMPLE; 981 event->sample.header.type = PERF_RECORD_SAMPLE;
976 event->sample.header.misc = PERF_RECORD_MISC_USER; 982 event->sample.header.misc = PERF_RECORD_MISC_USER;
977 event->sample.header.size = sizeof(struct perf_event_header); 983 event->sample.header.size = sizeof(struct perf_event_header);
@@ -1029,6 +1035,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
1029 union perf_event *event = ptq->event_buf; 1035 union perf_event *event = ptq->event_buf;
1030 struct perf_sample sample = { .ip = 0, }; 1036 struct perf_sample sample = { .ip = 0, };
1031 1037
1038 if (pt->synth_opts.initial_skip &&
1039 pt->num_events++ < pt->synth_opts.initial_skip)
1040 return 0;
1041
1032 event->sample.header.type = PERF_RECORD_SAMPLE; 1042 event->sample.header.type = PERF_RECORD_SAMPLE;
1033 event->sample.header.misc = PERF_RECORD_MISC_USER; 1043 event->sample.header.misc = PERF_RECORD_MISC_USER;
1034 event->sample.header.size = sizeof(struct perf_event_header); 1044 event->sample.header.size = sizeof(struct perf_event_header);
@@ -1087,6 +1097,10 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1087 union perf_event *event = ptq->event_buf; 1097 union perf_event *event = ptq->event_buf;
1088 struct perf_sample sample = { .ip = 0, }; 1098 struct perf_sample sample = { .ip = 0, };
1089 1099
1100 if (pt->synth_opts.initial_skip &&
1101 pt->num_events++ < pt->synth_opts.initial_skip)
1102 return 0;
1103
1090 event->sample.header.type = PERF_RECORD_SAMPLE; 1104 event->sample.header.type = PERF_RECORD_SAMPLE;
1091 event->sample.header.misc = PERF_RECORD_MISC_USER; 1105 event->sample.header.misc = PERF_RECORD_MISC_USER;
1092 event->sample.header.size = sizeof(struct perf_event_header); 1106 event->sample.header.size = sizeof(struct perf_event_header);
@@ -1199,14 +1213,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
1199 ptq->have_sample = false; 1213 ptq->have_sample = false;
1200 1214
1201 if (pt->sample_instructions && 1215 if (pt->sample_instructions &&
1202 (state->type & INTEL_PT_INSTRUCTION)) { 1216 (state->type & INTEL_PT_INSTRUCTION) &&
1217 (!pt->synth_opts.initial_skip ||
1218 pt->num_events++ >= pt->synth_opts.initial_skip)) {
1203 err = intel_pt_synth_instruction_sample(ptq); 1219 err = intel_pt_synth_instruction_sample(ptq);
1204 if (err) 1220 if (err)
1205 return err; 1221 return err;
1206 } 1222 }
1207 1223
1208 if (pt->sample_transactions && 1224 if (pt->sample_transactions &&
1209 (state->type & INTEL_PT_TRANSACTION)) { 1225 (state->type & INTEL_PT_TRANSACTION) &&
1226 (!pt->synth_opts.initial_skip ||
1227 pt->num_events++ >= pt->synth_opts.initial_skip)) {
1210 err = intel_pt_synth_transaction_sample(ptq); 1228 err = intel_pt_synth_transaction_sample(ptq);
1211 if (err) 1229 if (err)
1212 return err; 1230 return err;