aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
author Adrian Hunter <adrian.hunter@intel.com> 2019-05-20 07:37:11 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2019-06-05 08:47:54 -0400
commit 7b4b4f83881e11b1fe5d8743953f81addb0871de (patch)
tree 1c45b19c57685e857b90bc7d9b1a9ae65f91a412 /tools/perf
parent 948e9dc8bb266649a618ac974010292bf36fb213 (diff)
perf intel-pt: Accumulate cycle count from CYC packets
In preparation for providing instructions-per-cycle (IPC) information, accumulate the cycle count from CYC packets. Although CYC packets are optional (they require the config term 'cyc' to enable cycle-accurate mode when recording), the simplest way to count cycles is with CYC packets.

The first complication is that cycles must be counted only when instructions are also being counted, that is, when control flow packet generation is enabled, i.e. between TIP.PGE and TIP.PGD packets. Also, sampling the cycle count follows the same rules as sampling the timestamp, that is, not before the instruction to which the decoder is walking is reached. In addition, the cycle count is not accurate for any but the first branch of a TNT packet.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20190520113728.14389-6-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c 14
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-decoder.h 1
2 files changed, 14 insertions, 1 deletions
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 1ab4070b5633..ef3a1c1cd250 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -160,6 +160,8 @@ struct intel_pt_decoder {
160 uint64_t period_mask; 160 uint64_t period_mask;
161 uint64_t period_ticks; 161 uint64_t period_ticks;
162 uint64_t last_masked_timestamp; 162 uint64_t last_masked_timestamp;
163 uint64_t tot_cyc_cnt;
164 uint64_t sample_tot_cyc_cnt;
163 bool continuous_period; 165 bool continuous_period;
164 bool overflow; 166 bool overflow;
165 bool set_fup_tx_flags; 167 bool set_fup_tx_flags;
@@ -167,6 +169,7 @@ struct intel_pt_decoder {
167 bool set_fup_mwait; 169 bool set_fup_mwait;
168 bool set_fup_pwre; 170 bool set_fup_pwre;
169 bool set_fup_exstop; 171 bool set_fup_exstop;
172 bool sample_cyc;
170 unsigned int fup_tx_flags; 173 unsigned int fup_tx_flags;
171 unsigned int tx_flags; 174 unsigned int tx_flags;
172 uint64_t fup_ptw_payload; 175 uint64_t fup_ptw_payload;
@@ -1323,6 +1326,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
1323 decoder->ip += intel_pt_insn.length; 1326 decoder->ip += intel_pt_insn.length;
1324 return 0; 1327 return 0;
1325 } 1328 }
1329 decoder->sample_cyc = false;
1326 decoder->ip += intel_pt_insn.length; 1330 decoder->ip += intel_pt_insn.length;
1327 if (!decoder->tnt.count) { 1331 if (!decoder->tnt.count) {
1328 intel_pt_update_sample_time(decoder); 1332 intel_pt_update_sample_time(decoder);
@@ -1515,6 +1519,9 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
1515 decoder->have_cyc = true; 1519 decoder->have_cyc = true;
1516 1520
1517 decoder->cycle_cnt += decoder->packet.payload; 1521 decoder->cycle_cnt += decoder->packet.payload;
1522 if (decoder->pge)
1523 decoder->tot_cyc_cnt += decoder->packet.payload;
1524 decoder->sample_cyc = true;
1518 1525
1519 if (!decoder->cyc_ref_timestamp) 1526 if (!decoder->cyc_ref_timestamp)
1520 return; 1527 return;
@@ -2419,6 +2426,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2419 decoder->state.err = intel_pt_ext_err(err); 2426 decoder->state.err = intel_pt_ext_err(err);
2420 decoder->state.from_ip = decoder->ip; 2427 decoder->state.from_ip = decoder->ip;
2421 intel_pt_update_sample_time(decoder); 2428 intel_pt_update_sample_time(decoder);
2429 decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
2422 } else { 2430 } else {
2423 decoder->state.err = 0; 2431 decoder->state.err = 0;
2424 if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { 2432 if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
@@ -2426,14 +2434,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2426 decoder->state.type |= INTEL_PT_CBR_CHG; 2434 decoder->state.type |= INTEL_PT_CBR_CHG;
2427 decoder->state.cbr_payload = decoder->cbr_payload; 2435 decoder->state.cbr_payload = decoder->cbr_payload;
2428 } 2436 }
2429 if (intel_pt_sample_time(decoder->pkt_state)) 2437 if (intel_pt_sample_time(decoder->pkt_state)) {
2430 intel_pt_update_sample_time(decoder); 2438 intel_pt_update_sample_time(decoder);
2439 if (decoder->sample_cyc)
2440 decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
2441 }
2431 } 2442 }
2432 2443
2433 decoder->state.timestamp = decoder->sample_timestamp; 2444 decoder->state.timestamp = decoder->sample_timestamp;
2434 decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); 2445 decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
2435 decoder->state.cr3 = decoder->cr3; 2446 decoder->state.cr3 = decoder->cr3;
2436 decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; 2447 decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
2448 decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
2437 2449
2438 return &decoder->state; 2450 return &decoder->state;
2439} 2451}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index ed088d4726ba..6a61773dc44b 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -77,6 +77,7 @@ struct intel_pt_state {
77 uint64_t to_ip; 77 uint64_t to_ip;
78 uint64_t cr3; 78 uint64_t cr3;
79 uint64_t tot_insn_cnt; 79 uint64_t tot_insn_cnt;
80 uint64_t tot_cyc_cnt;
80 uint64_t timestamp; 81 uint64_t timestamp;
81 uint64_t est_timestamp; 82 uint64_t est_timestamp;
82 uint64_t trace_nr; 83 uint64_t trace_nr;