aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorAdrian Hunter <adrian.hunter@intel.com>2015-07-17 12:33:57 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2015-08-24 16:49:04 -0400
commitcc33618619cefc6d730cca3bb8e15311016a4da7 (patch)
tree251914f11eb7f672c6f19ff0d8f6dcb851b329b1 /tools/perf
parentb45fc0bfaf4a0b60ce2deda222f8ef2a23b89a5f (diff)
perf tools: Add Intel PT support for decoding CYC packets
CYC packets provide even finer grain timestamp information than MTC and TSC packets. A CYC packet contains the number of CPU cycles since the last CYC packet. This patch just adds decoder support. The CPU frequency can be related to TSC using the Maximum Non-Turbo Ratio in combination with the CBR (core-to-bus ratio) packet. However more accuracy is achieved by simply interpolating the number of cycles between other timing packets like MTC or TSC. This patch takes the latter approach. Support for a default value and validation of values is provided by a later patch. Also documentation is updated in a separate patch. For details refer to the June 2015 or later Intel 64 and IA-32 Architectures SDM Chapter 36 Intel Processor Trace. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/1437150840-31811-23-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c311
1 files changed, 306 insertions, 5 deletions
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f7119a11a4b6..0845c5e6ad1d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -88,6 +88,7 @@ struct intel_pt_decoder {
88 bool mtc_insn; 88 bool mtc_insn;
89 bool pge; 89 bool pge;
90 bool have_tma; 90 bool have_tma;
91 bool have_cyc;
91 uint64_t pos; 92 uint64_t pos;
92 uint64_t last_ip; 93 uint64_t last_ip;
93 uint64_t ip; 94 uint64_t ip;
@@ -98,6 +99,8 @@ struct intel_pt_decoder {
98 uint64_t ret_addr; 99 uint64_t ret_addr;
99 uint64_t ctc_timestamp; 100 uint64_t ctc_timestamp;
100 uint64_t ctc_delta; 101 uint64_t ctc_delta;
102 uint64_t cycle_cnt;
103 uint64_t cyc_ref_timestamp;
101 uint32_t last_mtc; 104 uint32_t last_mtc;
102 uint32_t tsc_ctc_ratio_n; 105 uint32_t tsc_ctc_ratio_n;
103 uint32_t tsc_ctc_ratio_d; 106 uint32_t tsc_ctc_ratio_d;
@@ -111,8 +114,13 @@ struct intel_pt_decoder {
111 struct intel_pt_pkt tnt; 114 struct intel_pt_pkt tnt;
112 int pkt_step; 115 int pkt_step;
113 int pkt_len; 116 int pkt_len;
117 int last_packet_type;
114 unsigned int cbr; 118 unsigned int cbr;
115 unsigned int max_non_turbo_ratio; 119 unsigned int max_non_turbo_ratio;
120 double max_non_turbo_ratio_fp;
121 double cbr_cyc_to_tsc;
122 double calc_cyc_to_tsc;
123 bool have_calc_cyc_to_tsc;
116 int exec_mode; 124 int exec_mode;
117 unsigned int insn_bytes; 125 unsigned int insn_bytes;
118 uint64_t sign_bit; 126 uint64_t sign_bit;
@@ -189,7 +197,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
189 decoder->period = params->period; 197 decoder->period = params->period;
190 decoder->period_type = params->period_type; 198 decoder->period_type = params->period_type;
191 199
192 decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; 200 decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
201 decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
193 202
194 intel_pt_setup_period(decoder); 203 intel_pt_setup_period(decoder);
195 204
@@ -514,10 +523,247 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
514 return ret; 523 return ret;
515} 524}
516 525
/*
 * State handed to a lookahead callback by intel_pt_pkt_lookahead() for each
 * non-PAD packet found ahead of the decoder's current position.
 */
526struct intel_pt_pkt_info {
/* Decoder on whose buffer the lookahead is running */
527 struct intel_pt_decoder *decoder;
/* Packet most recently decoded by the lookahead */
528 struct intel_pt_pkt packet;
/* Starts at decoder->pos and is advanced by the length of each packet stepped over */
529 uint64_t pos;
/* Length of 'packet' as returned by intel_pt_get_packet() */
530 int pkt_len;
/* Type of the previous packet passed to the callback (initially decoder->last_packet_type) */
531 int last_packet_type;
/* Opaque pointer supplied by the caller of intel_pt_pkt_lookahead() */
532 void *data;
533};
534
/*
 * Lookahead callback type. intel_pt_pkt_lookahead() stops as soon as the
 * callback returns non-zero and carries on with the next packet when it
 * returns zero.
 */
535typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
536
537/* Lookahead packets in current buffer */
538static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
539 intel_pt_pkt_cb_t cb, void *data)
540{
541 struct intel_pt_pkt_info pkt_info;
542 const unsigned char *buf = decoder->buf;
543 size_t len = decoder->len;
544 int ret;
545
546 pkt_info.decoder = decoder;
547 pkt_info.pos = decoder->pos;
548 pkt_info.pkt_len = decoder->pkt_step;
549 pkt_info.last_packet_type = decoder->last_packet_type;
550 pkt_info.data = data;
551
552 while (1) {
553 do {
554 pkt_info.pos += pkt_info.pkt_len;
555 buf += pkt_info.pkt_len;
556 len -= pkt_info.pkt_len;
557
558 if (!len)
559 return INTEL_PT_NEED_MORE_BYTES;
560
561 ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
562 if (!ret)
563 return INTEL_PT_NEED_MORE_BYTES;
564 if (ret < 0)
565 return ret;
566
567 pkt_info.pkt_len = ret;
568 } while (pkt_info.packet.type == INTEL_PT_PAD);
569
570 ret = cb(&pkt_info);
571 if (ret)
572 return 0;
573
574 pkt_info.last_packet_type = pkt_info.packet.type;
575 }
576}
577
/*
 * Working state for intel_pt_calc_cyc_cb(). intel_pt_calc_cyc_to_tsc() seeds
 * it from the decoder's current timing state so the lookahead can track
 * timing packets without modifying the decoder's own copies of these values.
 */
578struct intel_pt_calc_cyc_to_tsc_info {
/* Sum of CYC packet payloads seen during the lookahead (starts at 0) */
579 uint64_t cycle_cnt;
/* CBR payload seen during the lookahead, 0 if none seen yet */
580 unsigned int cbr;
/* Last MTC value seen (seeded from decoder->last_mtc) */
581 uint32_t last_mtc;
/* Reference timestamp that MTC-based deltas are added to */
582 uint64_t ctc_timestamp;
/* Accumulated, shifted MTC deltas; reset to 0 by a TMA packet */
583 uint64_t ctc_delta;
/* Timestamp from the most recent TSC packet */
584 uint64_t tsc_timestamp;
/* Latest timestamp derived during the lookahead */
585 uint64_t timestamp;
/* True once TMA-derived values are available for handling MTC packets */
586 bool have_tma;
/* True when the lookahead was started from MTC handling rather than TSC handling */
587 bool from_mtc;
/* max_non_turbo_ratio_fp / cbr for the CBR value recorded in 'cbr' */
588 double cbr_cyc_to_tsc;
589};
590
/*
 * Lookahead callback used by intel_pt_calc_cyc_to_tsc().
 *
 * Accumulates CYC payloads and follows MTC/TSC/TMA/CBR packets using the
 * private state in pkt_info->data. When a MTC or TSC packet is reached whose
 * previous packet was a CYC packet, the number of TSC ticks per cycle is
 * calculated from the timestamp difference relative to decoder->timestamp and
 * the accumulated cycle count, and stored in decoder->calc_cyc_to_tsc.
 *
 * Returns 0 to continue the lookahead with the next packet, or 1 to stop it
 * (either because a value was calculated or because the calculation had to
 * be abandoned).
 */
591static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
592{
593 struct intel_pt_decoder *decoder = pkt_info->decoder;
594 struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
595 uint64_t timestamp;
596 double cyc_to_tsc;
597 unsigned int cbr;
598 uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
599
600 switch (pkt_info->packet.type) {
/* These packet types do not take part in the calculation: keep looking */
601 case INTEL_PT_TNT:
602 case INTEL_PT_TIP_PGE:
603 case INTEL_PT_TIP:
604 case INTEL_PT_FUP:
605 case INTEL_PT_PSB:
606 case INTEL_PT_PIP:
607 case INTEL_PT_MODE_EXEC:
608 case INTEL_PT_MODE_TSX:
609 case INTEL_PT_PSBEND:
610 case INTEL_PT_PAD:
611 case INTEL_PT_VMCS:
612 case INTEL_PT_MNT:
613 return 0;
614
615 case INTEL_PT_MTC:
/* MTC packets are skipped until TMA-derived state is available */
616 if (!data->have_tma)
617 return 0;
618
619 mtc = pkt_info->packet.payload;
/* An MTC value that is not greater than the last one is treated as having wrapped at 256 */
620 if (mtc > data->last_mtc)
621 mtc_delta = mtc - data->last_mtc;
622 else
623 mtc_delta = mtc + 256 - data->last_mtc;
624 data->ctc_delta += mtc_delta << decoder->mtc_shift;
625 data->last_mtc = mtc;
626
/* Scale the accumulated delta by the multiplier if there is one, else by the n/d ratio */
627 if (decoder->tsc_ctc_mult) {
628 timestamp = data->ctc_timestamp +
629 data->ctc_delta * decoder->tsc_ctc_mult;
630 } else {
631 timestamp = data->ctc_timestamp +
632 multdiv(data->ctc_delta,
633 decoder->tsc_ctc_ratio_n,
634 decoder->tsc_ctc_ratio_d);
635 }
636
/* The new timestamp would go backwards: stop the lookahead */
637 if (timestamp < data->timestamp)
638 return 1;
639
/* Not preceded by a CYC packet: just record the timestamp and keep looking */
640 if (pkt_info->last_packet_type != INTEL_PT_CYC) {
641 data->timestamp = timestamp;
642 return 0;
643 }
644
/* MTC preceded by CYC: go on to the calculation after the switch */
645 break;
646
647 case INTEL_PT_TSC:
/* Combine the payload with the top 8 bits of the current lookahead timestamp */
648 timestamp = pkt_info->packet.payload |
649 (data->timestamp & (0xffULL << 56));
/* When started from MTC, a TSC behind the timestamp by less than tsc_slip stops the lookahead */
650 if (data->from_mtc && timestamp < data->timestamp &&
651 data->timestamp - timestamp < decoder->tsc_slip)
652 return 1;
/* Otherwise a smaller value is taken to mean the low 56 bits wrapped */
653 while (timestamp < data->timestamp)
654 timestamp += (1ULL << 56);
655 if (pkt_info->last_packet_type != INTEL_PT_CYC) {
/* When started from MTC, a TSC not preceded by CYC ends the lookahead */
656 if (data->from_mtc)
657 return 1;
658 data->tsc_timestamp = timestamp;
659 data->timestamp = timestamp;
660 return 0;
661 }
/* TSC preceded by CYC: go on to the calculation after the switch */
662 break;
663
664 case INTEL_PT_TMA:
/* When started from MTC, a TMA packet ends the lookahead */
665 if (data->from_mtc)
666 return 1;
667
/* Without a ratio denominator the TMA packet is ignored */
668 if (!decoder->tsc_ctc_ratio_d)
669 return 0;
670
671 ctc = pkt_info->packet.payload;
672 fc = pkt_info->packet.count;
673 ctc_rem = ctc & decoder->ctc_rem_mask;
674
675 data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
676
/* Rebase the CTC reference timestamp on the last TSC seen in this lookahead */
677 data->ctc_timestamp = data->tsc_timestamp - fc;
678 if (decoder->tsc_ctc_mult) {
679 data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
680 } else {
681 data->ctc_timestamp -=
682 multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
683 decoder->tsc_ctc_ratio_d);
684 }
685
686 data->ctc_delta = 0;
687 data->have_tma = true;
688
689 return 0;
690
691 case INTEL_PT_CYC:
/* Accumulate cycles; the calculation happens at the next MTC or TSC */
692 data->cycle_cnt += pkt_info->packet.payload;
693 return 0;
694
695 case INTEL_PT_CBR:
696 cbr = pkt_info->packet.payload;
/* A CBR value different from one already seen in this lookahead stops it */
697 if (data->cbr && data->cbr != cbr)
698 return 1;
699 data->cbr = cbr;
700 data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
701 return 0;
702
/* These packet types, and any unknown type, stop the lookahead */
703 case INTEL_PT_TIP_PGD:
704 case INTEL_PT_TRACESTOP:
705 case INTEL_PT_OVF:
706 case INTEL_PT_BAD: /* Does not happen */
707 default:
708 return 1;
709 }
710
/* Only reached for a MTC or TSC packet whose previous packet was CYC */

/* No CBR seen in the lookahead: fall back to the decoder's current CBR, if any */
711 if (!data->cbr && decoder->cbr) {
712 data->cbr = decoder->cbr;
713 data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
714 }
715
/* No cycles were counted, so there is nothing to divide by */
716 if (!data->cycle_cnt)
717 return 1;
718
719 cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
720
/* Reject a calculated value more than 1.25 times the CBR-based value */
721 if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
722 cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
723 intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
724 cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
725 return 1;
726 }
727
/* Publish the result to the decoder */
728 decoder->calc_cyc_to_tsc = cyc_to_tsc;
729 decoder->have_calc_cyc_to_tsc = true;
730
731 if (data->cbr) {
732 intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
733 cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
734 } else {
735 intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
736 cyc_to_tsc, pkt_info->pos);
737 }
738
/* Calculation done: stop the lookahead */
739 return 1;
740}
741
742static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
743 bool from_mtc)
744{
745 struct intel_pt_calc_cyc_to_tsc_info data = {
746 .cycle_cnt = 0,
747 .cbr = 0,
748 .last_mtc = decoder->last_mtc,
749 .ctc_timestamp = decoder->ctc_timestamp,
750 .ctc_delta = decoder->ctc_delta,
751 .tsc_timestamp = decoder->tsc_timestamp,
752 .timestamp = decoder->timestamp,
753 .have_tma = decoder->have_tma,
754 .from_mtc = from_mtc,
755 .cbr_cyc_to_tsc = 0,
756 };
757
758 intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
759}
760
517static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) 761static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
518{ 762{
519 int ret; 763 int ret;
520 764
765 decoder->last_packet_type = decoder->packet.type;
766
521 do { 767 do {
522 decoder->pos += decoder->pkt_step; 768 decoder->pos += decoder->pkt_step;
523 decoder->buf += decoder->pkt_step; 769 decoder->buf += decoder->pkt_step;
@@ -954,6 +1200,13 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
954 decoder->timestamp_insn_cnt = 0; 1200 decoder->timestamp_insn_cnt = 0;
955 } 1201 }
956 1202
1203 if (decoder->last_packet_type == INTEL_PT_CYC) {
1204 decoder->cyc_ref_timestamp = decoder->timestamp;
1205 decoder->cycle_cnt = 0;
1206 decoder->have_calc_cyc_to_tsc = false;
1207 intel_pt_calc_cyc_to_tsc(decoder, false);
1208 }
1209
957 intel_pt_log_to("Setting timestamp", decoder->timestamp); 1210 intel_pt_log_to("Setting timestamp", decoder->timestamp);
958} 1211}
959 1212
@@ -962,6 +1215,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
962 intel_pt_log("ERROR: Buffer overflow\n"); 1215 intel_pt_log("ERROR: Buffer overflow\n");
963 intel_pt_clear_tx_flags(decoder); 1216 intel_pt_clear_tx_flags(decoder);
964 decoder->have_tma = false; 1217 decoder->have_tma = false;
1218 decoder->cbr = 0;
965 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; 1219 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
966 decoder->overflow = true; 1220 decoder->overflow = true;
967 return -EOVERFLOW; 1221 return -EOVERFLOW;
@@ -1026,6 +1280,49 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
1026 1280
1027 decoder->timestamp_insn_cnt = 0; 1281 decoder->timestamp_insn_cnt = 0;
1028 decoder->last_mtc = mtc; 1282 decoder->last_mtc = mtc;
1283
1284 if (decoder->last_packet_type == INTEL_PT_CYC) {
1285 decoder->cyc_ref_timestamp = decoder->timestamp;
1286 decoder->cycle_cnt = 0;
1287 decoder->have_calc_cyc_to_tsc = false;
1288 intel_pt_calc_cyc_to_tsc(decoder, true);
1289 }
1290}
1291
1292static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
1293{
1294 unsigned int cbr = decoder->packet.payload;
1295
1296 if (decoder->cbr == cbr)
1297 return;
1298
1299 decoder->cbr = cbr;
1300 decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
1301}
1302
1303static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
1304{
1305 uint64_t timestamp = decoder->cyc_ref_timestamp;
1306
1307 decoder->have_cyc = true;
1308
1309 decoder->cycle_cnt += decoder->packet.payload;
1310
1311 if (!decoder->cyc_ref_timestamp)
1312 return;
1313
1314 if (decoder->have_calc_cyc_to_tsc)
1315 timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
1316 else if (decoder->cbr)
1317 timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
1318 else
1319 return;
1320
1321 if (timestamp < decoder->timestamp)
1322 intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
1323 timestamp, decoder->timestamp);
1324 else
1325 decoder->timestamp = timestamp;
1029} 1326}
1030 1327
1031/* Walk PSB+ packets when already in sync. */ 1328/* Walk PSB+ packets when already in sync. */
@@ -1065,7 +1362,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
1065 break; 1362 break;
1066 1363
1067 case INTEL_PT_CBR: 1364 case INTEL_PT_CBR:
1068 decoder->cbr = decoder->packet.payload; 1365 intel_pt_calc_cbr(decoder);
1069 break; 1366 break;
1070 1367
1071 case INTEL_PT_MODE_EXEC: 1368 case INTEL_PT_MODE_EXEC:
@@ -1182,6 +1479,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
1182 break; 1479 break;
1183 1480
1184 case INTEL_PT_CYC: 1481 case INTEL_PT_CYC:
1482 intel_pt_calc_cyc_timestamp(decoder);
1185 break; 1483 break;
1186 1484
1187 case INTEL_PT_MODE_EXEC: 1485 case INTEL_PT_MODE_EXEC:
@@ -1318,10 +1616,11 @@ next:
1318 break; 1616 break;
1319 1617
1320 case INTEL_PT_CYC: 1618 case INTEL_PT_CYC:
1619 intel_pt_calc_cyc_timestamp(decoder);
1321 break; 1620 break;
1322 1621
1323 case INTEL_PT_CBR: 1622 case INTEL_PT_CBR:
1324 decoder->cbr = decoder->packet.payload; 1623 intel_pt_calc_cbr(decoder);
1325 break; 1624 break;
1326 1625
1327 case INTEL_PT_MODE_EXEC: 1626 case INTEL_PT_MODE_EXEC:
@@ -1398,10 +1697,11 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
1398 break; 1697 break;
1399 1698
1400 case INTEL_PT_CYC: 1699 case INTEL_PT_CYC:
1700 intel_pt_calc_cyc_timestamp(decoder);
1401 break; 1701 break;
1402 1702
1403 case INTEL_PT_CBR: 1703 case INTEL_PT_CBR:
1404 decoder->cbr = decoder->packet.payload; 1704 intel_pt_calc_cbr(decoder);
1405 break; 1705 break;
1406 1706
1407 case INTEL_PT_PIP: 1707 case INTEL_PT_PIP:
@@ -1493,10 +1793,11 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
1493 break; 1793 break;
1494 1794
1495 case INTEL_PT_CYC: 1795 case INTEL_PT_CYC:
1796 intel_pt_calc_cyc_timestamp(decoder);
1496 break; 1797 break;
1497 1798
1498 case INTEL_PT_CBR: 1799 case INTEL_PT_CBR:
1499 decoder->cbr = decoder->packet.payload; 1800 intel_pt_calc_cbr(decoder);
1500 break; 1801 break;
1501 1802
1502 case INTEL_PT_PIP: 1803 case INTEL_PT_PIP: