author    Adrian Hunter <adrian.hunter@intel.com>    2015-09-25 09:15:45 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2015-09-28 15:59:14 -0400
commit    f14445ee72c59f32aa5cbf4d0f0330a5f62a752d (patch)
tree      6498ddc2acfef1a06f1b5cf10606b8f1171dbc9e /tools/perf/util/intel-pt.c
parent    385e33063fb963f5cccb0a37fe539319b6481fa5 (diff)
perf intel-pt: Support generating branch stack
Add support for generating branch stack context for PT samples. The
decoder reports a configurable number of branches as branch context for
each sample. Internally it keeps track of them by using a simple
sliding window. We also flush the last branch buffer on each sample to
avoid overlapping intervals.

This is useful for:

- Reporting accurate basic block edge frequencies through the perf
  report branch view
- Using with --branch-history to get the wider context of samples
- Other users of LBRs

Also the Documentation is updated.

Examples:

Record with Intel PT:

    perf record -e intel_pt//u ls

Branch stacks are used by default if synthesized so:

    perf report --itrace=ile

is the same as:

    perf report --itrace=ile -b

Branch history can be requested also:

    perf report --itrace=igle --branch-history

Based-on-patch-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1443186956-18718-15-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
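The "simple sliding window" the message describes is implemented in the patch as a small ring buffer that records branches newest-first and is unrolled into a flat branch_stack each time a sample is synthesized (see intel_pt_update_last_branch_rb() and intel_pt_copy_last_branch_rb() in the diff below). The following standalone C sketch only illustrates that technique; the struct and function names here are simplified stand-ins, not the ones added to intel-pt.c:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Simplified stand-ins for perf's branch_entry/branch_stack. */
    struct entry { unsigned long long from, to; };

    struct ring {
        size_t sz;       /* configured window size ("last_branch_sz") */
        size_t nr;       /* number of valid entries, capped at sz     */
        size_t pos;      /* index of the newest entry                 */
        struct entry *e;
    };

    /* Record a branch; the newest entry moves one slot backwards, wrapping. */
    static void ring_update(struct ring *r, unsigned long long from,
                            unsigned long long to)
    {
        if (!r->pos)
            r->pos = r->sz;
        r->pos -= 1;
        r->e[r->pos].from = from;
        r->e[r->pos].to = to;
        if (r->nr < r->sz)
            r->nr += 1;
    }

    /* Unroll the ring into a flat array, newest branch first. */
    static void ring_copy(const struct ring *r, struct entry *dst)
    {
        size_t n = r->sz - r->pos;

        if (!r->nr)
            return;
        memcpy(dst, &r->e[r->pos], n * sizeof(*dst));
        if (r->nr >= r->sz)
            memcpy(dst + n, r->e, r->pos * sizeof(*dst));
    }

    int main(void)
    {
        struct ring r = { .sz = 4, .e = calloc(4, sizeof(struct entry)) };
        struct entry flat[4];
        size_t i;

        if (!r.e)
            return 1;

        /* Record six branches into a window of four. */
        for (i = 1; i <= 6; i++)
            ring_update(&r, i * 0x100, i * 0x100 + 4);

        ring_copy(&r, flat);
        for (i = 0; i < r.nr; i++)
            printf("%zu: %#llx -> %#llx\n", i, flat[i].from, flat[i].to);

        free(r.e);
        return 0;
    }

With a window of four, recording six branches and unrolling prints only the four most recent ones, newest first, which matches the most-recent-first ordering perf uses for branch stack entries.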
Diffstat (limited to 'tools/perf/util/intel-pt.c')
-rw-r--r--    tools/perf/util/intel-pt.c    115
1 file changed, 115 insertions, 0 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 2c01e723826a..05e8fcc5188b 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -22,6 +22,7 @@
 #include "../perf.h"
 #include "session.h"
 #include "machine.h"
+#include "sort.h"
 #include "tool.h"
 #include "event.h"
 #include "evlist.h"
@@ -115,6 +116,9 @@ struct intel_pt_queue {
 	void *decoder;
 	const struct intel_pt_state *state;
 	struct ip_callchain *chain;
+	struct branch_stack *last_branch;
+	struct branch_stack *last_branch_rb;
+	size_t last_branch_pos;
 	union perf_event *event_buf;
 	bool on_heap;
 	bool stop;
@@ -675,6 +679,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 		goto out_free;
 	}
 
+	if (pt->synth_opts.last_branch) {
+		size_t sz = sizeof(struct branch_stack);
+
+		sz += pt->synth_opts.last_branch_sz *
+		      sizeof(struct branch_entry);
+		ptq->last_branch = zalloc(sz);
+		if (!ptq->last_branch)
+			goto out_free;
+		ptq->last_branch_rb = zalloc(sz);
+		if (!ptq->last_branch_rb)
+			goto out_free;
+	}
+
 	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
 	if (!ptq->event_buf)
 		goto out_free;
@@ -732,6 +749,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 
 out_free:
 	zfree(&ptq->event_buf);
+	zfree(&ptq->last_branch);
+	zfree(&ptq->last_branch_rb);
 	zfree(&ptq->chain);
 	free(ptq);
 	return NULL;
@@ -746,6 +765,8 @@ static void intel_pt_free_queue(void *priv)
 	thread__zput(ptq->thread);
 	intel_pt_decoder_free(ptq->decoder);
 	zfree(&ptq->event_buf);
+	zfree(&ptq->last_branch);
+	zfree(&ptq->last_branch_rb);
 	zfree(&ptq->chain);
 	free(ptq);
 }
@@ -876,6 +897,57 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
 	return 0;
 }
 
+static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	struct branch_stack *bs_src = ptq->last_branch_rb;
+	struct branch_stack *bs_dst = ptq->last_branch;
+	size_t nr = 0;
+
+	bs_dst->nr = bs_src->nr;
+
+	if (!bs_src->nr)
+		return;
+
+	nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
+	memcpy(&bs_dst->entries[0],
+	       &bs_src->entries[ptq->last_branch_pos],
+	       sizeof(struct branch_entry) * nr);
+
+	if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
+		memcpy(&bs_dst->entries[nr],
+		       &bs_src->entries[0],
+		       sizeof(struct branch_entry) * ptq->last_branch_pos);
+	}
+}
+
+static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	ptq->last_branch_pos = 0;
+	ptq->last_branch_rb->nr = 0;
+}
+
+static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	const struct intel_pt_state *state = ptq->state;
+	struct branch_stack *bs = ptq->last_branch_rb;
+	struct branch_entry *be;
+
+	if (!ptq->last_branch_pos)
+		ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
+
+	ptq->last_branch_pos -= 1;
+
+	be = &bs->entries[ptq->last_branch_pos];
+	be->from = state->from_ip;
+	be->to = state->to_ip;
+	be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
+	be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
+	/* No support for mispredict */
+
+	if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
+		bs->nr += 1;
+}
+
 static int intel_pt_inject_event(union perf_event *event,
 				 struct perf_sample *sample, u64 type,
 				 bool swapped)
@@ -890,6 +962,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
+	struct dummy_branch_stack {
+		u64 nr;
+		struct branch_entry entries;
+	} dummy_bs;
 
 	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
 		return 0;
@@ -912,6 +988,21 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	sample.flags = ptq->flags;
 	sample.insn_len = ptq->insn_len;
 
+	/*
+	 * perf report cannot handle events without a branch stack when using
+	 * SORT_MODE__BRANCH so make a dummy one.
+	 */
+	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
+		dummy_bs = (struct dummy_branch_stack){
+			.nr = 1,
+			.entries = {
+				.from = sample.ip,
+				.to = sample.addr,
+			},
+		};
+		sample.branch_stack = (struct branch_stack *)&dummy_bs;
+	}
+
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
 					    pt->branches_sample_type,
@@ -961,6 +1052,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 		sample.callchain = ptq->chain;
 	}
 
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample.branch_stack = ptq->last_branch;
+	}
+
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
 					    pt->instructions_sample_type,
@@ -974,6 +1070,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
 		       ret);
 
+	if (pt->synth_opts.last_branch)
+		intel_pt_reset_last_branch_rb(ptq);
+
 	return ret;
 }
 
@@ -1008,6 +1107,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 		sample.callchain = ptq->chain;
 	}
 
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample.branch_stack = ptq->last_branch;
+	}
+
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
 					    pt->transactions_sample_type,
@@ -1021,6 +1125,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
 		       ret);
 
+	if (pt->synth_opts.last_branch)
+		intel_pt_reset_last_branch_rb(ptq);
+
 	return ret;
 }
 
@@ -1116,6 +1223,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		return err;
 	}
 
+	if (pt->synth_opts.last_branch)
+		intel_pt_update_last_branch_rb(ptq);
+
 	if (!pt->sync_switch)
 		return 0;
 
@@ -1763,6 +1873,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 	pt->instructions_sample_period = attr.sample_period;
 	if (pt->synth_opts.callchain)
 		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+	if (pt->synth_opts.last_branch)
+		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
 	pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
 		 id, (u64)attr.sample_type);
 	err = intel_pt_synth_event(session, &attr, id);
@@ -1782,6 +1894,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 	attr.sample_period = 1;
 	if (pt->synth_opts.callchain)
 		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+	if (pt->synth_opts.last_branch)
+		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
 	pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
 		 id, (u64)attr.sample_type);
 	err = intel_pt_synth_event(session, &attr, id);
@@ -1808,6 +1922,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 	attr.sample_period = 1;
 	attr.sample_type |= PERF_SAMPLE_ADDR;
 	attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
+	attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
 	pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
 		 id, (u64)attr.sample_type);
 	err = intel_pt_synth_event(session, &attr, id);