diff options
author | Adrian Hunter <adrian.hunter@intel.com> | 2015-09-25 09:15:45 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2015-09-28 15:59:14 -0400 |
commit | f14445ee72c59f32aa5cbf4d0f0330a5f62a752d (patch) | |
tree | 6498ddc2acfef1a06f1b5cf10606b8f1171dbc9e /tools/perf/util/intel-pt.c | |
parent | 385e33063fb963f5cccb0a37fe539319b6481fa5 (diff) |
perf intel-pt: Support generating branch stack
Add support for generating branch stack context for PT samples. The
decoder reports a configurable number of branches as branch context for
each sample. Internally it keeps track of them by using a simple sliding
window. We also flush the last branch buffer on each sample to avoid
overlapping intervals.
This is useful for:
- Reporting accurate basic block edge frequencies through the perf
report branch view
- Using with --branch-history to get the wider context of samples
- Other users of LBRs
Also the Documentation is updated.
Examples:
Record with Intel PT:
perf record -e intel_pt//u ls
Branch stacks are used by default if synthesized so:
perf report --itrace=ile
is the same as:
perf report --itrace=ile -b
Branch history can be requested also:
perf report --itrace=igle --branch-history
Based-on-patch-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1443186956-18718-15-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/intel-pt.c')
-rw-r--r-- | tools/perf/util/intel-pt.c | 115 |
1 file changed, 115 insertions, 0 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 2c01e723826a..05e8fcc5188b 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "../perf.h" | 22 | #include "../perf.h" |
23 | #include "session.h" | 23 | #include "session.h" |
24 | #include "machine.h" | 24 | #include "machine.h" |
25 | #include "sort.h" | ||
25 | #include "tool.h" | 26 | #include "tool.h" |
26 | #include "event.h" | 27 | #include "event.h" |
27 | #include "evlist.h" | 28 | #include "evlist.h" |
@@ -115,6 +116,9 @@ struct intel_pt_queue { | |||
115 | void *decoder; | 116 | void *decoder; |
116 | const struct intel_pt_state *state; | 117 | const struct intel_pt_state *state; |
117 | struct ip_callchain *chain; | 118 | struct ip_callchain *chain; |
119 | struct branch_stack *last_branch; | ||
120 | struct branch_stack *last_branch_rb; | ||
121 | size_t last_branch_pos; | ||
118 | union perf_event *event_buf; | 122 | union perf_event *event_buf; |
119 | bool on_heap; | 123 | bool on_heap; |
120 | bool stop; | 124 | bool stop; |
@@ -675,6 +679,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, | |||
675 | goto out_free; | 679 | goto out_free; |
676 | } | 680 | } |
677 | 681 | ||
682 | if (pt->synth_opts.last_branch) { | ||
683 | size_t sz = sizeof(struct branch_stack); | ||
684 | |||
685 | sz += pt->synth_opts.last_branch_sz * | ||
686 | sizeof(struct branch_entry); | ||
687 | ptq->last_branch = zalloc(sz); | ||
688 | if (!ptq->last_branch) | ||
689 | goto out_free; | ||
690 | ptq->last_branch_rb = zalloc(sz); | ||
691 | if (!ptq->last_branch_rb) | ||
692 | goto out_free; | ||
693 | } | ||
694 | |||
678 | ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); | 695 | ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); |
679 | if (!ptq->event_buf) | 696 | if (!ptq->event_buf) |
680 | goto out_free; | 697 | goto out_free; |
@@ -732,6 +749,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, | |||
732 | 749 | ||
733 | out_free: | 750 | out_free: |
734 | zfree(&ptq->event_buf); | 751 | zfree(&ptq->event_buf); |
752 | zfree(&ptq->last_branch); | ||
753 | zfree(&ptq->last_branch_rb); | ||
735 | zfree(&ptq->chain); | 754 | zfree(&ptq->chain); |
736 | free(ptq); | 755 | free(ptq); |
737 | return NULL; | 756 | return NULL; |
@@ -746,6 +765,8 @@ static void intel_pt_free_queue(void *priv) | |||
746 | thread__zput(ptq->thread); | 765 | thread__zput(ptq->thread); |
747 | intel_pt_decoder_free(ptq->decoder); | 766 | intel_pt_decoder_free(ptq->decoder); |
748 | zfree(&ptq->event_buf); | 767 | zfree(&ptq->event_buf); |
768 | zfree(&ptq->last_branch); | ||
769 | zfree(&ptq->last_branch_rb); | ||
749 | zfree(&ptq->chain); | 770 | zfree(&ptq->chain); |
750 | free(ptq); | 771 | free(ptq); |
751 | } | 772 | } |
@@ -876,6 +897,57 @@ static int intel_pt_setup_queues(struct intel_pt *pt) | |||
876 | return 0; | 897 | return 0; |
877 | } | 898 | } |
878 | 899 | ||
900 | static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) | ||
901 | { | ||
902 | struct branch_stack *bs_src = ptq->last_branch_rb; | ||
903 | struct branch_stack *bs_dst = ptq->last_branch; | ||
904 | size_t nr = 0; | ||
905 | |||
906 | bs_dst->nr = bs_src->nr; | ||
907 | |||
908 | if (!bs_src->nr) | ||
909 | return; | ||
910 | |||
911 | nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; | ||
912 | memcpy(&bs_dst->entries[0], | ||
913 | &bs_src->entries[ptq->last_branch_pos], | ||
914 | sizeof(struct branch_entry) * nr); | ||
915 | |||
916 | if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { | ||
917 | memcpy(&bs_dst->entries[nr], | ||
918 | &bs_src->entries[0], | ||
919 | sizeof(struct branch_entry) * ptq->last_branch_pos); | ||
920 | } | ||
921 | } | ||
922 | |||
923 | static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) | ||
924 | { | ||
925 | ptq->last_branch_pos = 0; | ||
926 | ptq->last_branch_rb->nr = 0; | ||
927 | } | ||
928 | |||
929 | static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) | ||
930 | { | ||
931 | const struct intel_pt_state *state = ptq->state; | ||
932 | struct branch_stack *bs = ptq->last_branch_rb; | ||
933 | struct branch_entry *be; | ||
934 | |||
935 | if (!ptq->last_branch_pos) | ||
936 | ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; | ||
937 | |||
938 | ptq->last_branch_pos -= 1; | ||
939 | |||
940 | be = &bs->entries[ptq->last_branch_pos]; | ||
941 | be->from = state->from_ip; | ||
942 | be->to = state->to_ip; | ||
943 | be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); | ||
944 | be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); | ||
945 | /* No support for mispredict */ | ||
946 | |||
947 | if (bs->nr < ptq->pt->synth_opts.last_branch_sz) | ||
948 | bs->nr += 1; | ||
949 | } | ||
950 | |||
879 | static int intel_pt_inject_event(union perf_event *event, | 951 | static int intel_pt_inject_event(union perf_event *event, |
880 | struct perf_sample *sample, u64 type, | 952 | struct perf_sample *sample, u64 type, |
881 | bool swapped) | 953 | bool swapped) |
@@ -890,6 +962,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) | |||
890 | struct intel_pt *pt = ptq->pt; | 962 | struct intel_pt *pt = ptq->pt; |
891 | union perf_event *event = ptq->event_buf; | 963 | union perf_event *event = ptq->event_buf; |
892 | struct perf_sample sample = { .ip = 0, }; | 964 | struct perf_sample sample = { .ip = 0, }; |
965 | struct dummy_branch_stack { | ||
966 | u64 nr; | ||
967 | struct branch_entry entries; | ||
968 | } dummy_bs; | ||
893 | 969 | ||
894 | if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) | 970 | if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) |
895 | return 0; | 971 | return 0; |
@@ -912,6 +988,21 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) | |||
912 | sample.flags = ptq->flags; | 988 | sample.flags = ptq->flags; |
913 | sample.insn_len = ptq->insn_len; | 989 | sample.insn_len = ptq->insn_len; |
914 | 990 | ||
991 | /* | ||
992 | * perf report cannot handle events without a branch stack when using | ||
993 | * SORT_MODE__BRANCH so make a dummy one. | ||
994 | */ | ||
995 | if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { | ||
996 | dummy_bs = (struct dummy_branch_stack){ | ||
997 | .nr = 1, | ||
998 | .entries = { | ||
999 | .from = sample.ip, | ||
1000 | .to = sample.addr, | ||
1001 | }, | ||
1002 | }; | ||
1003 | sample.branch_stack = (struct branch_stack *)&dummy_bs; | ||
1004 | } | ||
1005 | |||
915 | if (pt->synth_opts.inject) { | 1006 | if (pt->synth_opts.inject) { |
916 | ret = intel_pt_inject_event(event, &sample, | 1007 | ret = intel_pt_inject_event(event, &sample, |
917 | pt->branches_sample_type, | 1008 | pt->branches_sample_type, |
@@ -961,6 +1052,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) | |||
961 | sample.callchain = ptq->chain; | 1052 | sample.callchain = ptq->chain; |
962 | } | 1053 | } |
963 | 1054 | ||
1055 | if (pt->synth_opts.last_branch) { | ||
1056 | intel_pt_copy_last_branch_rb(ptq); | ||
1057 | sample.branch_stack = ptq->last_branch; | ||
1058 | } | ||
1059 | |||
964 | if (pt->synth_opts.inject) { | 1060 | if (pt->synth_opts.inject) { |
965 | ret = intel_pt_inject_event(event, &sample, | 1061 | ret = intel_pt_inject_event(event, &sample, |
966 | pt->instructions_sample_type, | 1062 | pt->instructions_sample_type, |
@@ -974,6 +1070,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) | |||
974 | pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", | 1070 | pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", |
975 | ret); | 1071 | ret); |
976 | 1072 | ||
1073 | if (pt->synth_opts.last_branch) | ||
1074 | intel_pt_reset_last_branch_rb(ptq); | ||
1075 | |||
977 | return ret; | 1076 | return ret; |
978 | } | 1077 | } |
979 | 1078 | ||
@@ -1008,6 +1107,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) | |||
1008 | sample.callchain = ptq->chain; | 1107 | sample.callchain = ptq->chain; |
1009 | } | 1108 | } |
1010 | 1109 | ||
1110 | if (pt->synth_opts.last_branch) { | ||
1111 | intel_pt_copy_last_branch_rb(ptq); | ||
1112 | sample.branch_stack = ptq->last_branch; | ||
1113 | } | ||
1114 | |||
1011 | if (pt->synth_opts.inject) { | 1115 | if (pt->synth_opts.inject) { |
1012 | ret = intel_pt_inject_event(event, &sample, | 1116 | ret = intel_pt_inject_event(event, &sample, |
1013 | pt->transactions_sample_type, | 1117 | pt->transactions_sample_type, |
@@ -1021,6 +1125,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) | |||
1021 | pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", | 1125 | pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", |
1022 | ret); | 1126 | ret); |
1023 | 1127 | ||
1128 | if (pt->synth_opts.last_branch) | ||
1129 | intel_pt_reset_last_branch_rb(ptq); | ||
1130 | |||
1024 | return ret; | 1131 | return ret; |
1025 | } | 1132 | } |
1026 | 1133 | ||
@@ -1116,6 +1223,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) | |||
1116 | return err; | 1223 | return err; |
1117 | } | 1224 | } |
1118 | 1225 | ||
1226 | if (pt->synth_opts.last_branch) | ||
1227 | intel_pt_update_last_branch_rb(ptq); | ||
1228 | |||
1119 | if (!pt->sync_switch) | 1229 | if (!pt->sync_switch) |
1120 | return 0; | 1230 | return 0; |
1121 | 1231 | ||
@@ -1763,6 +1873,8 @@ static int intel_pt_synth_events(struct intel_pt *pt, | |||
1763 | pt->instructions_sample_period = attr.sample_period; | 1873 | pt->instructions_sample_period = attr.sample_period; |
1764 | if (pt->synth_opts.callchain) | 1874 | if (pt->synth_opts.callchain) |
1765 | attr.sample_type |= PERF_SAMPLE_CALLCHAIN; | 1875 | attr.sample_type |= PERF_SAMPLE_CALLCHAIN; |
1876 | if (pt->synth_opts.last_branch) | ||
1877 | attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; | ||
1766 | pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", | 1878 | pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", |
1767 | id, (u64)attr.sample_type); | 1879 | id, (u64)attr.sample_type); |
1768 | err = intel_pt_synth_event(session, &attr, id); | 1880 | err = intel_pt_synth_event(session, &attr, id); |
@@ -1782,6 +1894,8 @@ static int intel_pt_synth_events(struct intel_pt *pt, | |||
1782 | attr.sample_period = 1; | 1894 | attr.sample_period = 1; |
1783 | if (pt->synth_opts.callchain) | 1895 | if (pt->synth_opts.callchain) |
1784 | attr.sample_type |= PERF_SAMPLE_CALLCHAIN; | 1896 | attr.sample_type |= PERF_SAMPLE_CALLCHAIN; |
1897 | if (pt->synth_opts.last_branch) | ||
1898 | attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; | ||
1785 | pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", | 1899 | pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", |
1786 | id, (u64)attr.sample_type); | 1900 | id, (u64)attr.sample_type); |
1787 | err = intel_pt_synth_event(session, &attr, id); | 1901 | err = intel_pt_synth_event(session, &attr, id); |
@@ -1808,6 +1922,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, | |||
1808 | attr.sample_period = 1; | 1922 | attr.sample_period = 1; |
1809 | attr.sample_type |= PERF_SAMPLE_ADDR; | 1923 | attr.sample_type |= PERF_SAMPLE_ADDR; |
1810 | attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; | 1924 | attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; |
1925 | attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; | ||
1811 | pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", | 1926 | pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", |
1812 | id, (u64)attr.sample_type); | 1927 | id, (u64)attr.sample_type); |
1813 | err = intel_pt_synth_event(session, &attr, id); | 1928 | err = intel_pt_synth_event(session, &attr, id); |