aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Adrian Hunter <adrian.hunter@intel.com> 2015-09-25 09:15:56 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2015-09-28 16:21:00 -0400
commit ba11ba65e02836c475427ae199adfc2d8cc4a900 (patch)
tree eb9e5d47f7147af4a662b92c0ba2c6ac2bc9dc27
parent f56fb9864c501dc85ebe40af5bf925dd07d990c0 (diff)
perf intel-pt: Add mispred-all config option to aid use with autofdo
autofdo incorrectly expects branch flags to include either mispred or
predicted. In fact mispred = predicted = 0 is valid and means the flags
are not supported, which they aren't by Intel PT.

To make autofdo work, add a config option which will cause Intel PT
decoder to set the mispred flag on all branches.

Below is an example of using Intel PT with autofdo. The example is also
added to the Intel PT documentation. It requires autofdo
(https://github.com/google/autofdo) and gcc version 5. The bubble sort
example is from the AutoFDO tutorial
(https://gcc.gnu.org/wiki/AutoFDO/Tutorial) amended to take the number
of elements as a parameter.

    $ gcc-5 -O3 sort.c -o sort_optimized
    $ ./sort_optimized 30000
    Bubble sorting array of 30000 elements
    2254 ms

    $ cat ~/.perfconfig
    [intel-pt]
    mispred-all

    $ perf record -e intel_pt//u ./sort 3000
    Bubble sorting array of 3000 elements
    58 ms
    [ perf record: Woken up 2 times to write data ]
    [ perf record: Captured and wrote 3.939 MB perf.data ]
    $ perf inject -i perf.data -o inj --itrace=i100usle --strip
    $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
    $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
    $ ./sort_autofdo 30000
    Bubble sorting array of 30000 elements
    2155 ms

Note there is currently no advantage to using Intel PT instead of LBR,
but that may change in the future if greater use is made of the data.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1443186956-18718-26-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- tools/perf/Documentation/intel-pt.txt 29
-rw-r--r-- tools/perf/util/intel-pt.c 14
2 files changed, 43 insertions, 0 deletions
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index a0fbb5d71f7d..be764f9ec769 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -764,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is
764removed and replaced with the synthesized events. e.g. 764removed and replaced with the synthesized events. e.g.
765 765
766 perf inject --itrace -i perf.data -o perf.data.new 766 perf inject --itrace -i perf.data -o perf.data.new
767
768Below is an example of using Intel PT with autofdo. It requires autofdo
769(https://github.com/google/autofdo) and gcc version 5. The bubble
770sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
771amended to take the number of elements as a parameter.
772
773 $ gcc-5 -O3 sort.c -o sort_optimized
774 $ ./sort_optimized 30000
775 Bubble sorting array of 30000 elements
776 2254 ms
777
778 $ cat ~/.perfconfig
779 [intel-pt]
780 mispred-all
781
782 $ perf record -e intel_pt//u ./sort 3000
783 Bubble sorting array of 3000 elements
784 58 ms
785 [ perf record: Woken up 2 times to write data ]
786 [ perf record: Captured and wrote 3.939 MB perf.data ]
787 $ perf inject -i perf.data -o inj --itrace=i100usle --strip
788 $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
789 $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
790 $ ./sort_autofdo 30000
791 Bubble sorting array of 30000 elements
792 2155 ms
793
794Note there is currently no advantage to using Intel PT instead of LBR, but
795that may change in the future if greater use is made of the data.
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 05e8fcc5188b..03ff072b5993 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -64,6 +64,7 @@ struct intel_pt {
64 bool data_queued; 64 bool data_queued;
65 bool est_tsc; 65 bool est_tsc;
66 bool sync_switch; 66 bool sync_switch;
67 bool mispred_all;
67 int have_sched_switch; 68 int have_sched_switch;
68 u32 pmu_type; 69 u32 pmu_type;
69 u64 kernel_start; 70 u64 kernel_start;
@@ -943,6 +944,7 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
943 be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); 944 be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
944 be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); 945 be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
945 /* No support for mispredict */ 946 /* No support for mispredict */
947 be->flags.mispred = ptq->pt->mispred_all;
946 948
947 if (bs->nr < ptq->pt->synth_opts.last_branch_sz) 949 if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
948 bs->nr += 1; 950 bs->nr += 1;
@@ -1967,6 +1969,16 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist)
1967 return false; 1969 return false;
1968} 1970}
1969 1971
1972static int intel_pt_perf_config(const char *var, const char *value, void *data)
1973{
1974 struct intel_pt *pt = data;
1975
1976 if (!strcmp(var, "intel-pt.mispred-all"))
1977 pt->mispred_all = perf_config_bool(var, value);
1978
1979 return 0;
1980}
1981
1970static const char * const intel_pt_info_fmts[] = { 1982static const char * const intel_pt_info_fmts[] = {
1971 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", 1983 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
1972 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", 1984 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
@@ -2011,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
2011 if (!pt) 2023 if (!pt)
2012 return -ENOMEM; 2024 return -ENOMEM;
2013 2025
2026 perf_config(intel_pt_perf_config, pt);
2027
2014 err = auxtrace_queues__init(&pt->queues); 2028 err = auxtrace_queues__init(&pt->queues);
2015 if (err) 2029 if (err)
2016 goto err_free; 2030 goto err_free;