author		Adrian Hunter <adrian.hunter@intel.com>	2019-06-04 09:00:09 -0400
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2019-06-10 15:20:12 -0400
commit		2c47db90ed71af9c12d5600dbcef864761d76b3d (patch)
tree		c1e3aaa62c0977ad3930f9ee71d9ad3924595da6
parent		da9000ae35027fb7305b8cad0b37df71937ad578 (diff)
perf intel-pt: Add support for efficient time interval filtering
Set up time ranges for efficient time interval filtering using the new
"fast forward" facility. Because decoding is done in time order,
intel_pt_time_filter() needs to look only at the next start or end
timestamp - refer to intel_pt_next_time().

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20190604130017.31207-12-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--	tools/perf/util/intel-pt.c	| 208
1 file changed, 208 insertions(+), 0 deletions(-)
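The hunks below implement the scheme the commit message describes: the requested perf time ranges (held in itrace_synth_opts->ptime_range, which earlier patches in this series fill from perf's --time option) are converted once into TSC-based struct range entries, and each queue keeps a cursor (sel_idx, sel_start, sel_timestamp) that only ever compares the current timestamp against the next range boundary. For orientation, here is a minimal self-contained model of that cursor; the names cursor, filter and at_start are hypothetical, chosen for illustration, and the real code fast forwards the decoder to the next start time rather than merely dropping samples:

/* Toy model of the next-boundary cursor; not the kernel code itself. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct range { uint64_t start, end; };

struct cursor {
	const struct range *ranges;	/* sorted, non-overlapping */
	unsigned int cnt;
	unsigned int idx;		/* like ptq->sel_idx */
	bool at_start;			/* like ptq->sel_start */
	uint64_t boundary;		/* like ptq->sel_timestamp */
};

static void cursor_init(struct cursor *c, const struct range *r, unsigned int n)
{
	c->ranges = r;
	c->cnt = n;
	c->idx = 0;
	c->at_start = r[0].start != 0;
	c->boundary = c->at_start ? r[0].start : r[0].end;
}

/* Advance to the next boundary; false when there is none (cf. intel_pt_next_time) */
static bool cursor_next(struct cursor *c)
{
	if (c->at_start) {
		c->at_start = false;
		c->boundary = c->ranges[c->idx].end;
		return true;
	} else if (c->idx + 1 < c->cnt) {
		c->at_start = true;
		c->idx += 1;
		c->boundary = c->ranges[c->idx].start;
		return true;
	}
	return false;
}

/* For time-ordered ts: 1 = keep, 0 = drop, -1 = stop (cf. intel_pt_time_filter) */
static int filter(struct cursor *c, uint64_t ts)
{
	while (1) {
		if (c->at_start) {
			if (ts >= c->boundary) {
				cursor_next(c);
				if (!c->boundary)
					return 1;	/* open-ended range */
				continue;		/* now check the end time */
			}
			return 0;	/* before start: real code fast forwards here */
		} else if (ts > c->boundary) {
			if (!cursor_next(c))
				return -1;	/* past the last range */
			continue;		/* check the next start time */
		} else {
			return 1;	/* inside a range */
		}
	}
}

int main(void)
{
	const struct range ranges[] = { { 10, 20 }, { 30, 40 } };
	struct cursor c;
	uint64_t ts;

	cursor_init(&c, ranges, 2);
	for (ts = 5; ts <= 45; ts += 5) {
		int v = filter(&c, ts);

		printf("ts=%2llu -> %s\n", (unsigned long long)ts,
		       v < 0 ? "stop" : v ? "keep" : "drop");
		if (v < 0)
			break;
	}
	return 0;
}

With the two ranges above, the model keeps 10..20 and 30..40, drops 5 and 25, and stops at 45, which is the keep/drop/stop decision intel_pt_time_filter() makes as decoding proceeds in time order.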
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 3e3a01318b76..43ddc78a066e 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -42,6 +42,7 @@
 #include "tsc.h"
 #include "intel-pt.h"
 #include "config.h"
+#include "time-utils.h"
 
 #include "intel-pt-decoder/intel-pt-log.h"
 #include "intel-pt-decoder/intel-pt-decoder.h"
@@ -50,6 +51,11 @@
 
 #define MAX_TIMESTAMP (~0ULL)
 
+struct range {
+	u64 start;
+	u64 end;
+};
+
 struct intel_pt {
 	struct auxtrace auxtrace;
 	struct auxtrace_queues queues;
@@ -118,6 +124,9 @@ struct intel_pt {
 
 	char *filter;
 	struct addr_filters filts;
+
+	struct range *time_ranges;
+	unsigned int range_cnt;
 };
 
 enum switch_state {
@@ -154,6 +163,9 @@ struct intel_pt_queue {
 	bool have_sample;
 	u64 time;
 	u64 timestamp;
+	u64 sel_timestamp;
+	bool sel_start;
+	unsigned int sel_idx;
 	u32 flags;
 	u16 insn_len;
 	u64 last_insn_cnt;
@@ -1007,6 +1019,23 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 		ptq->flags |= PERF_IP_FLAG_TRACE_END;
 }
 
+static void intel_pt_setup_time_range(struct intel_pt *pt,
+				      struct intel_pt_queue *ptq)
+{
+	if (!pt->range_cnt)
+		return;
+
+	ptq->sel_timestamp = pt->time_ranges[0].start;
+	ptq->sel_idx = 0;
+
+	if (ptq->sel_timestamp) {
+		ptq->sel_start = true;
+	} else {
+		ptq->sel_timestamp = pt->time_ranges[0].end;
+		ptq->sel_start = false;
+	}
+}
+
 static int intel_pt_setup_queue(struct intel_pt *pt,
 				struct auxtrace_queue *queue,
 				unsigned int queue_nr)
@@ -1031,6 +1060,8 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
 			ptq->step_through_buffers = true;
 
 		ptq->sync_switch = pt->sync_switch;
+
+		intel_pt_setup_time_range(pt, ptq);
 	}
 
 	if (!ptq->on_heap &&
@@ -1045,6 +1076,14 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
 		intel_pt_log("queue %u getting timestamp\n", queue_nr);
 		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
 			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+		if (ptq->sel_start && ptq->sel_timestamp) {
+			ret = intel_pt_fast_forward(ptq->decoder,
+						    ptq->sel_timestamp);
+			if (ret)
+				return ret;
+		}
+
 		while (1) {
 			state = intel_pt_decode(ptq->decoder);
 			if (state->err) {
@@ -1064,6 +1103,9 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
 			     queue_nr, ptq->timestamp);
 		ptq->state = state;
 		ptq->have_sample = true;
+		if (ptq->sel_start && ptq->sel_timestamp &&
+		    ptq->timestamp < ptq->sel_timestamp)
+			ptq->have_sample = false;
 		intel_pt_sample_flags(ptq);
 		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
 		if (ret)
@@ -1750,10 +1792,83 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt)
 	}
 }
 
+/*
+ * To filter against time ranges, it is only necessary to look at the next start
+ * or end time.
+ */
+static bool intel_pt_next_time(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+
+	if (ptq->sel_start) {
+		/* Next time is an end time */
+		ptq->sel_start = false;
+		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
+		return true;
+	} else if (ptq->sel_idx + 1 < pt->range_cnt) {
+		/* Next time is a start time */
+		ptq->sel_start = true;
+		ptq->sel_idx += 1;
+		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
+		return true;
+	}
+
+	/* No next time */
+	return false;
+}
+
+static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
+{
+	int err;
+
+	while (1) {
+		if (ptq->sel_start) {
+			if (ptq->timestamp >= ptq->sel_timestamp) {
+				/* After start time, so consider next time */
+				intel_pt_next_time(ptq);
+				if (!ptq->sel_timestamp) {
+					/* No end time */
+					return 0;
+				}
+				/* Check against end time */
+				continue;
+			}
+			/* Before start time, so fast forward */
+			ptq->have_sample = false;
+			if (ptq->sel_timestamp > *ff_timestamp) {
+				if (ptq->sync_switch) {
+					intel_pt_next_tid(ptq->pt, ptq);
+					ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+				}
+				*ff_timestamp = ptq->sel_timestamp;
+				err = intel_pt_fast_forward(ptq->decoder,
+							    ptq->sel_timestamp);
+				if (err)
+					return err;
+			}
+			return 0;
+		} else if (ptq->timestamp > ptq->sel_timestamp) {
+			/* After end time, so consider next time */
+			if (!intel_pt_next_time(ptq)) {
+				/* No next time range, so stop decoding */
+				ptq->have_sample = false;
+				ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+				return 1;
+			}
+			/* Check against next start time */
+			continue;
+		} else {
+			/* Before end time */
+			return 0;
+		}
+	}
+}
+
 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
 {
 	const struct intel_pt_state *state = ptq->state;
 	struct intel_pt *pt = ptq->pt;
+	u64 ff_timestamp = 0;
 	int err;
 
 	if (!pt->kernel_start) {
@@ -1818,6 +1933,12 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
 			ptq->timestamp = state->timestamp;
 		}
 
+		if (ptq->sel_timestamp) {
+			err = intel_pt_time_filter(ptq, &ff_timestamp);
+			if (err)
+				return err;
+		}
+
 		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
 			*timestamp = ptq->timestamp;
 			return 0;
@@ -2223,6 +2344,7 @@ static void intel_pt_free(struct perf_session *session)
 	thread__put(pt->unknown_thread);
 	addr_filters__exit(&pt->filts);
 	zfree(&pt->filter);
+	zfree(&pt->time_ranges);
 	free(pt);
 }
 
@@ -2520,6 +2642,85 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data)
 	return 0;
 }
 
+/* Find least TSC which converts to ns or later */
+static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
+{
+	u64 tsc, tm;
+
+	tsc = perf_time_to_tsc(ns, &pt->tc);
+
+	while (1) {
+		tm = tsc_to_perf_time(tsc, &pt->tc);
+		if (tm < ns)
+			break;
+		tsc -= 1;
+	}
+
+	while (tm < ns)
+		tm = tsc_to_perf_time(++tsc, &pt->tc);
+
+	return tsc;
+}
+
+/* Find greatest TSC which converts to ns or earlier */
+static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
+{
+	u64 tsc, tm;
+
+	tsc = perf_time_to_tsc(ns, &pt->tc);
+
+	while (1) {
+		tm = tsc_to_perf_time(tsc, &pt->tc);
+		if (tm > ns)
+			break;
+		tsc += 1;
+	}
+
+	while (tm > ns)
+		tm = tsc_to_perf_time(--tsc, &pt->tc);
+
+	return tsc;
+}
+
+static int intel_pt_setup_time_ranges(struct intel_pt *pt,
+				      struct itrace_synth_opts *opts)
+{
+	struct perf_time_interval *p = opts->ptime_range;
+	int n = opts->range_num;
+	int i;
+
+	if (!n || !p || pt->timeless_decoding)
+		return 0;
+
+	pt->time_ranges = calloc(n, sizeof(struct range));
+	if (!pt->time_ranges)
+		return -ENOMEM;
+
+	pt->range_cnt = n;
+
+	intel_pt_log("%s: %u range(s)\n", __func__, n);
+
+	for (i = 0; i < n; i++) {
+		struct range *r = &pt->time_ranges[i];
+		u64 ts = p[i].start;
+		u64 te = p[i].end;
+
+		/*
+		 * Take care to ensure the TSC range matches the perf time
+		 * range when converted back to perf time.
+		 */
+		r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
+		r->end = te ? intel_pt_tsc_end(te, pt) : 0;
+
+		intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
+			     i, ts, te);
+		intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
+			     i, r->start, r->end);
+	}
+
+	return 0;
+}
+
 static const char * const intel_pt_info_fmts[] = {
 	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
 	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
@@ -2752,6 +2953,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 		pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
 	}
 
+	if (session->itrace_synth_opts) {
+		err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
+		if (err)
+			goto err_delete_thread;
+	}
+
 	if (pt->synth_opts.calls)
 		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
 				       PERF_IP_FLAG_TRACE_END;
@@ -2792,6 +2999,7 @@ err_free_queues:
 err_free:
 	addr_filters__exit(&pt->filts);
 	zfree(&pt->filter);
+	zfree(&pt->time_ranges);
 	free(pt);
 	return err;
 }
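A closing note on intel_pt_tsc_start() and intel_pt_tsc_end(): perf_time_to_tsc() and tsc_to_perf_time() are not exact inverses (their integer multiply-shift arithmetic truncates), so a single conversion can land one tick outside the requested window; the two helpers therefore probe neighbouring TSC values until the round trip lands on the correct side of the boundary. The effect can be reproduced in isolation with a toy truncating conversion; the 8/3 scale factor below is hypothetical, standing in for perf's real time_mult/time_shift parameters:

/* Toy demonstration of inward TSC boundary rounding; not the kernel code itself. */
#include <stdint.h>
#include <stdio.h>

/* Deliberately lossy conversions to create an inexact round trip */
static uint64_t ns_to_tsc(uint64_t ns)  { return ns * 8 / 3; }
static uint64_t tsc_to_ns(uint64_t tsc) { return tsc * 3 / 8; }

/* Least TSC which converts to ns or later (cf. intel_pt_tsc_start) */
static uint64_t tsc_start(uint64_t ns)
{
	uint64_t tsc = ns_to_tsc(ns), tm;

	while (1) {
		tm = tsc_to_ns(tsc);
		if (tm < ns)
			break;
		tsc -= 1;
	}
	while (tm < ns)
		tm = tsc_to_ns(++tsc);
	return tsc;
}

/* Greatest TSC which converts to ns or earlier (cf. intel_pt_tsc_end) */
static uint64_t tsc_end(uint64_t ns)
{
	uint64_t tsc = ns_to_tsc(ns), tm;

	while (1) {
		tm = tsc_to_ns(tsc);
		if (tm > ns)
			break;
		tsc += 1;
	}
	while (tm > ns)
		tm = tsc_to_ns(--tsc);
	return tsc;
}

int main(void)
{
	uint64_t ns = 100;
	uint64_t naive = ns_to_tsc(ns);
	uint64_t s = tsc_start(ns);
	uint64_t e = tsc_end(ns);

	printf("naive tsc %llu -> back to %llu ns (too early)\n",
	       (unsigned long long)naive, (unsigned long long)tsc_to_ns(naive));
	printf("start tsc %llu -> back to %llu ns\n",
	       (unsigned long long)s, (unsigned long long)tsc_to_ns(s));
	printf("end   tsc %llu -> back to %llu ns\n",
	       (unsigned long long)e, (unsigned long long)tsc_to_ns(e));
	return 0;
}

Here ns_to_tsc(100) = 266, which converts back to 99 ns, one nanosecond before the requested start; tsc_start(100) corrects that to 267 (back to 100 ns), and tsc_end(100) returns 269, the last tick that still converts to 100 ns. Rounding both boundaries inward like this is what the in-diff comment means by ensuring the TSC range matches the perf time range when converted back.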