diff options
-rw-r--r-- | tools/perf/util/intel-pt.c | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3e3a01318b76..43ddc78a066e 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include "tsc.h" | 42 | #include "tsc.h" |
43 | #include "intel-pt.h" | 43 | #include "intel-pt.h" |
44 | #include "config.h" | 44 | #include "config.h" |
45 | #include "time-utils.h" | ||
45 | 46 | ||
46 | #include "intel-pt-decoder/intel-pt-log.h" | 47 | #include "intel-pt-decoder/intel-pt-log.h" |
47 | #include "intel-pt-decoder/intel-pt-decoder.h" | 48 | #include "intel-pt-decoder/intel-pt-decoder.h" |
@@ -50,6 +51,11 @@ | |||
50 | 51 | ||
51 | #define MAX_TIMESTAMP (~0ULL) | 52 | #define MAX_TIMESTAMP (~0ULL) |
52 | 53 | ||
54 | struct range { | ||
55 | u64 start; | ||
56 | u64 end; | ||
57 | }; | ||
58 | |||
53 | struct intel_pt { | 59 | struct intel_pt { |
54 | struct auxtrace auxtrace; | 60 | struct auxtrace auxtrace; |
55 | struct auxtrace_queues queues; | 61 | struct auxtrace_queues queues; |
@@ -118,6 +124,9 @@ struct intel_pt { | |||
118 | 124 | ||
119 | char *filter; | 125 | char *filter; |
120 | struct addr_filters filts; | 126 | struct addr_filters filts; |
127 | |||
128 | struct range *time_ranges; | ||
129 | unsigned int range_cnt; | ||
121 | }; | 130 | }; |
122 | 131 | ||
123 | enum switch_state { | 132 | enum switch_state { |
@@ -154,6 +163,9 @@ struct intel_pt_queue { | |||
154 | bool have_sample; | 163 | bool have_sample; |
155 | u64 time; | 164 | u64 time; |
156 | u64 timestamp; | 165 | u64 timestamp; |
166 | u64 sel_timestamp; | ||
167 | bool sel_start; | ||
168 | unsigned int sel_idx; | ||
157 | u32 flags; | 169 | u32 flags; |
158 | u16 insn_len; | 170 | u16 insn_len; |
159 | u64 last_insn_cnt; | 171 | u64 last_insn_cnt; |
@@ -1007,6 +1019,23 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) | |||
1007 | ptq->flags |= PERF_IP_FLAG_TRACE_END; | 1019 | ptq->flags |= PERF_IP_FLAG_TRACE_END; |
1008 | } | 1020 | } |
1009 | 1021 | ||
1022 | static void intel_pt_setup_time_range(struct intel_pt *pt, | ||
1023 | struct intel_pt_queue *ptq) | ||
1024 | { | ||
1025 | if (!pt->range_cnt) | ||
1026 | return; | ||
1027 | |||
1028 | ptq->sel_timestamp = pt->time_ranges[0].start; | ||
1029 | ptq->sel_idx = 0; | ||
1030 | |||
1031 | if (ptq->sel_timestamp) { | ||
1032 | ptq->sel_start = true; | ||
1033 | } else { | ||
1034 | ptq->sel_timestamp = pt->time_ranges[0].end; | ||
1035 | ptq->sel_start = false; | ||
1036 | } | ||
1037 | } | ||
1038 | |||
1010 | static int intel_pt_setup_queue(struct intel_pt *pt, | 1039 | static int intel_pt_setup_queue(struct intel_pt *pt, |
1011 | struct auxtrace_queue *queue, | 1040 | struct auxtrace_queue *queue, |
1012 | unsigned int queue_nr) | 1041 | unsigned int queue_nr) |
@@ -1031,6 +1060,8 @@ static int intel_pt_setup_queue(struct intel_pt *pt, | |||
1031 | ptq->step_through_buffers = true; | 1060 | ptq->step_through_buffers = true; |
1032 | 1061 | ||
1033 | ptq->sync_switch = pt->sync_switch; | 1062 | ptq->sync_switch = pt->sync_switch; |
1063 | |||
1064 | intel_pt_setup_time_range(pt, ptq); | ||
1034 | } | 1065 | } |
1035 | 1066 | ||
1036 | if (!ptq->on_heap && | 1067 | if (!ptq->on_heap && |
@@ -1045,6 +1076,14 @@ static int intel_pt_setup_queue(struct intel_pt *pt, | |||
1045 | intel_pt_log("queue %u getting timestamp\n", queue_nr); | 1076 | intel_pt_log("queue %u getting timestamp\n", queue_nr); |
1046 | intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", | 1077 | intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", |
1047 | queue_nr, ptq->cpu, ptq->pid, ptq->tid); | 1078 | queue_nr, ptq->cpu, ptq->pid, ptq->tid); |
1079 | |||
1080 | if (ptq->sel_start && ptq->sel_timestamp) { | ||
1081 | ret = intel_pt_fast_forward(ptq->decoder, | ||
1082 | ptq->sel_timestamp); | ||
1083 | if (ret) | ||
1084 | return ret; | ||
1085 | } | ||
1086 | |||
1048 | while (1) { | 1087 | while (1) { |
1049 | state = intel_pt_decode(ptq->decoder); | 1088 | state = intel_pt_decode(ptq->decoder); |
1050 | if (state->err) { | 1089 | if (state->err) { |
@@ -1064,6 +1103,9 @@ static int intel_pt_setup_queue(struct intel_pt *pt, | |||
1064 | queue_nr, ptq->timestamp); | 1103 | queue_nr, ptq->timestamp); |
1065 | ptq->state = state; | 1104 | ptq->state = state; |
1066 | ptq->have_sample = true; | 1105 | ptq->have_sample = true; |
1106 | if (ptq->sel_start && ptq->sel_timestamp && | ||
1107 | ptq->timestamp < ptq->sel_timestamp) | ||
1108 | ptq->have_sample = false; | ||
1067 | intel_pt_sample_flags(ptq); | 1109 | intel_pt_sample_flags(ptq); |
1068 | ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); | 1110 | ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); |
1069 | if (ret) | 1111 | if (ret) |
@@ -1750,10 +1792,83 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt) | |||
1750 | } | 1792 | } |
1751 | } | 1793 | } |
1752 | 1794 | ||
1795 | /* | ||
1796 | * To filter against time ranges, it is only necessary to look at the next start | ||
1797 | * or end time. | ||
1798 | */ | ||
1799 | static bool intel_pt_next_time(struct intel_pt_queue *ptq) | ||
1800 | { | ||
1801 | struct intel_pt *pt = ptq->pt; | ||
1802 | |||
1803 | if (ptq->sel_start) { | ||
1804 | /* Next time is an end time */ | ||
1805 | ptq->sel_start = false; | ||
1806 | ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end; | ||
1807 | return true; | ||
1808 | } else if (ptq->sel_idx + 1 < pt->range_cnt) { | ||
1809 | /* Next time is a start time */ | ||
1810 | ptq->sel_start = true; | ||
1811 | ptq->sel_idx += 1; | ||
1812 | ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start; | ||
1813 | return true; | ||
1814 | } | ||
1815 | |||
1816 | /* No next time */ | ||
1817 | return false; | ||
1818 | } | ||
1819 | |||
1820 | static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp) | ||
1821 | { | ||
1822 | int err; | ||
1823 | |||
1824 | while (1) { | ||
1825 | if (ptq->sel_start) { | ||
1826 | if (ptq->timestamp >= ptq->sel_timestamp) { | ||
1827 | /* After start time, so consider next time */ | ||
1828 | intel_pt_next_time(ptq); | ||
1829 | if (!ptq->sel_timestamp) { | ||
1830 | /* No end time */ | ||
1831 | return 0; | ||
1832 | } | ||
1833 | /* Check against end time */ | ||
1834 | continue; | ||
1835 | } | ||
1836 | /* Before start time, so fast forward */ | ||
1837 | ptq->have_sample = false; | ||
1838 | if (ptq->sel_timestamp > *ff_timestamp) { | ||
1839 | if (ptq->sync_switch) { | ||
1840 | intel_pt_next_tid(ptq->pt, ptq); | ||
1841 | ptq->switch_state = INTEL_PT_SS_UNKNOWN; | ||
1842 | } | ||
1843 | *ff_timestamp = ptq->sel_timestamp; | ||
1844 | err = intel_pt_fast_forward(ptq->decoder, | ||
1845 | ptq->sel_timestamp); | ||
1846 | if (err) | ||
1847 | return err; | ||
1848 | } | ||
1849 | return 0; | ||
1850 | } else if (ptq->timestamp > ptq->sel_timestamp) { | ||
1851 | /* After end time, so consider next time */ | ||
1852 | if (!intel_pt_next_time(ptq)) { | ||
1853 | /* No next time range, so stop decoding */ | ||
1854 | ptq->have_sample = false; | ||
1855 | ptq->switch_state = INTEL_PT_SS_NOT_TRACING; | ||
1856 | return 1; | ||
1857 | } | ||
1858 | /* Check against next start time */ | ||
1859 | continue; | ||
1860 | } else { | ||
1861 | /* Before end time */ | ||
1862 | return 0; | ||
1863 | } | ||
1864 | } | ||
1865 | } | ||
1866 | |||
1753 | static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) | 1867 | static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) |
1754 | { | 1868 | { |
1755 | const struct intel_pt_state *state = ptq->state; | 1869 | const struct intel_pt_state *state = ptq->state; |
1756 | struct intel_pt *pt = ptq->pt; | 1870 | struct intel_pt *pt = ptq->pt; |
1871 | u64 ff_timestamp = 0; | ||
1757 | int err; | 1872 | int err; |
1758 | 1873 | ||
1759 | if (!pt->kernel_start) { | 1874 | if (!pt->kernel_start) { |
@@ -1818,6 +1933,12 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) | |||
1818 | ptq->timestamp = state->timestamp; | 1933 | ptq->timestamp = state->timestamp; |
1819 | } | 1934 | } |
1820 | 1935 | ||
1936 | if (ptq->sel_timestamp) { | ||
1937 | err = intel_pt_time_filter(ptq, &ff_timestamp); | ||
1938 | if (err) | ||
1939 | return err; | ||
1940 | } | ||
1941 | |||
1821 | if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { | 1942 | if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { |
1822 | *timestamp = ptq->timestamp; | 1943 | *timestamp = ptq->timestamp; |
1823 | return 0; | 1944 | return 0; |
@@ -2223,6 +2344,7 @@ static void intel_pt_free(struct perf_session *session) | |||
2223 | thread__put(pt->unknown_thread); | 2344 | thread__put(pt->unknown_thread); |
2224 | addr_filters__exit(&pt->filts); | 2345 | addr_filters__exit(&pt->filts); |
2225 | zfree(&pt->filter); | 2346 | zfree(&pt->filter); |
2347 | zfree(&pt->time_ranges); | ||
2226 | free(pt); | 2348 | free(pt); |
2227 | } | 2349 | } |
2228 | 2350 | ||
@@ -2520,6 +2642,85 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data) | |||
2520 | return 0; | 2642 | return 0; |
2521 | } | 2643 | } |
2522 | 2644 | ||
2645 | /* Find least TSC which converts to ns or later */ | ||
2646 | static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt) | ||
2647 | { | ||
2648 | u64 tsc, tm; | ||
2649 | |||
2650 | tsc = perf_time_to_tsc(ns, &pt->tc); | ||
2651 | |||
2652 | while (1) { | ||
2653 | tm = tsc_to_perf_time(tsc, &pt->tc); | ||
2654 | if (tm < ns) | ||
2655 | break; | ||
2656 | tsc -= 1; | ||
2657 | } | ||
2658 | |||
2659 | while (tm < ns) | ||
2660 | tm = tsc_to_perf_time(++tsc, &pt->tc); | ||
2661 | |||
2662 | return tsc; | ||
2663 | } | ||
2664 | |||
2665 | /* Find greatest TSC which converts to ns or earlier */ | ||
2666 | static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt) | ||
2667 | { | ||
2668 | u64 tsc, tm; | ||
2669 | |||
2670 | tsc = perf_time_to_tsc(ns, &pt->tc); | ||
2671 | |||
2672 | while (1) { | ||
2673 | tm = tsc_to_perf_time(tsc, &pt->tc); | ||
2674 | if (tm > ns) | ||
2675 | break; | ||
2676 | tsc += 1; | ||
2677 | } | ||
2678 | |||
2679 | while (tm > ns) | ||
2680 | tm = tsc_to_perf_time(--tsc, &pt->tc); | ||
2681 | |||
2682 | return tsc; | ||
2683 | } | ||
2684 | |||
2685 | static int intel_pt_setup_time_ranges(struct intel_pt *pt, | ||
2686 | struct itrace_synth_opts *opts) | ||
2687 | { | ||
2688 | struct perf_time_interval *p = opts->ptime_range; | ||
2689 | int n = opts->range_num; | ||
2690 | int i; | ||
2691 | |||
2692 | if (!n || !p || pt->timeless_decoding) | ||
2693 | return 0; | ||
2694 | |||
2695 | pt->time_ranges = calloc(n, sizeof(struct range)); | ||
2696 | if (!pt->time_ranges) | ||
2697 | return -ENOMEM; | ||
2698 | |||
2699 | pt->range_cnt = n; | ||
2700 | |||
2701 | intel_pt_log("%s: %u range(s)\n", __func__, n); | ||
2702 | |||
2703 | for (i = 0; i < n; i++) { | ||
2704 | struct range *r = &pt->time_ranges[i]; | ||
2705 | u64 ts = p[i].start; | ||
2706 | u64 te = p[i].end; | ||
2707 | |||
2708 | /* | ||
2709 | * Take care to ensure the TSC range matches the perf-time range | ||
2710 | * when converted back to perf-time. | ||
2711 | */ | ||
2712 | r->start = ts ? intel_pt_tsc_start(ts, pt) : 0; | ||
2713 | r->end = te ? intel_pt_tsc_end(te, pt) : 0; | ||
2714 | |||
2715 | intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n", | ||
2716 | i, ts, te); | ||
2717 | intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n", | ||
2718 | i, r->start, r->end); | ||
2719 | } | ||
2720 | |||
2721 | return 0; | ||
2722 | } | ||
2723 | |||
2523 | static const char * const intel_pt_info_fmts[] = { | 2724 | static const char * const intel_pt_info_fmts[] = { |
2524 | [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", | 2725 | [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", |
2525 | [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", | 2726 | [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", |
@@ -2752,6 +2953,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event, | |||
2752 | pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; | 2953 | pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; |
2753 | } | 2954 | } |
2754 | 2955 | ||
2956 | if (session->itrace_synth_opts) { | ||
2957 | err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts); | ||
2958 | if (err) | ||
2959 | goto err_delete_thread; | ||
2960 | } | ||
2961 | |||
2755 | if (pt->synth_opts.calls) | 2962 | if (pt->synth_opts.calls) |
2756 | pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | | 2963 | pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | |
2757 | PERF_IP_FLAG_TRACE_END; | 2964 | PERF_IP_FLAG_TRACE_END; |
@@ -2792,6 +2999,7 @@ err_free_queues: | |||
2792 | err_free: | 2999 | err_free: |
2793 | addr_filters__exit(&pt->filts); | 3000 | addr_filters__exit(&pt->filts); |
2794 | zfree(&pt->filter); | 3001 | zfree(&pt->filter); |
3002 | zfree(&pt->time_ranges); | ||
2795 | free(pt); | 3003 | free(pt); |
2796 | return err; | 3004 | return err; |
2797 | } | 3005 | } |