Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel_ds.c')
-rw-r--r--   arch/x86/kernel/cpu/perf_event_intel_ds.c | 203
 1 file changed, 115 insertions(+), 88 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ab3ba1c1b7dd..ae96cfa5eddd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -12,6 +12,7 @@
 
 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
 #define PEBS_BUFFER_SIZE	PAGE_SIZE
+#define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 /*
  * pebs_record_32 for p4 and core not supported
@@ -182,18 +183,32 @@ struct pebs_record_nhm {
  * Same as pebs_record_nhm, with two additional fields.
  */
 struct pebs_record_hsw {
-	struct pebs_record_nhm nhm;
-	/*
-	 * Real IP of the event. In the Intel documentation this
-	 * is called eventingrip.
-	 */
-	u64 real_ip;
-	/*
-	 * TSX tuning information field: abort cycles and abort flags.
-	 */
-	u64 tsx_tuning;
+	u64 flags, ip;
+	u64 ax, bx, cx, dx;
+	u64 si, di, bp, sp;
+	u64 r8,  r9,  r10, r11;
+	u64 r12, r13, r14, r15;
+	u64 status, dla, dse, lat;
+	u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+	struct {
+		u32 cycles_last_block	: 32,
+		    hle_abort		: 1,
+		    rtm_abort		: 1,
+		    instruction_abort	: 1,
+		    non_instruction_abort : 1,
+		    retry		: 1,
+		    data_conflict	: 1,
+		    capacity_writes	: 1,
+		    capacity_reads	: 1;
+	};
+	u64	value;
 };
 
+#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL
+
 void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
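
For reference, the new layout can be exercised outside the kernel. The following standalone sketch (not part of the commit) mirrors the union and the PEBS_HSW_TSX_FLAGS mask under the same assumptions the driver makes (little-endian x86-64, GCC bitfield allocation): the low 32 bits of the raw tsx_tuning word carry the cycle count of the last transactional block, and the eight abort flags land in bits 32-39.

#include <stdint.h>
#include <stdio.h>

union hsw_tsx_tuning {
	struct {
		uint32_t cycles_last_block	: 32,
			 hle_abort		: 1,
			 rtm_abort		: 1,
			 instruction_abort	: 1,
			 non_instruction_abort	: 1,
			 retry			: 1,
			 data_conflict		: 1,
			 capacity_writes	: 1,
			 capacity_reads		: 1;
	};
	uint64_t value;
};

#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL

int main(void)
{
	/* Example raw value: 100 cycles in the last block, rtm_abort + retry set. */
	union hsw_tsx_tuning t = { .value = (0x12ULL << 32) | 100 };

	printf("cycles_last_block = %u\n", t.cycles_last_block);      /* 100 */
	printf("rtm_abort = %u, retry = %u\n", t.rtm_abort, t.retry); /* 1, 1 */
	/* The same flag byte, extracted the way intel_hsw_transaction() does: */
	printf("flags byte = %#llx\n",
	       (unsigned long long)((t.value & PEBS_HSW_TSX_FLAGS) >> 32)); /* 0x12 */
	return 0;
}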
@@ -214,12 +229,14 @@ void fini_debug_store_on_cpu(int cpu)
 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 }
 
+static DEFINE_PER_CPU(void *, insn_buffer);
+
 static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 	int node = cpu_to_node(cpu);
 	int max, thresh = 1; /* always use a single PEBS record */
-	void *buffer;
+	void *buffer, *ibuffer;
 
 	if (!x86_pmu.pebs)
 		return 0;
@@ -228,6 +245,19 @@ static int alloc_pebs_buffer(int cpu)
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
+	/*
+	 * HSW+ already provides us the eventing ip; no need to allocate this
+	 * buffer then.
+	 */
+	if (x86_pmu.intel_cap.pebs_format < 2) {
+		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+		if (!ibuffer) {
+			kfree(buffer);
+			return -ENOMEM;
+		}
+		per_cpu(insn_buffer, cpu) = ibuffer;
+	}
+
 	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
 
 	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
@@ -248,6 +278,9 @@ static void release_pebs_buffer(int cpu)
 	if (!ds || !x86_pmu.pebs)
 		return;
 
+	kfree(per_cpu(insn_buffer, cpu));
+	per_cpu(insn_buffer, cpu) = NULL;
+
 	kfree((void *)(unsigned long)ds->pebs_buffer_base);
 	ds->pebs_buffer_base = 0;
 }
@@ -715,6 +748,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	unsigned long old_to, to = cpuc->lbr_entries[0].to;
 	unsigned long ip = regs->ip;
 	int is_64bit = 0;
+	void *kaddr;
 
 	/*
 	 * We don't need to fixup if the PEBS assist is fault like
@@ -738,7 +772,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	 * unsigned math, either ip is before the start (impossible) or
 	 * the basic block is larger than 1 page (sanity)
 	 */
-	if ((ip - to) > PAGE_SIZE)
+	if ((ip - to) > PEBS_FIXUP_SIZE)
 		return 0;
 
 	/*
@@ -749,29 +783,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 		return 1;
 	}
 
+	if (!kernel_ip(ip)) {
+		int size, bytes;
+		u8 *buf = this_cpu_read(insn_buffer);
+
+		size = ip - to;	/* Must fit our buffer, see above */
+		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+		if (bytes != 0)
+			return 0;
+
+		kaddr = buf;
+	} else {
+		kaddr = (void *)to;
+	}
+
 	do {
 		struct insn insn;
-		u8 buf[MAX_INSN_SIZE];
-		void *kaddr;
 
 		old_to = to;
-		if (!kernel_ip(ip)) {
-			int bytes, size = MAX_INSN_SIZE;
-
-			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
-			if (bytes != size)
-				return 0;
-
-			kaddr = buf;
-		} else
-			kaddr = (void *)to;
 
 #ifdef CONFIG_X86_64
 		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
 #endif
 		insn_init(&insn, kaddr, is_64bit);
 		insn_get_length(&insn);
+
 		to += insn.length;
+		kaddr += insn.length;
 	} while (to < ip);
 
 	if (to == ip) {
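
The single bulk copy above is safe because the earlier '(ip - to) > PEBS_FIXUP_SIZE' check bounds size to at most one page, which is exactly what alloc_pebs_buffer() now reserves per CPU in insn_buffer; the same unsigned comparison also rejects an ip that (impossibly) lies before to, since the subtraction wraps to a huge value. A minimal userspace illustration of that wrap-around, with invented addresses (not kernel code):

#include <stdio.h>

#define PEBS_FIXUP_SIZE	4096UL		/* one page, as defined above */

int main(void)
{
	unsigned long to = 0x400100, ip = 0x400040;	/* ip "before" to */

	/* Unsigned subtraction wraps around, so one bound check covers both
	 * "block larger than a page" and "ip before the block start". */
	if ((ip - to) > PEBS_FIXUP_SIZE)
		printf("rejected: ip - to = %#lx\n", ip - to);
	return 0;
}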
@@ -786,16 +824,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	return 0;
 }
 
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+{
+	if (pebs->tsx_tuning) {
+		union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+		return tsx.cycles_last_block;
+	}
+	return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
+{
+	u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+	/* For RTM XABORTs also log the abort code from AX */
+	if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+		txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+	return txn;
+}
+
 static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_nhm to get the load latency data
-	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
+	 * We cast to the biggest pebs_record but are careful not to
+	 * unconditionally access the 'extra' entries.
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct pebs_record_nhm *pebs = __pebs;
-	struct pebs_record_hsw *pebs_hsw = __pebs;
+	struct pebs_record_hsw *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	u64 sample_type;
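
intel_hsw_transaction() relies on the hardware abort-flag byte lining up with the PERF_TXN_* bit positions, and on the RTM convention that an explicit XABORT sets bit 0 of EAX and stores its immediate in EAX[31:24]. The sketch below is a userspace restatement for illustration, not the kernel function itself; the two PERF_TXN_* constants are written out as assumed values mirroring the uapi perf_event.h definitions of the time.

#include <stdint.h>
#include <stdio.h>

/* Assumed values, mirroring include/uapi/linux/perf_event.h at the time. */
#define PERF_TXN_TRANSACTION	(1ULL << 1)
#define PERF_TXN_ABORT_SHIFT	32

#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL

/* Userspace restatement of intel_hsw_transaction(); tsx_tuning and ax
 * stand in for the corresponding pebs_record_hsw fields. */
static uint64_t hsw_transaction(uint64_t tsx_tuning, uint64_t ax)
{
	uint64_t txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

	/* For RTM XABORTs also log the abort code from AX */
	if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
		txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
	return txn;
}

int main(void)
{
	/* rtm_abort set (bit 33 of tsx_tuning); XABORT(0x7f) leaves
	 * EAX = 0x7f000001 (imm8 in bits 31:24, bit 0 = explicit abort). */
	uint64_t txn = hsw_transaction(0x2ULL << 32, 0x7f000001);

	printf("txn = %#llx\n", (unsigned long long)txn);	/* 0x7f00000002 */
	return 0;
}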
@@ -854,7 +910,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	regs.sp = pebs->sp;
 
 	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-		regs.ip = pebs_hsw->real_ip;
+		regs.ip = pebs->real_ip;
 		regs.flags |= PERF_EFLAGS_EXACT;
 	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
 		regs.flags |= PERF_EFLAGS_EXACT;
@@ -862,9 +918,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
 	if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
 	    x86_pmu.intel_cap.pebs_format >= 1)
 		data.addr = pebs->dla;
 
+	if (x86_pmu.intel_cap.pebs_format >= 2) {
+		/* Only set the TSX weight when no memory weight. */
+		if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+			data.weight = intel_hsw_weight(pebs);
+
+		if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION)
+			data.txn = intel_hsw_transaction(pebs);
+	}
+
 	if (has_branch_stack(event))
 		data.br_stack = &cpuc->lbr_stack;
 
@@ -913,17 +978,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 		__intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
-					void *top)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event = NULL;
+	void *at, *top;
 	u64 status = 0;
 	int bit;
 
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
 	ds->pebs_index = ds->pebs_buffer_base;
 
+	if (unlikely(at > top))
+		return;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
+		  "Unexpected number of pebs records %ld\n",
+		  (long)(top - at) / x86_pmu.pebs_record_size);
+
 	for (; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;
 
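
Note that at and top are now plain void pointers, so top - at counts bytes rather than records (void-pointer arithmetic is a GCC extension the kernel relies on), which is why the WARN_ONCE threshold above is scaled by pebs_record_size and the report divides it back out. A toy illustration with made-up sizes, not kernel code:

#include <stdio.h>

int main(void)
{
	/* Made-up numbers: 4 counters, fmt2 records of 24 u64 fields. */
	long max_pebs_events = 4, pebs_record_size = 192;
	char area[192 * 3];			/* stands in for the DS buffer */
	void *at = area, *top = area + sizeof(area);

	/* The void-pointer difference is a byte count ... */
	printf("bytes   = %ld\n", (long)(top - at));			/* 576 */
	/* ... so divide by the record size to get back to a record count. */
	printf("records = %ld (limit %ld)\n",
	       (long)(top - at) / pebs_record_size, max_pebs_events);	/* 3 */
	return 0;
}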
@@ -951,61 +1033,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
 	}
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_nhm *at, *top;
-	int n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
-	ds->pebs_index = ds->pebs_buffer_base;
-
-	n = top - at;
-	if (n <= 0)
-		return;
-
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events,
-		  "Unexpected number of pebs records %d\n", n);
-
-	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
-static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_hsw *at, *top;
-	int n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
-
-	n = top - at;
-	if (n <= 0)
-		return;
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events,
-		  "Unexpected number of pebs records %d\n", n);
-
-	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
 /*
  * BTS, PEBS probe and setup
  */
@@ -1040,7 +1067,7 @@ void intel_ds_init(void)
 	case 2:
 		pr_cont("PEBS fmt2%c, ", pebs_type);
 		x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
-		x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+		x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 		break;
 
 	default: