Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel_ds.c')
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c  203
1 file changed, 115 insertions, 88 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ab3ba1c1b7dd..ae96cfa5eddd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -12,6 +12,7 @@
 
 #define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
 #define PEBS_BUFFER_SIZE PAGE_SIZE
+#define PEBS_FIXUP_SIZE PAGE_SIZE
 
 /*
  * pebs_record_32 for p4 and core not supported
@@ -182,18 +183,32 @@ struct pebs_record_nhm {
  * Same as pebs_record_nhm, with two additional fields.
  */
 struct pebs_record_hsw {
-        struct pebs_record_nhm nhm;
-        /*
-         * Real IP of the event. In the Intel documentation this
-         * is called eventingrip.
-         */
-        u64 real_ip;
-        /*
-         * TSX tuning information field: abort cycles and abort flags.
-         */
-        u64 tsx_tuning;
+        u64 flags, ip;
+        u64 ax, bx, cx, dx;
+        u64 si, di, bp, sp;
+        u64 r8, r9, r10, r11;
+        u64 r12, r13, r14, r15;
+        u64 status, dla, dse, lat;
+        u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+        struct {
+                u32 cycles_last_block     : 32,
+                    hle_abort             : 1,
+                    rtm_abort             : 1,
+                    instruction_abort     : 1,
+                    non_instruction_abort : 1,
+                    retry                 : 1,
+                    data_conflict         : 1,
+                    capacity_writes       : 1,
+                    capacity_reads        : 1;
+        };
+        u64 value;
 };
 
+#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
+
 void init_debug_store_on_cpu(int cpu)
 {
         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
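Note (illustrative, not part of the patch): the hsw_tsx_tuning union above packs the cycle count of the last TSX region into the low 32 bits of tsx_tuning and eight abort-type flags into bits 39:32, which is exactly what PEBS_HSW_TSX_FLAGS masks. A minimal standalone sketch of that split, using <stdint.h> types in place of the kernel's u32/u64 and plain shifts instead of the bitfield (whose layout relies on the little-endian allocation the kernel assumes on x86):

#include <stdint.h>
#include <stdio.h>

#define TSX_FLAGS_MASK 0xff00000000ULL  /* mirrors PEBS_HSW_TSX_FLAGS */

static void decode_tsx_tuning(uint64_t tsx_tuning)
{
        uint32_t cycles = (uint32_t)tsx_tuning;                         /* cycles_last_block */
        uint32_t flags  = (uint32_t)((tsx_tuning & TSX_FLAGS_MASK) >> 32);

        printf("cycles_last_block=%u\n", cycles);
        printf("hle_abort=%u rtm_abort=%u insn_abort=%u non_insn_abort=%u\n",
               flags & 1, (flags >> 1) & 1, (flags >> 2) & 1, (flags >> 3) & 1);
        printf("retry=%u data_conflict=%u capacity_writes=%u capacity_reads=%u\n",
               (flags >> 4) & 1, (flags >> 5) & 1, (flags >> 6) & 1, (flags >> 7) & 1);
}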
@@ -214,12 +229,14 @@ void fini_debug_store_on_cpu(int cpu)
         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 }
 
+static DEFINE_PER_CPU(void *, insn_buffer);
+
 static int alloc_pebs_buffer(int cpu)
 {
         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
         int node = cpu_to_node(cpu);
         int max, thresh = 1; /* always use a single PEBS record */
-        void *buffer;
+        void *buffer, *ibuffer;
 
         if (!x86_pmu.pebs)
                 return 0;
@@ -228,6 +245,19 @@ static int alloc_pebs_buffer(int cpu)
         if (unlikely(!buffer))
                 return -ENOMEM;
 
+        /*
+         * HSW+ already provides us the eventing ip; no need to allocate this
+         * buffer then.
+         */
+        if (x86_pmu.intel_cap.pebs_format < 2) {
+                ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+                if (!ibuffer) {
+                        kfree(buffer);
+                        return -ENOMEM;
+                }
+                per_cpu(insn_buffer, cpu) = ibuffer;
+        }
+
         max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
 
         ds->pebs_buffer_base = (u64)(unsigned long)buffer;
@@ -248,6 +278,9 @@ static void release_pebs_buffer(int cpu)
         if (!ds || !x86_pmu.pebs)
                 return;
 
+        kfree(per_cpu(insn_buffer, cpu));
+        per_cpu(insn_buffer, cpu) = NULL;
+
         kfree((void *)(unsigned long)ds->pebs_buffer_base);
         ds->pebs_buffer_base = 0;
 }
@@ -715,6 +748,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
         unsigned long old_to, to = cpuc->lbr_entries[0].to;
         unsigned long ip = regs->ip;
         int is_64bit = 0;
+        void *kaddr;
 
         /*
          * We don't need to fixup if the PEBS assist is fault like
@@ -738,7 +772,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
          * unsigned math, either ip is before the start (impossible) or
          * the basic block is larger than 1 page (sanity)
          */
-        if ((ip - to) > PAGE_SIZE)
+        if ((ip - to) > PEBS_FIXUP_SIZE)
                 return 0;
 
         /*
@@ -749,29 +783,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
                 return 1;
         }
 
+        if (!kernel_ip(ip)) {
+                int size, bytes;
+                u8 *buf = this_cpu_read(insn_buffer);
+
+                size = ip - to; /* Must fit our buffer, see above */
+                bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+                if (bytes != 0)
+                        return 0;
+
+                kaddr = buf;
+        } else {
+                kaddr = (void *)to;
+        }
+
         do {
                 struct insn insn;
-                u8 buf[MAX_INSN_SIZE];
-                void *kaddr;
 
                 old_to = to;
-                if (!kernel_ip(ip)) {
-                        int bytes, size = MAX_INSN_SIZE;
-
-                        bytes = copy_from_user_nmi(buf, (void __user *)to, size);
-                        if (bytes != size)
-                                return 0;
-
-                        kaddr = buf;
-                } else
-                        kaddr = (void *)to;
 
 #ifdef CONFIG_X86_64
                 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
 #endif
                 insn_init(&insn, kaddr, is_64bit);
                 insn_get_length(&insn);
+
                 to += insn.length;
+                kaddr += insn.length;
         } while (to < ip);
 
         if (to == ip) {
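Note (illustrative, not part of the patch): the hunk above changes the fixup from copying MAX_INSN_SIZE bytes from user space on every loop iteration to taking one snapshot of the whole [to, ip) range into the per-cpu insn_buffer and then walking that snapshot. A rough standalone sketch of the walk, where insn_length_of() is a hypothetical stand-in for the kernel's insn_init()/insn_get_length() decoder:

#include <stdint.h>

/* Hypothetical stand-in for insn_init() + insn_get_length(). */
extern int insn_length_of(const uint8_t *kaddr);

/*
 * Walk a snapshot 'buf' of the byte range [to, ip), one instruction at a
 * time.  On success, *prev_ip is the start of the last instruction before
 * 'ip', i.e. the instruction the PEBS sample should be attributed to.
 */
static int walk_to_prev_insn(const uint8_t *buf, uint64_t to, uint64_t ip,
                             uint64_t *prev_ip)
{
        const uint8_t *kaddr = buf;
        uint64_t old_to = to;

        while (to < ip) {
                int len = insn_length_of(kaddr);

                if (len <= 0)
                        return 0;       /* undecodable bytes: give up */
                old_to = to;
                to += len;
                kaddr += len;
        }

        if (to != ip)
                return 0;               /* decode overshot ip: give up */

        *prev_ip = old_to;
        return 1;
}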
@@ -786,16 +824,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
         return 0;
 }
 
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+{
+        if (pebs->tsx_tuning) {
+                union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+                return tsx.cycles_last_block;
+        }
+        return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
+{
+        u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+        /* For RTM XABORTs also log the abort code from AX */
+        if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+                txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+        return txn;
+}
+
 static void __intel_pmu_pebs_event(struct perf_event *event,
                                    struct pt_regs *iregs, void *__pebs)
 {
         /*
-         * We cast to pebs_record_nhm to get the load latency data
-         * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
+         * We cast to the biggest pebs_record but are careful not to
+         * unconditionally access the 'extra' entries.
          */
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-        struct pebs_record_nhm *pebs = __pebs;
-        struct pebs_record_hsw *pebs_hsw = __pebs;
+        struct pebs_record_hsw *pebs = __pebs;
         struct perf_sample_data data;
         struct pt_regs regs;
         u64 sample_type;
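Note (illustrative, not part of the patch): a small user-space sketch of the arithmetic intel_hsw_transaction() performs. The PERF_TXN_TRANSACTION and PERF_TXN_ABORT_SHIFT values below are assumptions made for illustration (RTM abort flag in bit 1, abort code stored from bit 32); the AX convention, bit 0 set and the user abort code in bits 31:24 after an XABORT, follows the Intel RTM abort status:

#include <stdint.h>
#include <stdio.h>

/* Assumed perf ABI values, for illustration only. */
#define EX_PERF_TXN_TRANSACTION (1ULL << 1)
#define EX_PERF_TXN_ABORT_SHIFT 32
#define EX_PEBS_HSW_TSX_FLAGS   0xff00000000ULL

static uint64_t hsw_transaction(uint64_t tsx_tuning, uint64_t ax)
{
        uint64_t txn = (tsx_tuning & EX_PEBS_HSW_TSX_FLAGS) >> 32;

        /* XABORT: bit 0 of AX set, user abort code in AX[31:24]. */
        if ((txn & EX_PERF_TXN_TRANSACTION) && (ax & 1))
                txn |= ((ax >> 24) & 0xff) << EX_PERF_TXN_ABORT_SHIFT;
        return txn;
}

int main(void)
{
        /* rtm_abort flag set (bit 33 of tsx_tuning), XABORT code 0x42 */
        uint64_t tuning = (1ULL << 33) | 123;
        uint64_t ax     = (0x42ULL << 24) | 1;

        printf("txn=%#llx\n", (unsigned long long)hsw_transaction(tuning, ax));
        return 0;
}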
@@ -854,7 +910,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
         regs.sp = pebs->sp;
 
         if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-                regs.ip = pebs_hsw->real_ip;
+                regs.ip = pebs->real_ip;
                 regs.flags |= PERF_EFLAGS_EXACT;
         } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
                 regs.flags |= PERF_EFLAGS_EXACT;
@@ -862,9 +918,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
                 regs.flags &= ~PERF_EFLAGS_EXACT;
 
         if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
             x86_pmu.intel_cap.pebs_format >= 1)
                 data.addr = pebs->dla;
 
+        if (x86_pmu.intel_cap.pebs_format >= 2) {
+                /* Only set the TSX weight when no memory weight. */
+                if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+                        data.weight = intel_hsw_weight(pebs);
+
+                if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION)
+                        data.txn = intel_hsw_transaction(pebs);
+        }
+
         if (has_branch_stack(event))
                 data.br_stack = &cpuc->lbr_stack;
 
@@ -913,17 +978,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
                 __intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
-                                        void *top)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         struct debug_store *ds = cpuc->ds;
         struct perf_event *event = NULL;
+        void *at, *top;
         u64 status = 0;
         int bit;
 
+        if (!x86_pmu.pebs_active)
+                return;
+
+        at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
         ds->pebs_index = ds->pebs_buffer_base;
 
+        if (unlikely(at > top))
+                return;
+
+        /*
+         * Should not happen, we program the threshold at 1 and do not
+         * set a reset value.
+         */
+        WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
+                  "Unexpected number of pebs records %ld\n",
+                  (long)(top - at) / x86_pmu.pebs_record_size);
+
         for (; at < top; at += x86_pmu.pebs_record_size) {
                 struct pebs_record_nhm *p = at;
 
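Note (illustrative, not part of the patch): in the merged drain routine above, 'at' and 'top' are void pointers, so 'top - at' is a byte count (GNU C treats void-pointer arithmetic as byte-sized), and the record count printed by the WARN_ONCE is that byte difference divided by the per-format record size. A tiny sketch of the same arithmetic with explicit char pointers:

#include <stddef.h>

/* Number of PEBS records between the buffer base and the current index. */
static long pebs_record_count(const void *base, const void *index,
                              size_t record_size)
{
        return (long)(((const char *)index - (const char *)base) / (long)record_size);
}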
@@ -951,61 +1033,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
         }
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
-        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-        struct debug_store *ds = cpuc->ds;
-        struct pebs_record_nhm *at, *top;
-        int n;
-
-        if (!x86_pmu.pebs_active)
-                return;
-
-        at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
-        ds->pebs_index = ds->pebs_buffer_base;
-
-        n = top - at;
-        if (n <= 0)
-                return;
-
-        /*
-         * Should not happen, we program the threshold at 1 and do not
-         * set a reset value.
-         */
-        WARN_ONCE(n > x86_pmu.max_pebs_events,
-                  "Unexpected number of pebs records %d\n", n);
-
-        return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
-static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
-{
-        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-        struct debug_store *ds = cpuc->ds;
-        struct pebs_record_hsw *at, *top;
-        int n;
-
-        if (!x86_pmu.pebs_active)
-                return;
-
-        at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
-        top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
-
-        n = top - at;
-        if (n <= 0)
-                return;
-        /*
-         * Should not happen, we program the threshold at 1 and do not
-         * set a reset value.
-         */
-        WARN_ONCE(n > x86_pmu.max_pebs_events,
-                  "Unexpected number of pebs records %d\n", n);
-
-        return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
 /*
  * BTS, PEBS probe and setup
  */
@@ -1040,7 +1067,7 @@ void intel_ds_init(void)
         case 2:
                 pr_cont("PEBS fmt2%c, ", pebs_type);
                 x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
-                x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+                x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                 break;
 
         default: