Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/Kconfig                            |    3
-rw-r--r--  arch/x86/include/asm/insn.h                 |    2
-rw-r--r--  arch/x86/include/asm/kprobes.h              |    2
-rw-r--r--  arch/x86/include/asm/perf_event.h           |   19
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c            |  338
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c      |  222
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c   |  673
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c  |  221
-rw-r--r--  arch/x86/lib/Makefile                       |    2
9 files changed, 1179 insertions(+), 303 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e98440371525..e1240f652a9b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -58,6 +58,9 @@ config X86
58 select HAVE_ARCH_KMEMCHECK 58 select HAVE_ARCH_KMEMCHECK
59 select HAVE_USER_RETURN_NOTIFIER 59 select HAVE_USER_RETURN_NOTIFIER
60 60
61config INSTRUCTION_DECODER
62 def_bool (KPROBES || PERF_EVENTS)
63
61config OUTPUT_FORMAT 64config OUTPUT_FORMAT
62 string 65 string
63 default "elf32-i386" if X86_32 66 default "elf32-i386" if X86_32
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 96c2e0ad04ca..88c765e16410 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -68,6 +68,8 @@ struct insn {
68 const insn_byte_t *next_byte; 68 const insn_byte_t *next_byte;
69}; 69};
70 70
71#define MAX_INSN_SIZE 16
72
71#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) 73#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
72#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) 74#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
73#define X86_MODRM_RM(modrm) ((modrm) & 0x07) 75#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
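
The hunk above moves MAX_INSN_SIZE next to the ModRM accessor macros already exported by insn.h. As a quick illustration of what those three macros extract, here is a stand-alone user-space sketch (the example byte 0xd9 is arbitrary, not taken from the patch):

/* Stand-alone sketch of the ModRM accessors shown in the hunk above;
 * the example byte is arbitrary, not taken from the patch. */
#include <stdio.h>
#include <stdint.h>

#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)	/* bits 7:6 */
#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)	/* bits 5:3 */
#define X86_MODRM_RM(modrm)  ((modrm) & 0x07)		/* bits 2:0 */

int main(void)
{
	uint8_t modrm = 0xd9;	/* 11 011 001b */

	/* prints mod=3 (register direct), reg=3, rm=1 */
	printf("mod=%d reg=%d rm=%d\n",
	       X86_MODRM_MOD(modrm), X86_MODRM_REG(modrm),
	       X86_MODRM_RM(modrm));
	return 0;
}
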
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4ffa345a8ccb..547882539157 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -24,6 +24,7 @@
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/ptrace.h> 25#include <linux/ptrace.h>
26#include <linux/percpu.h> 26#include <linux/percpu.h>
27#include <asm/insn.h>
27 28
28#define __ARCH_WANT_KPROBES_INSN_SLOT 29#define __ARCH_WANT_KPROBES_INSN_SLOT
29 30
@@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t;
36#define RELATIVEJUMP_SIZE 5 37#define RELATIVEJUMP_SIZE 5
37#define RELATIVECALL_OPCODE 0xe8 38#define RELATIVECALL_OPCODE 0xe8
38#define RELATIVE_ADDR_SIZE 4 39#define RELATIVE_ADDR_SIZE 4
39#define MAX_INSN_SIZE 16
40#define MAX_STACK_SIZE 64 40#define MAX_STACK_SIZE 64
41#define MIN_STACK_SIZE(ADDR) \ 41#define MIN_STACK_SIZE(ADDR) \
42 (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ 42 (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index db6109a885a7..a9038c951619 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void);
136 136
137#define PERF_EVENT_INDEX_OFFSET 0 137#define PERF_EVENT_INDEX_OFFSET 0
138 138
139/*
140 * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
141 * This flag is otherwise unused and ABI specified to be 0, so nobody should
142 * care what we do with it.
143 */
144#define PERF_EFLAGS_EXACT (1UL << 3)
145
146#define perf_misc_flags(regs) \
147({ int misc = 0; \
148 if (user_mode(regs)) \
149 misc |= PERF_RECORD_MISC_USER; \
150 else \
151 misc |= PERF_RECORD_MISC_KERNEL; \
152 if (regs->flags & PERF_EFLAGS_EXACT) \
153 misc |= PERF_RECORD_MISC_EXACT; \
154 misc; })
155
156#define perf_instruction_pointer(regs) ((regs)->ip)
157
139#else 158#else
140static inline void init_hw_perf_events(void) { } 159static inline void init_hw_perf_events(void) { }
141static inline void perf_events_lapic_init(void) { } 160static inline void perf_events_lapic_init(void) { }
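
The new perf_misc_flags() folds the borrowed eflags bit into the misc field of each sample, which is how precise (PEBS-fixed-up) samples are flagged to userspace. Below is a minimal user-space mirror of that logic; only PERF_EFLAGS_EXACT is taken from the hunk above, the PERF_RECORD_MISC_* values are illustrative stand-ins for the perf ABI constants:

/* Minimal user-space mirror of the perf_misc_flags() logic above.
 * PERF_RECORD_MISC_EXACT's numeric value is a placeholder here; the real
 * one comes from the perf ABI headers. */
#include <stdio.h>

#define PERF_EFLAGS_EXACT	(1UL << 3)	/* from the hunk above */

#define PERF_RECORD_MISC_KERNEL	(1 << 0)	/* illustrative cpumode bits */
#define PERF_RECORD_MISC_USER	(1 << 1)
#define PERF_RECORD_MISC_EXACT	(1 << 14)	/* placeholder value */

static int misc_flags(unsigned long eflags, int user_mode)
{
	int misc = user_mode ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL;

	if (eflags & PERF_EFLAGS_EXACT)
		misc |= PERF_RECORD_MISC_EXACT;
	return misc;
}

int main(void)
{
	/* kernel-mode sample whose IP was fixed up by the PEBS code */
	printf("misc=%#x\n", misc_flags(PERF_EFLAGS_EXACT, 0));
	return 0;
}
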
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 7645faea8e85..a6d92c34135c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -29,46 +29,53 @@
29#include <asm/stacktrace.h> 29#include <asm/stacktrace.h>
30#include <asm/nmi.h> 30#include <asm/nmi.h>
31 31
32static u64 perf_event_mask __read_mostly; 32#if 0
33#undef wrmsrl
34#define wrmsrl(msr, val) \
35do { \
36 trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
37 (unsigned long)(val)); \
38 native_write_msr((msr), (u32)((u64)(val)), \
39 (u32)((u64)(val) >> 32)); \
40} while (0)
41#endif
42
43/*
44 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
45 */
46static unsigned long
47copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
48{
49 unsigned long offset, addr = (unsigned long)from;
50 int type = in_nmi() ? KM_NMI : KM_IRQ0;
51 unsigned long size, len = 0;
52 struct page *page;
53 void *map;
54 int ret;
33 55
34/* The maximal number of PEBS events: */ 56 do {
35#define MAX_PEBS_EVENTS 4 57 ret = __get_user_pages_fast(addr, 1, 0, &page);
58 if (!ret)
59 break;
36 60
37/* The size of a BTS record in bytes: */ 61 offset = addr & (PAGE_SIZE - 1);
38#define BTS_RECORD_SIZE 24 62 size = min(PAGE_SIZE - offset, n - len);
39 63
40/* The size of a per-cpu BTS buffer in bytes: */ 64 map = kmap_atomic(page, type);
41#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) 65 memcpy(to, map+offset, size);
66 kunmap_atomic(map, type);
67 put_page(page);
42 68
43/* The BTS overflow threshold in bytes from the end of the buffer: */ 69 len += size;
44#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) 70 to += size;
71 addr += size;
45 72
73 } while (len < n);
46 74
47/* 75 return len;
48 * Bits in the debugctlmsr controlling branch tracing. 76}
49 */
50#define X86_DEBUGCTL_TR (1 << 6)
51#define X86_DEBUGCTL_BTS (1 << 7)
52#define X86_DEBUGCTL_BTINT (1 << 8)
53#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9)
54#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10)
55 77
56/* 78static u64 perf_event_mask __read_mostly;
57 * A debug store configuration.
58 *
59 * We only support architectures that use 64bit fields.
60 */
61struct debug_store {
62 u64 bts_buffer_base;
63 u64 bts_index;
64 u64 bts_absolute_maximum;
65 u64 bts_interrupt_threshold;
66 u64 pebs_buffer_base;
67 u64 pebs_index;
68 u64 pebs_absolute_maximum;
69 u64 pebs_interrupt_threshold;
70 u64 pebs_event_reset[MAX_PEBS_EVENTS];
71};
72 79
73struct event_constraint { 80struct event_constraint {
74 union { 81 union {
@@ -87,18 +94,40 @@ struct amd_nb {
87 struct event_constraint event_constraints[X86_PMC_IDX_MAX]; 94 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
88}; 95};
89 96
97#define MAX_LBR_ENTRIES 16
98
90struct cpu_hw_events { 99struct cpu_hw_events {
100 /*
101 * Generic x86 PMC bits
102 */
91 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ 103 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
92 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 104 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
93 unsigned long interrupts; 105 unsigned long interrupts;
94 int enabled; 106 int enabled;
95 struct debug_store *ds;
96 107
97 int n_events; 108 int n_events;
98 int n_added; 109 int n_added;
99 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ 110 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
100 u64 tags[X86_PMC_IDX_MAX]; 111 u64 tags[X86_PMC_IDX_MAX];
101 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ 112 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
113
114 /*
115 * Intel DebugStore bits
116 */
117 struct debug_store *ds;
118 u64 pebs_enabled;
119
120 /*
121 * Intel LBR bits
122 */
123 int lbr_users;
124 void *lbr_context;
125 struct perf_branch_stack lbr_stack;
126 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
127
128 /*
129 * AMD specific bits
130 */
102 struct amd_nb *amd_nb; 131 struct amd_nb *amd_nb;
103}; 132};
104 133
@@ -112,22 +141,48 @@ struct cpu_hw_events {
112#define EVENT_CONSTRAINT(c, n, m) \ 141#define EVENT_CONSTRAINT(c, n, m) \
113 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) 142 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
114 143
144/*
145 * Constraint on the Event code.
146 */
115#define INTEL_EVENT_CONSTRAINT(c, n) \ 147#define INTEL_EVENT_CONSTRAINT(c, n) \
116 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) 148 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
117 149
150/*
151 * Constraint on the Event code + UMask + fixed-mask
152 */
118#define FIXED_EVENT_CONSTRAINT(c, n) \ 153#define FIXED_EVENT_CONSTRAINT(c, n) \
119 EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) 154 EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
120 155
156/*
157 * Constraint on the Event code + UMask
158 */
159#define PEBS_EVENT_CONSTRAINT(c, n) \
160 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
161
121#define EVENT_CONSTRAINT_END \ 162#define EVENT_CONSTRAINT_END \
122 EVENT_CONSTRAINT(0, 0, 0) 163 EVENT_CONSTRAINT(0, 0, 0)
123 164
124#define for_each_event_constraint(e, c) \ 165#define for_each_event_constraint(e, c) \
125 for ((e) = (c); (e)->cmask; (e)++) 166 for ((e) = (c); (e)->cmask; (e)++)
126 167
168union perf_capabilities {
169 struct {
170 u64 lbr_format : 6;
171 u64 pebs_trap : 1;
172 u64 pebs_arch_reg : 1;
173 u64 pebs_format : 4;
174 u64 smm_freeze : 1;
175 };
176 u64 capabilities;
177};
178
127/* 179/*
128 * struct x86_pmu - generic x86 pmu 180 * struct x86_pmu - generic x86 pmu
129 */ 181 */
130struct x86_pmu { 182struct x86_pmu {
183 /*
184 * Generic x86 PMC bits
185 */
131 const char *name; 186 const char *name;
132 int version; 187 int version;
133 int (*handle_irq)(struct pt_regs *); 188 int (*handle_irq)(struct pt_regs *);
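
The new union perf_capabilities overlays named bitfields on the raw IA32_PERF_CAPABILITIES value that intel_pmu_init() reads via rdmsrl() later in this patch. A user-space sketch of the same decode, done with explicit shifts in the field order the union declares (low bits first, the layout the kernel relies on for x86); the raw value used here is invented:

/* Decode the IA32_PERF_CAPABILITIES fields the way the union above lays
 * them out (low bits first on x86).  The raw value here is invented. */
#include <stdio.h>
#include <stdint.h>

struct perf_caps {
	unsigned lbr_format;	/* bits  5:0 */
	unsigned pebs_trap;	/* bit     6 */
	unsigned pebs_arch_reg;	/* bit     7 */
	unsigned pebs_format;	/* bits 11:8 */
	unsigned smm_freeze;	/* bit    12 */
};

static struct perf_caps decode_caps(uint64_t raw)
{
	struct perf_caps c = {
		.lbr_format    = raw & 0x3f,
		.pebs_trap     = (raw >> 6) & 1,
		.pebs_arch_reg = (raw >> 7) & 1,
		.pebs_format   = (raw >> 8) & 0xf,
		.smm_freeze    = (raw >> 12) & 1,
	};
	return c;
}

int main(void)
{
	struct perf_caps c = decode_caps(0x11c3);	/* example only */

	printf("lbr_format=%u pebs_trap=%u pebs_format=%u\n",
	       c.lbr_format, c.pebs_trap, c.pebs_format);
	return 0;
}
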
@@ -146,10 +201,6 @@ struct x86_pmu {
146 u64 event_mask; 201 u64 event_mask;
147 int apic; 202 int apic;
148 u64 max_period; 203 u64 max_period;
149 u64 intel_ctrl;
150 void (*enable_bts)(u64 config);
151 void (*disable_bts)(void);
152
153 struct event_constraint * 204 struct event_constraint *
154 (*get_event_constraints)(struct cpu_hw_events *cpuc, 205 (*get_event_constraints)(struct cpu_hw_events *cpuc,
155 struct perf_event *event); 206 struct perf_event *event);
@@ -157,11 +208,32 @@ struct x86_pmu {
157 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 208 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
158 struct perf_event *event); 209 struct perf_event *event);
159 struct event_constraint *event_constraints; 210 struct event_constraint *event_constraints;
211 void (*quirks)(void);
160 212
161 void (*cpu_prepare)(int cpu); 213 void (*cpu_prepare)(int cpu);
162 void (*cpu_starting)(int cpu); 214 void (*cpu_starting)(int cpu);
163 void (*cpu_dying)(int cpu); 215 void (*cpu_dying)(int cpu);
164 void (*cpu_dead)(int cpu); 216 void (*cpu_dead)(int cpu);
217
218 /*
219 * Intel Arch Perfmon v2+
220 */
221 u64 intel_ctrl;
222 union perf_capabilities intel_cap;
223
224 /*
225 * Intel DebugStore bits
226 */
227 int bts, pebs;
228 int pebs_record_size;
229 void (*drain_pebs)(struct pt_regs *regs);
230 struct event_constraint *pebs_constraints;
231
232 /*
233 * Intel LBR
234 */
235 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
236 int lbr_nr; /* hardware stack size */
165}; 237};
166 238
167static struct x86_pmu x86_pmu __read_mostly; 239static struct x86_pmu x86_pmu __read_mostly;
@@ -293,110 +365,14 @@ static void release_pmc_hardware(void)
293#endif 365#endif
294} 366}
295 367
296static inline bool bts_available(void) 368static int reserve_ds_buffers(void);
297{ 369static void release_ds_buffers(void);
298 return x86_pmu.enable_bts != NULL;
299}
300
301static void init_debug_store_on_cpu(int cpu)
302{
303 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
304
305 if (!ds)
306 return;
307
308 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
309 (u32)((u64)(unsigned long)ds),
310 (u32)((u64)(unsigned long)ds >> 32));
311}
312
313static void fini_debug_store_on_cpu(int cpu)
314{
315 if (!per_cpu(cpu_hw_events, cpu).ds)
316 return;
317
318 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
319}
320
321static void release_bts_hardware(void)
322{
323 int cpu;
324
325 if (!bts_available())
326 return;
327
328 get_online_cpus();
329
330 for_each_online_cpu(cpu)
331 fini_debug_store_on_cpu(cpu);
332
333 for_each_possible_cpu(cpu) {
334 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
335
336 if (!ds)
337 continue;
338
339 per_cpu(cpu_hw_events, cpu).ds = NULL;
340
341 kfree((void *)(unsigned long)ds->bts_buffer_base);
342 kfree(ds);
343 }
344
345 put_online_cpus();
346}
347
348static int reserve_bts_hardware(void)
349{
350 int cpu, err = 0;
351
352 if (!bts_available())
353 return 0;
354
355 get_online_cpus();
356
357 for_each_possible_cpu(cpu) {
358 struct debug_store *ds;
359 void *buffer;
360
361 err = -ENOMEM;
362 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
363 if (unlikely(!buffer))
364 break;
365
366 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
367 if (unlikely(!ds)) {
368 kfree(buffer);
369 break;
370 }
371
372 ds->bts_buffer_base = (u64)(unsigned long)buffer;
373 ds->bts_index = ds->bts_buffer_base;
374 ds->bts_absolute_maximum =
375 ds->bts_buffer_base + BTS_BUFFER_SIZE;
376 ds->bts_interrupt_threshold =
377 ds->bts_absolute_maximum - BTS_OVFL_TH;
378
379 per_cpu(cpu_hw_events, cpu).ds = ds;
380 err = 0;
381 }
382
383 if (err)
384 release_bts_hardware();
385 else {
386 for_each_online_cpu(cpu)
387 init_debug_store_on_cpu(cpu);
388 }
389
390 put_online_cpus();
391
392 return err;
393}
394 370
395static void hw_perf_event_destroy(struct perf_event *event) 371static void hw_perf_event_destroy(struct perf_event *event)
396{ 372{
397 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { 373 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
398 release_pmc_hardware(); 374 release_pmc_hardware();
399 release_bts_hardware(); 375 release_ds_buffers();
400 mutex_unlock(&pmc_reserve_mutex); 376 mutex_unlock(&pmc_reserve_mutex);
401 } 377 }
402} 378}
@@ -459,7 +435,7 @@ static int __hw_perf_event_init(struct perf_event *event)
459 if (!reserve_pmc_hardware()) 435 if (!reserve_pmc_hardware())
460 err = -EBUSY; 436 err = -EBUSY;
461 else 437 else
462 err = reserve_bts_hardware(); 438 err = reserve_ds_buffers();
463 } 439 }
464 if (!err) 440 if (!err)
465 atomic_inc(&active_events); 441 atomic_inc(&active_events);
@@ -537,7 +513,7 @@ static int __hw_perf_event_init(struct perf_event *event)
537 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && 513 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
538 (hwc->sample_period == 1)) { 514 (hwc->sample_period == 1)) {
539 /* BTS is not supported by this architecture. */ 515 /* BTS is not supported by this architecture. */
540 if (!bts_available()) 516 if (!x86_pmu.bts)
541 return -EOPNOTSUPP; 517 return -EOPNOTSUPP;
542 518
543 /* BTS is currently only allowed for user-mode. */ 519 /* BTS is currently only allowed for user-mode. */
@@ -850,14 +826,15 @@ void hw_perf_enable(void)
850 826
851static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) 827static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
852{ 828{
853 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 829 wrmsrl(hwc->config_base + hwc->idx,
854 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); 830 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
855} 831}
856 832
857static inline void x86_pmu_disable_event(struct perf_event *event) 833static inline void x86_pmu_disable_event(struct perf_event *event)
858{ 834{
859 struct hw_perf_event *hwc = &event->hw; 835 struct hw_perf_event *hwc = &event->hw;
860 (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); 836
837 wrmsrl(hwc->config_base + hwc->idx, hwc->config);
861} 838}
862 839
863static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 840static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -872,7 +849,7 @@ x86_perf_event_set_period(struct perf_event *event)
872 struct hw_perf_event *hwc = &event->hw; 849 struct hw_perf_event *hwc = &event->hw;
873 s64 left = atomic64_read(&hwc->period_left); 850 s64 left = atomic64_read(&hwc->period_left);
874 s64 period = hwc->sample_period; 851 s64 period = hwc->sample_period;
875 int err, ret = 0, idx = hwc->idx; 852 int ret = 0, idx = hwc->idx;
876 853
877 if (idx == X86_PMC_IDX_FIXED_BTS) 854 if (idx == X86_PMC_IDX_FIXED_BTS)
878 return 0; 855 return 0;
@@ -910,8 +887,8 @@ x86_perf_event_set_period(struct perf_event *event)
910 */ 887 */
911 atomic64_set(&hwc->prev_count, (u64)-left); 888 atomic64_set(&hwc->prev_count, (u64)-left);
912 889
913 err = checking_wrmsrl(hwc->event_base + idx, 890 wrmsrl(hwc->event_base + idx,
914 (u64)(-left) & x86_pmu.event_mask); 891 (u64)(-left) & x86_pmu.event_mask);
915 892
916 perf_event_update_userpage(event); 893 perf_event_update_userpage(event);
917 894
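
The switch from checking_wrmsrl() to plain wrmsrl() does not change the period logic: the counter is still programmed with -left, masked to the counter width, so that it overflows and raises the PMI after exactly left more events. A small sketch of that arithmetic, assuming a 48-bit counter purely for illustration:

/* The counter is written with -left so it wraps to zero (and raises the
 * overflow PMI) after exactly 'left' more events.  The 48-bit width is an
 * assumption; the real width comes from x86_pmu.event_mask. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t event_mask = (1ULL << 48) - 1;	/* example counter width */
	int64_t  left = 100000;			/* events until next sample */

	uint64_t programmed = (uint64_t)(-left) & event_mask;

	/* counting 'left' events from 'programmed' wraps past the mask */
	printf("programmed = %#llx\n", (unsigned long long)programmed);
	printf("overflows after %lld events: %d\n", (long long)left,
	       ((programmed + left) & event_mask) < programmed);
	return 0;
}
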
@@ -989,6 +966,7 @@ static void x86_pmu_unthrottle(struct perf_event *event)
989void perf_event_print_debug(void) 966void perf_event_print_debug(void)
990{ 967{
991 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; 968 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
969 u64 pebs;
992 struct cpu_hw_events *cpuc; 970 struct cpu_hw_events *cpuc;
993 unsigned long flags; 971 unsigned long flags;
994 int cpu, idx; 972 int cpu, idx;
@@ -1006,14 +984,16 @@ void perf_event_print_debug(void)
1006 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 984 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1007 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); 985 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1008 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); 986 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
987 rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
1009 988
1010 pr_info("\n"); 989 pr_info("\n");
1011 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); 990 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
1012 pr_info("CPU#%d: status: %016llx\n", cpu, status); 991 pr_info("CPU#%d: status: %016llx\n", cpu, status);
1013 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); 992 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1014 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); 993 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
994 pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
1015 } 995 }
1016 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); 996 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1017 997
1018 for (idx = 0; idx < x86_pmu.num_events; idx++) { 998 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1019 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 999 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1327,6 +1307,8 @@ undo:
1327 1307
1328#include "perf_event_amd.c" 1308#include "perf_event_amd.c"
1329#include "perf_event_p6.c" 1309#include "perf_event_p6.c"
1310#include "perf_event_intel_lbr.c"
1311#include "perf_event_intel_ds.c"
1330#include "perf_event_intel.c" 1312#include "perf_event_intel.c"
1331 1313
1332static int __cpuinit 1314static int __cpuinit
@@ -1398,6 +1380,9 @@ void __init init_hw_perf_events(void)
1398 1380
1399 pr_cont("%s PMU driver.\n", x86_pmu.name); 1381 pr_cont("%s PMU driver.\n", x86_pmu.name);
1400 1382
1383 if (x86_pmu.quirks)
1384 x86_pmu.quirks();
1385
1401 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { 1386 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
1402 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", 1387 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
1403 x86_pmu.num_events, X86_PMC_MAX_GENERIC); 1388 x86_pmu.num_events, X86_PMC_MAX_GENERIC);
@@ -1459,6 +1444,32 @@ static const struct pmu pmu = {
1459}; 1444};
1460 1445
1461/* 1446/*
1447 * validate that we can schedule this event
1448 */
1449static int validate_event(struct perf_event *event)
1450{
1451 struct cpu_hw_events *fake_cpuc;
1452 struct event_constraint *c;
1453 int ret = 0;
1454
1455 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1456 if (!fake_cpuc)
1457 return -ENOMEM;
1458
1459 c = x86_pmu.get_event_constraints(fake_cpuc, event);
1460
1461 if (!c || !c->weight)
1462 ret = -ENOSPC;
1463
1464 if (x86_pmu.put_event_constraints)
1465 x86_pmu.put_event_constraints(fake_cpuc, event);
1466
1467 kfree(fake_cpuc);
1468
1469 return ret;
1470}
1471
1472/*
1462 * validate a single event group 1473 * validate a single event group
1463 * 1474 *
1464 * validation include: 1475 * validation include:
@@ -1523,6 +1534,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1523 1534
1524 if (event->group_leader != event) 1535 if (event->group_leader != event)
1525 err = validate_group(event); 1536 err = validate_group(event);
1537 else
1538 err = validate_event(event);
1526 1539
1527 event->pmu = tmp; 1540 event->pmu = tmp;
1528 } 1541 }
@@ -1593,41 +1606,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1593 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1606 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1594} 1607}
1595 1608
1596/*
1597 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1598 */
1599static unsigned long
1600copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1601{
1602 unsigned long offset, addr = (unsigned long)from;
1603 int type = in_nmi() ? KM_NMI : KM_IRQ0;
1604 unsigned long size, len = 0;
1605 struct page *page;
1606 void *map;
1607 int ret;
1608
1609 do {
1610 ret = __get_user_pages_fast(addr, 1, 0, &page);
1611 if (!ret)
1612 break;
1613
1614 offset = addr & (PAGE_SIZE - 1);
1615 size = min(PAGE_SIZE - offset, n - len);
1616
1617 map = kmap_atomic(page, type);
1618 memcpy(to, map+offset, size);
1619 kunmap_atomic(map, type);
1620 put_page(page);
1621
1622 len += size;
1623 to += size;
1624 addr += size;
1625
1626 } while (len < n);
1627
1628 return len;
1629}
1630
1631static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 1609static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1632{ 1610{
1633 unsigned long bytes; 1611 unsigned long bytes;
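
copy_from_user_nmi(), which this patch moves up so the PEBS code can use it before the callchain code, copies at most one page per iteration because it can only pin and map a single user page at a time from NMI context. The chunking arithmetic can be reproduced in user space as follows; plain memcpy stands in for __get_user_pages_fast() plus kmap_atomic(), and a 4096-byte PAGE_SIZE is assumed:

/* User-space sketch of the page-at-a-time chunking used by
 * copy_from_user_nmi(); memcpy stands in for the GUP + kmap_atomic step. */
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096UL

static unsigned long copy_chunked(void *to, const void *from, unsigned long n)
{
	unsigned long addr = (unsigned long)from;
	unsigned long offset, size, len = 0;

	do {
		offset = addr & (PAGE_SIZE - 1);
		size = PAGE_SIZE - offset;
		if (size > n - len)
			size = n - len;

		/* in the kernel: __get_user_pages_fast() + kmap_atomic() */
		memcpy((char *)to + len, (const char *)from + len, size);

		len  += size;
		addr += size;
	} while (len < n);

	return len;
}

int main(void)
{
	static char src[3 * 4096], dst[3 * 4096];

	/* a copy that straddles two page boundaries */
	printf("copied %lu bytes\n", copy_chunked(dst, src + 100, 8000));
	return 0;
}
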
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 84bfde64a337..971dc6e7d54b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -470,42 +470,6 @@ static u64 intel_pmu_raw_event(u64 hw_event)
470 return hw_event & CORE_EVNTSEL_MASK; 470 return hw_event & CORE_EVNTSEL_MASK;
471} 471}
472 472
473static void intel_pmu_enable_bts(u64 config)
474{
475 unsigned long debugctlmsr;
476
477 debugctlmsr = get_debugctlmsr();
478
479 debugctlmsr |= X86_DEBUGCTL_TR;
480 debugctlmsr |= X86_DEBUGCTL_BTS;
481 debugctlmsr |= X86_DEBUGCTL_BTINT;
482
483 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
484 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
485
486 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
487 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
488
489 update_debugctlmsr(debugctlmsr);
490}
491
492static void intel_pmu_disable_bts(void)
493{
494 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
495 unsigned long debugctlmsr;
496
497 if (!cpuc->ds)
498 return;
499
500 debugctlmsr = get_debugctlmsr();
501
502 debugctlmsr &=
503 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
504 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
505
506 update_debugctlmsr(debugctlmsr);
507}
508
509static void intel_pmu_disable_all(void) 473static void intel_pmu_disable_all(void)
510{ 474{
511 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 475 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -514,12 +478,17 @@ static void intel_pmu_disable_all(void)
514 478
515 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) 479 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
516 intel_pmu_disable_bts(); 480 intel_pmu_disable_bts();
481
482 intel_pmu_pebs_disable_all();
483 intel_pmu_lbr_disable_all();
517} 484}
518 485
519static void intel_pmu_enable_all(void) 486static void intel_pmu_enable_all(void)
520{ 487{
521 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 488 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
522 489
490 intel_pmu_pebs_enable_all();
491 intel_pmu_lbr_enable_all();
523 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 492 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
524 493
525 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 494 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -547,8 +516,7 @@ static inline void intel_pmu_ack_status(u64 ack)
547 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 516 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
548} 517}
549 518
550static inline void 519static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
551intel_pmu_disable_fixed(struct hw_perf_event *hwc)
552{ 520{
553 int idx = hwc->idx - X86_PMC_IDX_FIXED; 521 int idx = hwc->idx - X86_PMC_IDX_FIXED;
554 u64 ctrl_val, mask; 522 u64 ctrl_val, mask;
@@ -557,71 +525,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc)
557 525
558 rdmsrl(hwc->config_base, ctrl_val); 526 rdmsrl(hwc->config_base, ctrl_val);
559 ctrl_val &= ~mask; 527 ctrl_val &= ~mask;
560 (void)checking_wrmsrl(hwc->config_base, ctrl_val); 528 wrmsrl(hwc->config_base, ctrl_val);
561} 529}
562 530
563static void intel_pmu_drain_bts_buffer(void) 531static void intel_pmu_disable_event(struct perf_event *event)
564{
565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
566 struct debug_store *ds = cpuc->ds;
567 struct bts_record {
568 u64 from;
569 u64 to;
570 u64 flags;
571 };
572 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
573 struct bts_record *at, *top;
574 struct perf_output_handle handle;
575 struct perf_event_header header;
576 struct perf_sample_data data;
577 struct pt_regs regs;
578
579 if (!event)
580 return;
581
582 if (!ds)
583 return;
584
585 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
586 top = (struct bts_record *)(unsigned long)ds->bts_index;
587
588 if (top <= at)
589 return;
590
591 ds->bts_index = ds->bts_buffer_base;
592
593 perf_sample_data_init(&data, 0);
594
595 data.period = event->hw.last_period;
596 regs.ip = 0;
597
598 /*
599 * Prepare a generic sample, i.e. fill in the invariant fields.
600 * We will overwrite the from and to address before we output
601 * the sample.
602 */
603 perf_prepare_sample(&header, &data, event, &regs);
604
605 if (perf_output_begin(&handle, event,
606 header.size * (top - at), 1, 1))
607 return;
608
609 for (; at < top; at++) {
610 data.ip = at->from;
611 data.addr = at->to;
612
613 perf_output_sample(&handle, &header, &data, event);
614 }
615
616 perf_output_end(&handle);
617
618 /* There's new data available. */
619 event->hw.interrupts++;
620 event->pending_kill = POLL_IN;
621}
622
623static inline void
624intel_pmu_disable_event(struct perf_event *event)
625{ 532{
626 struct hw_perf_event *hwc = &event->hw; 533 struct hw_perf_event *hwc = &event->hw;
627 534
@@ -637,14 +544,15 @@ intel_pmu_disable_event(struct perf_event *event)
637 } 544 }
638 545
639 x86_pmu_disable_event(event); 546 x86_pmu_disable_event(event);
547
548 if (unlikely(event->attr.precise))
549 intel_pmu_pebs_disable(event);
640} 550}
641 551
642static inline void 552static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
643intel_pmu_enable_fixed(struct hw_perf_event *hwc)
644{ 553{
645 int idx = hwc->idx - X86_PMC_IDX_FIXED; 554 int idx = hwc->idx - X86_PMC_IDX_FIXED;
646 u64 ctrl_val, bits, mask; 555 u64 ctrl_val, bits, mask;
647 int err;
648 556
649 /* 557 /*
650 * Enable IRQ generation (0x8), 558 * Enable IRQ generation (0x8),
@@ -669,7 +577,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc)
669 rdmsrl(hwc->config_base, ctrl_val); 577 rdmsrl(hwc->config_base, ctrl_val);
670 ctrl_val &= ~mask; 578 ctrl_val &= ~mask;
671 ctrl_val |= bits; 579 ctrl_val |= bits;
672 err = checking_wrmsrl(hwc->config_base, ctrl_val); 580 wrmsrl(hwc->config_base, ctrl_val);
673} 581}
674 582
675static void intel_pmu_enable_event(struct perf_event *event) 583static void intel_pmu_enable_event(struct perf_event *event)
@@ -689,6 +597,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
689 return; 597 return;
690 } 598 }
691 599
600 if (unlikely(event->attr.precise))
601 intel_pmu_pebs_enable(event);
602
692 __x86_pmu_enable_event(hwc); 603 __x86_pmu_enable_event(hwc);
693} 604}
694 605
@@ -762,6 +673,15 @@ again:
762 673
763 inc_irq_stat(apic_perf_irqs); 674 inc_irq_stat(apic_perf_irqs);
764 ack = status; 675 ack = status;
676
677 intel_pmu_lbr_read();
678
679 /*
680 * PEBS overflow sets bit 62 in the global status register
681 */
682 if (__test_and_clear_bit(62, (unsigned long *)&status))
683 x86_pmu.drain_pebs(regs);
684
765 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 685 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
766 struct perf_event *event = cpuc->events[bit]; 686 struct perf_event *event = cpuc->events[bit];
767 687
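
The interrupt handler now tests and clears bit 62 of GLOBAL_STATUS before walking the per-counter overflow bits, so a PEBS-only PMI drains the DS buffer instead of falling through as spurious. The check itself is plain bit arithmetic, sketched here with an invented status value:

/* Sketch of the GLOBAL_STATUS handling added above: clear the PEBS
 * overflow indicator (bit 62) before iterating the per-counter bits. */
#include <stdio.h>
#include <stdint.h>

#define PEBS_OVF_BIT	62

int main(void)
{
	uint64_t status = (1ULL << PEBS_OVF_BIT) | (1ULL << 1);	/* example */

	if (status & (1ULL << PEBS_OVF_BIT)) {
		status &= ~(1ULL << PEBS_OVF_BIT);
		printf("drain PEBS buffer\n");	/* x86_pmu.drain_pebs(regs) */
	}

	/* remaining set bits are ordinary counter overflows */
	printf("remaining status = %#llx\n", (unsigned long long)status);
	return 0;
}
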
@@ -791,22 +711,18 @@ done:
791 return 1; 711 return 1;
792} 712}
793 713
794static struct event_constraint bts_constraint =
795 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
796
797static struct event_constraint * 714static struct event_constraint *
798intel_special_constraints(struct perf_event *event) 715intel_bts_constraints(struct perf_event *event)
799{ 716{
800 unsigned int hw_event; 717 struct hw_perf_event *hwc = &event->hw;
801 718 unsigned int hw_event, bts_event;
802 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
803 719
804 if (unlikely((hw_event == 720 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
805 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && 721 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
806 (event->hw.sample_period == 1))) {
807 722
723 if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
808 return &bts_constraint; 724 return &bts_constraint;
809 } 725
810 return NULL; 726 return NULL;
811} 727}
812 728
@@ -815,7 +731,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
815{ 731{
816 struct event_constraint *c; 732 struct event_constraint *c;
817 733
818 c = intel_special_constraints(event); 734 c = intel_bts_constraints(event);
735 if (c)
736 return c;
737
738 c = intel_pebs_constraints(event);
819 if (c) 739 if (c)
820 return c; 740 return c;
821 741
@@ -845,6 +765,20 @@ static __initconst struct x86_pmu core_pmu = {
845 .event_constraints = intel_core_event_constraints, 765 .event_constraints = intel_core_event_constraints,
846}; 766};
847 767
768static void intel_pmu_cpu_starting(int cpu)
769{
770 init_debug_store_on_cpu(cpu);
771 /*
772 * Deal with CPUs that don't clear their LBRs on power-up.
773 */
774 intel_pmu_lbr_reset();
775}
776
777static void intel_pmu_cpu_dying(int cpu)
778{
779 fini_debug_store_on_cpu(cpu);
780}
781
848static __initconst struct x86_pmu intel_pmu = { 782static __initconst struct x86_pmu intel_pmu = {
849 .name = "Intel", 783 .name = "Intel",
850 .handle_irq = intel_pmu_handle_irq, 784 .handle_irq = intel_pmu_handle_irq,
@@ -864,14 +798,38 @@ static __initconst struct x86_pmu intel_pmu = {
864 * the generic event period: 798 * the generic event period:
865 */ 799 */
866 .max_period = (1ULL << 31) - 1, 800 .max_period = (1ULL << 31) - 1,
867 .enable_bts = intel_pmu_enable_bts,
868 .disable_bts = intel_pmu_disable_bts,
869 .get_event_constraints = intel_get_event_constraints, 801 .get_event_constraints = intel_get_event_constraints,
870 802
871 .cpu_starting = init_debug_store_on_cpu, 803 .cpu_starting = intel_pmu_cpu_starting,
872 .cpu_dying = fini_debug_store_on_cpu, 804 .cpu_dying = intel_pmu_cpu_dying,
873}; 805};
874 806
807static void intel_clovertown_quirks(void)
808{
809 /*
810 * PEBS is unreliable due to:
811 *
812 * AJ67 - PEBS may experience CPL leaks
813 * AJ68 - PEBS PMI may be delayed by one event
814 * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
815 * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
816 *
817 * AJ67 could be worked around by restricting the OS/USR flags.
818 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
819 *
820 * AJ106 could possibly be worked around by not allowing LBR
821 * usage from PEBS, including the fixup.
822 * AJ68 could possibly be worked around by always programming
823 * a pebs_event_reset[0] value and coping with the lost events.
824 *
825 * But taken together it might just make sense to not enable PEBS on
826 * these chips.
827 */
828 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
829 x86_pmu.pebs = 0;
830 x86_pmu.pebs_constraints = NULL;
831}
832
875static __init int intel_pmu_init(void) 833static __init int intel_pmu_init(void)
876{ 834{
877 union cpuid10_edx edx; 835 union cpuid10_edx edx;
@@ -916,6 +874,18 @@ static __init int intel_pmu_init(void)
916 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); 874 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
917 875
918 /* 876 /*
877 * v2 and above have a perf capabilities MSR
878 */
879 if (version > 1) {
880 u64 capabilities;
881
882 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
883 x86_pmu.intel_cap.capabilities = capabilities;
884 }
885
886 intel_ds_init();
887
888 /*
919 * Install the hw-cache-events table: 889 * Install the hw-cache-events table:
920 */ 890 */
921 switch (boot_cpu_data.x86_model) { 891 switch (boot_cpu_data.x86_model) {
@@ -924,12 +894,15 @@ static __init int intel_pmu_init(void)
924 break; 894 break;
925 895
926 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 896 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
897 x86_pmu.quirks = intel_clovertown_quirks;
927 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 898 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
928 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 899 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
929 case 29: /* six-core 45 nm xeon "Dunnington" */ 900 case 29: /* six-core 45 nm xeon "Dunnington" */
930 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, 901 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
931 sizeof(hw_cache_event_ids)); 902 sizeof(hw_cache_event_ids));
932 903
904 intel_pmu_lbr_init_core();
905
933 x86_pmu.event_constraints = intel_core2_event_constraints; 906 x86_pmu.event_constraints = intel_core2_event_constraints;
934 pr_cont("Core2 events, "); 907 pr_cont("Core2 events, ");
935 break; 908 break;
@@ -939,13 +912,18 @@ static __init int intel_pmu_init(void)
939 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 912 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
940 sizeof(hw_cache_event_ids)); 913 sizeof(hw_cache_event_ids));
941 914
915 intel_pmu_lbr_init_nhm();
916
942 x86_pmu.event_constraints = intel_nehalem_event_constraints; 917 x86_pmu.event_constraints = intel_nehalem_event_constraints;
943 pr_cont("Nehalem/Corei7 events, "); 918 pr_cont("Nehalem/Corei7 events, ");
944 break; 919 break;
920
945 case 28: /* Atom */ 921 case 28: /* Atom */
946 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 922 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
947 sizeof(hw_cache_event_ids)); 923 sizeof(hw_cache_event_ids));
948 924
925 intel_pmu_lbr_init_atom();
926
949 x86_pmu.event_constraints = intel_gen_event_constraints; 927 x86_pmu.event_constraints = intel_gen_event_constraints;
950 pr_cont("Atom events, "); 928 pr_cont("Atom events, ");
951 break; 929 break;
@@ -955,6 +933,8 @@ static __init int intel_pmu_init(void)
955 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 933 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
956 sizeof(hw_cache_event_ids)); 934 sizeof(hw_cache_event_ids));
957 935
936 intel_pmu_lbr_init_nhm();
937
958 x86_pmu.event_constraints = intel_westmere_event_constraints; 938 x86_pmu.event_constraints = intel_westmere_event_constraints;
959 pr_cont("Westmere events, "); 939 pr_cont("Westmere events, ");
960 break; 940 break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
new file mode 100644
index 000000000000..c59678a14a2e
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -0,0 +1,673 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/* The maximal number of PEBS events: */
4#define MAX_PEBS_EVENTS 4
5
6/* The size of a BTS record in bytes: */
7#define BTS_RECORD_SIZE 24
8
9#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
10#define PEBS_BUFFER_SIZE PAGE_SIZE
11
12/*
13 * pebs_record_32 for p4 and core not supported
14
15struct pebs_record_32 {
16 u32 flags, ip;
17 u32 ax, bc, cx, dx;
18 u32 si, di, bp, sp;
19};
20
21 */
22
23struct pebs_record_core {
24 u64 flags, ip;
25 u64 ax, bx, cx, dx;
26 u64 si, di, bp, sp;
27 u64 r8, r9, r10, r11;
28 u64 r12, r13, r14, r15;
29};
30
31struct pebs_record_nhm {
32 u64 flags, ip;
33 u64 ax, bx, cx, dx;
34 u64 si, di, bp, sp;
35 u64 r8, r9, r10, r11;
36 u64 r12, r13, r14, r15;
37 u64 status, dla, dse, lat;
38};
39
40/*
41 * Bits in the debugctlmsr controlling branch tracing.
42 */
43#define X86_DEBUGCTL_TR (1 << 6)
44#define X86_DEBUGCTL_BTS (1 << 7)
45#define X86_DEBUGCTL_BTINT (1 << 8)
46#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9)
47#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10)
48
49/*
50 * A debug store configuration.
51 *
52 * We only support architectures that use 64bit fields.
53 */
54struct debug_store {
55 u64 bts_buffer_base;
56 u64 bts_index;
57 u64 bts_absolute_maximum;
58 u64 bts_interrupt_threshold;
59 u64 pebs_buffer_base;
60 u64 pebs_index;
61 u64 pebs_absolute_maximum;
62 u64 pebs_interrupt_threshold;
63 u64 pebs_event_reset[MAX_PEBS_EVENTS];
64};
65
66static void init_debug_store_on_cpu(int cpu)
67{
68 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
69
70 if (!ds)
71 return;
72
73 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
74 (u32)((u64)(unsigned long)ds),
75 (u32)((u64)(unsigned long)ds >> 32));
76}
77
78static void fini_debug_store_on_cpu(int cpu)
79{
80 if (!per_cpu(cpu_hw_events, cpu).ds)
81 return;
82
83 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
84}
85
86static void release_ds_buffers(void)
87{
88 int cpu;
89
90 if (!x86_pmu.bts && !x86_pmu.pebs)
91 return;
92
93 get_online_cpus();
94
95 for_each_online_cpu(cpu)
96 fini_debug_store_on_cpu(cpu);
97
98 for_each_possible_cpu(cpu) {
99 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
100
101 if (!ds)
102 continue;
103
104 per_cpu(cpu_hw_events, cpu).ds = NULL;
105
106 kfree((void *)(unsigned long)ds->pebs_buffer_base);
107 kfree((void *)(unsigned long)ds->bts_buffer_base);
108 kfree(ds);
109 }
110
111 put_online_cpus();
112}
113
114static int reserve_ds_buffers(void)
115{
116 int cpu, err = 0;
117
118 if (!x86_pmu.bts && !x86_pmu.pebs)
119 return 0;
120
121 get_online_cpus();
122
123 for_each_possible_cpu(cpu) {
124 struct debug_store *ds;
125 void *buffer;
126 int max, thresh;
127
128 err = -ENOMEM;
129 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
130 if (unlikely(!ds))
131 break;
132 per_cpu(cpu_hw_events, cpu).ds = ds;
133
134 if (x86_pmu.bts) {
135 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
136 if (unlikely(!buffer))
137 break;
138
139 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
140 thresh = max / 16;
141
142 ds->bts_buffer_base = (u64)(unsigned long)buffer;
143 ds->bts_index = ds->bts_buffer_base;
144 ds->bts_absolute_maximum = ds->bts_buffer_base +
145 max * BTS_RECORD_SIZE;
146 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
147 thresh * BTS_RECORD_SIZE;
148 }
149
150 if (x86_pmu.pebs) {
151 buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
152 if (unlikely(!buffer))
153 break;
154
155 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
156
157 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
158 ds->pebs_index = ds->pebs_buffer_base;
159 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
160 max * x86_pmu.pebs_record_size;
161 /*
162 * Always use single record PEBS
163 */
164 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
165 x86_pmu.pebs_record_size;
166 }
167
168 err = 0;
169 }
170
171 if (err)
172 release_ds_buffers();
173 else {
174 for_each_online_cpu(cpu)
175 init_debug_store_on_cpu(cpu);
176 }
177
178 put_online_cpus();
179
180 return err;
181}
182
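
reserve_ds_buffers() above sizes the BTS buffer for BTS_BUFFER_SIZE / BTS_RECORD_SIZE records with the interrupt threshold one sixteenth of the buffer from the end, and arms PEBS for single-record operation by placing its threshold one record past the base. The same arithmetic, rerun in user space, assuming a 4 KiB page and the Nehalem record size (22 fields * 8 bytes = 176):

/* Re-run the buffer sizing done in reserve_ds_buffers(), assuming 4 KiB
 * pages and the Nehalem PEBS record size; the base address is invented. */
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define BTS_RECORD_SIZE		24
#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	PAGE_SIZE

int main(void)
{
	unsigned long base = 0x100000;		/* pretend kzalloc() result */
	unsigned long pebs_record_size = 176;	/* sizeof pebs_record_nhm */
	unsigned long max, thresh;

	max    = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
	thresh = max / 16;
	printf("BTS: %lu records, PMI %lu bytes before the end\n",
	       max, thresh * BTS_RECORD_SIZE);
	printf("bts_absolute_maximum    = %#lx\n", base + max * BTS_RECORD_SIZE);
	printf("bts_interrupt_threshold = %#lx\n",
	       base + (max - thresh) * BTS_RECORD_SIZE);

	/* PEBS: threshold one record past the base => PMI on every record */
	printf("pebs records per buffer  = %lu\n",
	       PEBS_BUFFER_SIZE / pebs_record_size);
	printf("pebs_interrupt_threshold = base + %lu\n", pebs_record_size);
	return 0;
}
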
183/*
184 * BTS
185 */
186
187static struct event_constraint bts_constraint =
188 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
189
190static void intel_pmu_enable_bts(u64 config)
191{
192 unsigned long debugctlmsr;
193
194 debugctlmsr = get_debugctlmsr();
195
196 debugctlmsr |= X86_DEBUGCTL_TR;
197 debugctlmsr |= X86_DEBUGCTL_BTS;
198 debugctlmsr |= X86_DEBUGCTL_BTINT;
199
200 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
201 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
202
203 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
204 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
205
206 update_debugctlmsr(debugctlmsr);
207}
208
209static void intel_pmu_disable_bts(void)
210{
211 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
212 unsigned long debugctlmsr;
213
214 if (!cpuc->ds)
215 return;
216
217 debugctlmsr = get_debugctlmsr();
218
219 debugctlmsr &=
220 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
221 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
222
223 update_debugctlmsr(debugctlmsr);
224}
225
226static void intel_pmu_drain_bts_buffer(void)
227{
228 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
229 struct debug_store *ds = cpuc->ds;
230 struct bts_record {
231 u64 from;
232 u64 to;
233 u64 flags;
234 };
235 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
236 struct bts_record *at, *top;
237 struct perf_output_handle handle;
238 struct perf_event_header header;
239 struct perf_sample_data data;
240 struct pt_regs regs;
241
242 if (!event)
243 return;
244
245 if (!ds)
246 return;
247
248 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
249 top = (struct bts_record *)(unsigned long)ds->bts_index;
250
251 if (top <= at)
252 return;
253
254 ds->bts_index = ds->bts_buffer_base;
255
256 perf_sample_data_init(&data, 0);
257 data.period = event->hw.last_period;
258 regs.ip = 0;
259
260 /*
261 * Prepare a generic sample, i.e. fill in the invariant fields.
262 * We will overwrite the from and to address before we output
263 * the sample.
264 */
265 perf_prepare_sample(&header, &data, event, &regs);
266
267 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
268 return;
269
270 for (; at < top; at++) {
271 data.ip = at->from;
272 data.addr = at->to;
273
274 perf_output_sample(&handle, &header, &data, event);
275 }
276
277 perf_output_end(&handle);
278
279 /* There's new data available. */
280 event->hw.interrupts++;
281 event->pending_kill = POLL_IN;
282}
283
284/*
285 * PEBS
286 */
287
288static struct event_constraint intel_core_pebs_events[] = {
289 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
290 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
291 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
292 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
293 PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
294 PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
295 PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
296 PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
297 PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
298 EVENT_CONSTRAINT_END
299};
300
301static struct event_constraint intel_nehalem_pebs_events[] = {
302 PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
303 PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
304 PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
305 PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */
306 PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
307 PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
308 PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
309 PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
310 PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
311 EVENT_CONSTRAINT_END
312};
313
314static struct event_constraint *
315intel_pebs_constraints(struct perf_event *event)
316{
317 struct event_constraint *c;
318
319 if (!event->attr.precise)
320 return NULL;
321
322 if (x86_pmu.pebs_constraints) {
323 for_each_event_constraint(c, x86_pmu.pebs_constraints) {
324 if ((event->hw.config & c->cmask) == c->code)
325 return c;
326 }
327 }
328
329 return &emptyconstraint;
330}
331
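
intel_pebs_constraints() walks the table until the bits selected by the constraint's cmask equal its code; everything else in the event config (USR/OS/INT flags and so on) is ignored by the match. A stand-alone version of that lookup, assuming the event-code-plus-umask mask is 0xffff; the two table entries are copied from the list above and the config value is invented:

/* Sketch of the constraint lookup used by intel_pebs_constraints(): a
 * config matches a table entry when the bits selected by cmask equal code.
 * The mask value (event code + umask = 0xffff) is an assumption. */
#include <stdio.h>
#include <stdint.h>

struct constraint {
	uint64_t code;
	uint64_t cmask;
	const char *name;
};

static const struct constraint pebs_events[] = {
	{ 0x00c0, 0xffff, "INSTR_RETIRED.ANY" },
	{ 0x01cb, 0xffff, "MEM_LOAD_RETIRED.L1D_MISS" },
	{ 0, 0, NULL }				/* EVENT_CONSTRAINT_END */
};

static const struct constraint *find_constraint(uint64_t config)
{
	const struct constraint *c;

	for (c = pebs_events; c->cmask; c++)
		if ((config & c->cmask) == c->code)
			return c;
	return NULL;				/* kernel: &emptyconstraint */
}

int main(void)
{
	/* config with extra enable/privilege bits set above the umask */
	const struct constraint *c = find_constraint(0x5300c0);

	printf("%s\n", c ? c->name : "no PEBS support for this event");
	return 0;
}
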
332static void intel_pmu_pebs_enable(struct perf_event *event)
333{
334 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
335 struct hw_perf_event *hwc = &event->hw;
336
337 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
338
339 cpuc->pebs_enabled |= 1ULL << hwc->idx;
340 WARN_ON_ONCE(cpuc->enabled);
341
342 if (x86_pmu.intel_cap.pebs_trap)
343 intel_pmu_lbr_enable(event);
344}
345
346static void intel_pmu_pebs_disable(struct perf_event *event)
347{
348 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
349 struct hw_perf_event *hwc = &event->hw;
350
351 cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
352 if (cpuc->enabled)
353 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
354
355 hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
356
357 if (x86_pmu.intel_cap.pebs_trap)
358 intel_pmu_lbr_disable(event);
359}
360
361static void intel_pmu_pebs_enable_all(void)
362{
363 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
364
365 if (cpuc->pebs_enabled)
366 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
367}
368
369static void intel_pmu_pebs_disable_all(void)
370{
371 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
372
373 if (cpuc->pebs_enabled)
374 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
375}
376
377#include <asm/insn.h>
378
379static inline bool kernel_ip(unsigned long ip)
380{
381#ifdef CONFIG_X86_32
382 return ip > PAGE_OFFSET;
383#else
384 return (long)ip < 0;
385#endif
386}
387
388static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
389{
390 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
391 unsigned long from = cpuc->lbr_entries[0].from;
392 unsigned long old_to, to = cpuc->lbr_entries[0].to;
393 unsigned long ip = regs->ip;
394
395 /*
396 * We don't need to fixup if the PEBS assist is fault like
397 */
398 if (!x86_pmu.intel_cap.pebs_trap)
399 return 1;
400
401 /*
402 * No LBR entry, no basic block, no rewinding
403 */
404 if (!cpuc->lbr_stack.nr || !from || !to)
405 return 0;
406
407 /*
408 * Basic blocks should never cross user/kernel boundaries
409 */
410 if (kernel_ip(ip) != kernel_ip(to))
411 return 0;
412
413 /*
414 * unsigned math, either ip is before the start (impossible) or
415 * the basic block is larger than 1 page (sanity)
416 */
417 if ((ip - to) > PAGE_SIZE)
418 return 0;
419
420 /*
421 * We sampled a branch insn, rewind using the LBR stack
422 */
423 if (ip == to) {
424 regs->ip = from;
425 return 1;
426 }
427
428 do {
429 struct insn insn;
430 u8 buf[MAX_INSN_SIZE];
431 void *kaddr;
432
433 old_to = to;
434 if (!kernel_ip(ip)) {
435 int bytes, size = MAX_INSN_SIZE;
436
437 bytes = copy_from_user_nmi(buf, (void __user *)to, size);
438 if (bytes != size)
439 return 0;
440
441 kaddr = buf;
442 } else
443 kaddr = (void *)to;
444
445 kernel_insn_init(&insn, kaddr);
446 insn_get_length(&insn);
447 to += insn.length;
448 } while (to < ip);
449
450 if (to == ip) {
451 regs->ip = old_to;
452 return 1;
453 }
454
455 /*
456 * Even though we decoded the basic block, the instruction stream
457 * never matched the given IP, either the TO or the IP got corrupted.
458 */
459 return 0;
460}
461
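
On trap-style PEBS hardware the recorded IP points to the instruction after the one that overflowed the counter, so intel_pmu_pebs_fixup_ip() above decodes forward from the LBR branch target until it reaches the sampled IP and then reports the start of the previous instruction. The walk can be replayed with a made-up list of instruction lengths standing in for the kernel decoder:

/* Replay of the intel_pmu_pebs_fixup_ip() walk with precomputed
 * instruction lengths standing in for insn_get_length().  Addresses and
 * lengths are invented for illustration. */
#include <stdio.h>

int main(void)
{
	/* basic block starting at the LBR branch target 'to';
	 * 'ip' is the PEBS-recorded address, one instruction too late */
	unsigned long to = 0x1000, ip = 0x100c;
	unsigned int  len[] = { 3, 5, 4, 2 };	/* decoded instruction lengths */
	unsigned long old_to = to;
	unsigned int  i = 0;

	while (to < ip) {
		old_to = to;
		to += len[i++];			/* kernel: insn_get_length() */
	}

	if (to == ip)				/* 0x1008: start of the insn
						 * that actually overflowed */
		printf("exact ip = %#lx\n", old_to);
	else
		printf("stream never hit the sampled ip, give up\n");
	return 0;
}
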
462static int intel_pmu_save_and_restart(struct perf_event *event);
463
464static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
465{
466 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
467 struct debug_store *ds = cpuc->ds;
468 struct perf_event *event = cpuc->events[0]; /* PMC0 only */
469 struct pebs_record_core *at, *top;
470 struct perf_sample_data data;
471 struct perf_raw_record raw;
472 struct pt_regs regs;
473 int n;
474
475 if (!ds || !x86_pmu.pebs)
476 return;
477
478 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
479 top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
480
481 /*
482 * Whatever else happens, drain the thing
483 */
484 ds->pebs_index = ds->pebs_buffer_base;
485
486 if (!test_bit(0, cpuc->active_mask))
487 return;
488
489 WARN_ON_ONCE(!event);
490
491 if (!event->attr.precise)
492 return;
493
494 n = top - at;
495 if (n <= 0)
496 return;
497
498 if (!intel_pmu_save_and_restart(event))
499 return;
500
501 /*
502 * Should not happen, we program the threshold at 1 and do not
503 * set a reset value.
504 */
505 WARN_ON_ONCE(n > 1);
506 at += n - 1;
507
508 perf_sample_data_init(&data, 0);
509 data.period = event->hw.last_period;
510
511 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
512 raw.size = x86_pmu.pebs_record_size;
513 raw.data = at;
514 data.raw = &raw;
515 }
516
517 /*
518 * We use the interrupt regs as a base because the PEBS record
519 * does not contain a full regs set, specifically it seems to
520 * lack segment descriptors, which get used by things like
521 * user_mode().
522 *
523 * In the simple case fix up only the IP and BP,SP regs, for
524 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
525 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
526 */
527 regs = *iregs;
528 regs.ip = at->ip;
529 regs.bp = at->bp;
530 regs.sp = at->sp;
531
532 if (intel_pmu_pebs_fixup_ip(&regs))
533 regs.flags |= PERF_EFLAGS_EXACT;
534 else
535 regs.flags &= ~PERF_EFLAGS_EXACT;
536
537 if (perf_event_overflow(event, 1, &data, &regs))
538 x86_pmu_stop(event);
539}
540
541static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
542{
543 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
544 struct debug_store *ds = cpuc->ds;
545 struct pebs_record_nhm *at, *top;
546 struct perf_sample_data data;
547 struct perf_event *event = NULL;
548 struct perf_raw_record raw;
549 struct pt_regs regs;
550 u64 status = 0;
551 int bit, n;
552
553 if (!ds || !x86_pmu.pebs)
554 return;
555
556 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
557 top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
558
559 ds->pebs_index = ds->pebs_buffer_base;
560
561 n = top - at;
562 if (n <= 0)
563 return;
564
565 /*
566 * Should not happen, we program the threshold at 1 and do not
567 * set a reset value.
568 */
569 WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
570
571 for ( ; at < top; at++) {
572 for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
573 event = cpuc->events[bit];
574 if (!test_bit(bit, cpuc->active_mask))
575 continue;
576
577 WARN_ON_ONCE(!event);
578
579 if (!event->attr.precise)
580 continue;
581
582 if (__test_and_set_bit(bit, (unsigned long *)&status))
583 continue;
584
585 break;
586 }
587
588 if (!event || bit >= MAX_PEBS_EVENTS)
589 continue;
590
591 if (!intel_pmu_save_and_restart(event))
592 continue;
593
594 perf_sample_data_init(&data, 0);
595 data.period = event->hw.last_period;
596
597 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
598 raw.size = x86_pmu.pebs_record_size;
599 raw.data = at;
600 data.raw = &raw;
601 }
602
603 /*
604 * See the comment in intel_pmu_drain_pebs_core()
605 */
606 regs = *iregs;
607 regs.ip = at->ip;
608 regs.bp = at->bp;
609 regs.sp = at->sp;
610
611 if (intel_pmu_pebs_fixup_ip(&regs))
612 regs.flags |= PERF_EFLAGS_EXACT;
613 else
614 regs.flags &= ~PERF_EFLAGS_EXACT;
615
616 if (perf_event_overflow(event, 1, &data, &regs))
617 x86_pmu_stop(event);
618 }
619}
620
621/*
622 * BTS, PEBS probe and setup
623 */
624
625static void intel_ds_init(void)
626{
627 /*
628 * No support for 32bit formats
629 */
630 if (!boot_cpu_has(X86_FEATURE_DTES64))
631 return;
632
633 x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
634 x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
635 if (x86_pmu.pebs) {
636 char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
637 int format = x86_pmu.intel_cap.pebs_format;
638
639 switch (format) {
640 case 0:
641 printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
642 x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
643 x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
644 x86_pmu.pebs_constraints = intel_core_pebs_events;
645 break;
646
647 case 1:
648 printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
649 x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
650 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
651 x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
652 break;
653
654 default:
655 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
656 x86_pmu.pebs = 0;
657 break;
658 }
659 }
660}
661
662#else /* CONFIG_CPU_SUP_INTEL */
663
664static int reserve_ds_buffers(void)
665{
666 return 0;
667}
668
669static void release_ds_buffers(void)
670{
671}
672
673#endif /* CONFIG_CPU_SUP_INTEL */
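
One detail of intel_pmu_drain_pebs_nhm() above worth spelling out: each record's status field says which counters it may belong to, and the local status bitmask (built with __test_and_set_bit()) ensures every active counter claims at most one record per drain. A small model of that bookkeeping, with invented data:

/* Model of the record-to-event attribution loop in
 * intel_pmu_drain_pebs_nhm(): the first active, not-yet-claimed counter
 * bit in a record's status wins.  The data is invented. */
#include <stdio.h>
#include <stdint.h>

#define MAX_PEBS_EVENTS 4

int main(void)
{
	uint64_t record_status[] = { 0x1, 0x5, 0x4 };	/* per-record status */
	int active[MAX_PEBS_EVENTS] = { 1, 0, 1, 0 };	/* counters 0 and 2 in use */
	uint64_t claimed = 0;
	unsigned int r, bit;

	for (r = 0; r < sizeof(record_status) / sizeof(record_status[0]); r++) {
		int owner = -1;

		for (bit = 0; bit < MAX_PEBS_EVENTS; bit++) {
			if (!(record_status[r] & (1ULL << bit)))
				continue;
			if (!active[bit])
				continue;
			if (claimed & (1ULL << bit))	/* __test_and_set_bit() */
				continue;
			claimed |= 1ULL << bit;
			owner = bit;
			break;
		}

		if (owner < 0)
			printf("record %u: dropped\n", r);
		else
			printf("record %u: counter %d\n", r, owner);
	}
	return 0;
}
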
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
new file mode 100644
index 000000000000..df4c98e26c5b
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -0,0 +1,221 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3enum {
4 LBR_FORMAT_32 = 0x00,
5 LBR_FORMAT_LIP = 0x01,
6 LBR_FORMAT_EIP = 0x02,
7 LBR_FORMAT_EIP_FLAGS = 0x03,
8};
9
10/*
11 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
12 * otherwise it becomes near impossible to get a reliable stack.
13 */
14
15#define X86_DEBUGCTL_LBR (1 << 0)
16#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI (1 << 11)
17
18static void __intel_pmu_lbr_enable(void)
19{
20 u64 debugctl;
21
22 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
23 debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
24 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
25}
26
27static void __intel_pmu_lbr_disable(void)
28{
29 u64 debugctl;
30
31 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
32 debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
33 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
34}
35
36static void intel_pmu_lbr_reset_32(void)
37{
38 int i;
39
40 for (i = 0; i < x86_pmu.lbr_nr; i++)
41 wrmsrl(x86_pmu.lbr_from + i, 0);
42}
43
44static void intel_pmu_lbr_reset_64(void)
45{
46 int i;
47
48 for (i = 0; i < x86_pmu.lbr_nr; i++) {
49 wrmsrl(x86_pmu.lbr_from + i, 0);
50 wrmsrl(x86_pmu.lbr_to + i, 0);
51 }
52}
53
54static void intel_pmu_lbr_reset(void)
55{
56 if (!x86_pmu.lbr_nr)
57 return;
58
59 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
60 intel_pmu_lbr_reset_32();
61 else
62 intel_pmu_lbr_reset_64();
63}
64
65static void intel_pmu_lbr_enable(struct perf_event *event)
66{
67 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
68
69 if (!x86_pmu.lbr_nr)
70 return;
71
72 WARN_ON_ONCE(cpuc->enabled);
73
74 /*
75 * Reset the LBR stack if we changed task context to
76 * avoid data leaks.
77 */
78
79 if (event->ctx->task && cpuc->lbr_context != event->ctx) {
80 intel_pmu_lbr_reset();
81 cpuc->lbr_context = event->ctx;
82 }
83
84 cpuc->lbr_users++;
85}
86
87static void intel_pmu_lbr_disable(struct perf_event *event)
88{
89 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
90
91 if (!x86_pmu.lbr_nr)
92 return;
93
94 cpuc->lbr_users--;
95 WARN_ON_ONCE(cpuc->lbr_users < 0);
96
97 if (cpuc->enabled && !cpuc->lbr_users)
98 __intel_pmu_lbr_disable();
99}
100
101static void intel_pmu_lbr_enable_all(void)
102{
103 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
104
105 if (cpuc->lbr_users)
106 __intel_pmu_lbr_enable();
107}
108
109static void intel_pmu_lbr_disable_all(void)
110{
111 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
112
113 if (cpuc->lbr_users)
114 __intel_pmu_lbr_disable();
115}
116
117static inline u64 intel_pmu_lbr_tos(void)
118{
119 u64 tos;
120
121 rdmsrl(x86_pmu.lbr_tos, tos);
122
123 return tos;
124}
125
126static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
127{
128 unsigned long mask = x86_pmu.lbr_nr - 1;
129 u64 tos = intel_pmu_lbr_tos();
130 int i;
131
132 for (i = 0; i < x86_pmu.lbr_nr; i++) {
133 unsigned long lbr_idx = (tos - i) & mask;
134 union {
135 struct {
136 u32 from;
137 u32 to;
138 };
139 u64 lbr;
140 } msr_lastbranch;
141
142 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
143
144 cpuc->lbr_entries[i].from = msr_lastbranch.from;
145 cpuc->lbr_entries[i].to = msr_lastbranch.to;
146 cpuc->lbr_entries[i].flags = 0;
147 }
148 cpuc->lbr_stack.nr = i;
149}
150
151#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
152
153/*
154 * Due to lack of segmentation in Linux the effective address (offset)
155 * is the same as the linear address, allowing us to merge the LIP and EIP
156 * LBR formats.
157 */
158static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
159{
160 unsigned long mask = x86_pmu.lbr_nr - 1;
161 int lbr_format = x86_pmu.intel_cap.lbr_format;
162 u64 tos = intel_pmu_lbr_tos();
163 int i;
164
165 for (i = 0; i < x86_pmu.lbr_nr; i++) {
166 unsigned long lbr_idx = (tos - i) & mask;
167 u64 from, to, flags = 0;
168
169 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
170 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
171
172 if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
173 flags = !!(from & LBR_FROM_FLAG_MISPRED);
174 from = (u64)((((s64)from) << 1) >> 1);
175 }
176
177 cpuc->lbr_entries[i].from = from;
178 cpuc->lbr_entries[i].to = to;
179 cpuc->lbr_entries[i].flags = flags;
180 }
181 cpuc->lbr_stack.nr = i;
182}
183
184static void intel_pmu_lbr_read(void)
185{
186 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
187
188 if (!cpuc->lbr_users)
189 return;
190
191 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
192 intel_pmu_lbr_read_32(cpuc);
193 else
194 intel_pmu_lbr_read_64(cpuc);
195}
196
197static void intel_pmu_lbr_init_core(void)
198{
199 x86_pmu.lbr_nr = 4;
200 x86_pmu.lbr_tos = 0x01c9;
201 x86_pmu.lbr_from = 0x40;
202 x86_pmu.lbr_to = 0x60;
203}
204
205static void intel_pmu_lbr_init_nhm(void)
206{
207 x86_pmu.lbr_nr = 16;
208 x86_pmu.lbr_tos = 0x01c9;
209 x86_pmu.lbr_from = 0x680;
210 x86_pmu.lbr_to = 0x6c0;
211}
212
213static void intel_pmu_lbr_init_atom(void)
214{
215 x86_pmu.lbr_nr = 8;
216 x86_pmu.lbr_tos = 0x01c9;
217 x86_pmu.lbr_from = 0x40;
218 x86_pmu.lbr_to = 0x60;
219}
220
221#endif /* CONFIG_CPU_SUP_INTEL */
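
intel_pmu_lbr_read_64() above walks the LBR stack newest-first using the top-of-stack index modulo the stack size, and for the EIP_FLAGS format it recovers the mispredict flag from bit 63 of the FROM value before restoring the address by sign extension. A user-space model of that read-out; an in-memory array stands in for the MSRs, and the sign extension is written out explicitly instead of the kernel's shift pair:

/* User-space model of intel_pmu_lbr_read_64(): walk the LBR ring newest
 * first from TOS and strip the bit-63 mispredict flag off the FROM
 * address.  The entries and TOS value are invented. */
#include <stdio.h>
#include <stdint.h>

#define LBR_NR			4	/* e.g. Core: 4 entries */
#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)

int main(void)
{
	/* stand-ins for the FROM MSR array; entry 0 was mispredicted */
	uint64_t from_msr[LBR_NR] = {
		0x401000ULL | LBR_FROM_FLAG_MISPRED,
		0x401040ULL,
		0x401080ULL,
		0x4010c0ULL,
	};
	unsigned long mask = LBR_NR - 1;
	uint64_t tos = 2;			/* pretend MSR_LASTBRANCH_TOS */
	int i;

	for (i = 0; i < LBR_NR; i++) {
		unsigned long idx = (tos - i) & mask;	/* newest first */
		uint64_t from = from_msr[idx];
		int mispred = !!(from & LBR_FROM_FLAG_MISPRED);

		/* drop the flag, then sign-extend from bit 62 so kernel
		 * addresses stay canonical (kernel does this with shifts) */
		from &= ~LBR_FROM_FLAG_MISPRED;
		if (from & (1ULL << 62))
			from |= LBR_FROM_FLAG_MISPRED;

		printf("lbr[%d]: from=%#llx mispred=%d\n",
		       i, (unsigned long long)from, mispred);
	}
	return 0;
}
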
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 419386c24b82..cbaf8f2b83df 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -20,7 +20,7 @@ lib-y := delay.o
20lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
21lib-y += usercopy_$(BITS).o getuser.o putuser.o 21lib-y += usercopy_$(BITS).o getuser.o putuser.o
22lib-y += memcpy_$(BITS).o 22lib-y += memcpy_$(BITS).o
23lib-$(CONFIG_KPROBES) += insn.o inat.o 23lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
24 24
25obj-y += msr.o msr-reg.o msr-reg-export.o 25obj-y += msr.o msr-reg.o msr-reg-export.o
26 26