author		Avi Kivity <avi@redhat.com>	2010-04-19 05:52:53 -0400
committer	Avi Kivity <avi@redhat.com>	2010-05-17 05:17:58 -0400
commit		9beeaa2d689842f7760aa16c512e6bb8182d38b6 (patch)
tree		62cea0772127c4b1c0b476e46dec6830d36809c1 /arch
parent		3246af0ece6c61689847417977733f0b12dc4b6f (diff)
parent		a1645ce12adb6c9cc9e19d7695466204e3f017fe (diff)
Merge branch 'perf'
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig | 9
-rw-r--r--  arch/x86/Kconfig | 3
-rw-r--r--  arch/x86/Kconfig.cpu | 20
-rw-r--r--  arch/x86/Kconfig.debug | 9
-rw-r--r--  arch/x86/include/asm/apic.h | 13
-rw-r--r--  arch/x86/include/asm/ds.h | 302
-rw-r--r--  arch/x86/include/asm/insn.h | 2
-rw-r--r--  arch/x86/include/asm/kprobes.h | 2
-rw-r--r--  arch/x86/include/asm/msr-index.h | 15
-rw-r--r--  arch/x86/include/asm/perf_event.h | 76
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h | 794
-rw-r--r--  arch/x86/include/asm/processor.h | 35
-rw-r--r--  arch/x86/include/asm/ptrace-abi.h | 57
-rw-r--r--  arch/x86/include/asm/ptrace.h | 6
-rw-r--r--  arch/x86/include/asm/thread_info.h | 8
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 552
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 46
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 354
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 664
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 218
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 834
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c | 31
-rw-r--r--  arch/x86/kernel/ds.c | 1437
-rw-r--r--  arch/x86/kernel/ds_selftest.c | 408
-rw-r--r--  arch/x86/kernel/ds_selftest.h | 15
-rw-r--r--  arch/x86/kernel/dumpstack.c | 5
-rw-r--r--  arch/x86/kernel/kprobes.c | 16
-rw-r--r--  arch/x86/kernel/process.c | 18
-rw-r--r--  arch/x86/kernel/process_32.c | 8
-rw-r--r--  arch/x86/kernel/process_64.c | 8
-rw-r--r--  arch/x86/kernel/ptrace.c | 382
-rw-r--r--  arch/x86/kernel/step.c | 46
-rw-r--r--  arch/x86/kernel/traps.c | 4
-rw-r--r--  arch/x86/kvm/vmx.c | 5
-rw-r--r--  arch/x86/kvm/x86.c | 46
-rw-r--r--  arch/x86/kvm/x86.h | 3
-rw-r--r--  arch/x86/lib/Makefile | 2
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c | 4
40 files changed, 3189 insertions, 3272 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index e5eb1337a537..f06010fb4838 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -42,15 +42,10 @@ config KPROBES
 	  If in doubt, say "N".
 
 config OPTPROBES
-	bool "Kprobes jump optimization support (EXPERIMENTAL)"
-	default y
-	depends on KPROBES
+	def_bool y
+	depends on KPROBES && HAVE_OPTPROBES
 	depends on !PREEMPT
-	depends on HAVE_OPTPROBES
 	select KALLSYMS_ALL
-	help
-	  This option will allow kprobes to optimize breakpoint to
-	  a jump for reducing its overhead.
 
 config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	bool
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9458685902bd..97a95dfd1181 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -58,6 +58,9 @@ config X86
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
 
+config INSTRUCTION_DECODER
+	def_bool (KPROBES || PERF_EVENTS)
+
 config OUTPUT_FORMAT
 	string
 	default "elf32-i386" if X86_32
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index a19829374e6a..918fbb1855cc 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -502,23 +502,3 @@ config CPU_SUP_UMC_32
 	  CPU might render the kernel unbootable.
 
 	  If unsure, say N.
-
-config X86_DS
-	def_bool X86_PTRACE_BTS
-	depends on X86_DEBUGCTLMSR
-	select HAVE_HW_BRANCH_TRACER
-
-config X86_PTRACE_BTS
-	bool "Branch Trace Store"
-	default y
-	depends on X86_DEBUGCTLMSR
-	depends on BROKEN
-	---help---
-	  This adds a ptrace interface to the hardware's branch trace store.
-
-	  Debuggers may use it to collect an execution trace of the debugged
-	  application in order to answer the question 'how did I get here?'.
-	  Debuggers may trace user mode as well as kernel mode.
-
-	  Say Y unless there is no application development on this machine
-	  and you want to save a small amount of code size.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bc01e3ebfeb2..bd58c8abbfbd 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,15 +174,6 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
-config X86_DS_SELFTEST
-	bool "DS selftest"
-	default y
-	depends on DEBUG_KERNEL
-	depends on X86_DS
-	---help---
-	  Perform Debug Store selftests at boot time.
-	  If in doubt, say "N".
-
 config HAVE_MMIOTRACE_SUPPORT
 	def_bool y
 
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index b4ac2cdcb64f..1fa03e04ae44 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -373,6 +373,7 @@ extern atomic_t init_deasserted;
 extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
 #endif
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static inline u32 apic_read(u32 reg)
 {
 	return apic->read(reg);
@@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void)
 	return apic->safe_wait_icr_idle();
 }
 
+#else /* CONFIG_X86_LOCAL_APIC */
+
+static inline u32 apic_read(u32 reg) { return 0; }
+static inline void apic_write(u32 reg, u32 val) { }
+static inline u64 apic_icr_read(void) { return 0; }
+static inline void apic_icr_write(u32 low, u32 high) { }
+static inline void apic_wait_icr_idle(void) { }
+static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
+
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 static inline void ack_APIC_irq(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * ack_APIC_irq() actually gets compiled as a single instruction
 	 * ... yummie.
@@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void)
 
 	/* Docs say use 0 for future compatibility */
 	apic_write(APIC_EOI, 0);
-#endif
 }
 
 static inline unsigned default_get_apic_id(unsigned long x)
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
deleted file mode 100644
index 70dac199b093..000000000000
--- a/arch/x86/include/asm/ds.h
+++ /dev/null
@@ -1,302 +0,0 @@
1/*
2 * Debug Store (DS) support
3 *
4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS).
7 *
8 * It manages:
9 * - DS and BTS hardware configuration
10 * - buffer overflow handling (to be done)
11 * - buffer access
12 *
13 * It does not do:
14 * - security checking (is the caller allowed to trace the task)
15 * - buffer allocation (memory accounting)
16 *
17 *
18 * Copyright (C) 2007-2009 Intel Corporation.
19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
20 */
21
22#ifndef _ASM_X86_DS_H
23#define _ASM_X86_DS_H
24
25
26#include <linux/types.h>
27#include <linux/init.h>
28#include <linux/err.h>
29
30
31#ifdef CONFIG_X86_DS
32
33struct task_struct;
34struct ds_context;
35struct ds_tracer;
36struct bts_tracer;
37struct pebs_tracer;
38
39typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
40typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
41
42
43/*
44 * A list of features plus corresponding macros to talk about them in
45 * the ds_request function's flags parameter.
46 *
47 * We use the enum to index an array of corresponding control bits;
48 * we use the macro to index a flags bit-vector.
49 */
50enum ds_feature {
51 dsf_bts = 0,
52 dsf_bts_kernel,
53#define BTS_KERNEL (1 << dsf_bts_kernel)
54 /* trace kernel-mode branches */
55
56 dsf_bts_user,
57#define BTS_USER (1 << dsf_bts_user)
58 /* trace user-mode branches */
59
60 dsf_bts_overflow,
61 dsf_bts_max,
62 dsf_pebs = dsf_bts_max,
63
64 dsf_pebs_max,
65 dsf_ctl_max = dsf_pebs_max,
66 dsf_bts_timestamps = dsf_ctl_max,
67#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps)
68 /* add timestamps into BTS trace */
69
70#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS)
71};
72
73
74/*
75 * Request BTS or PEBS
76 *
77 * Due to alignement constraints, the actual buffer may be slightly
78 * smaller than the requested or provided buffer.
79 *
80 * Returns a pointer to a tracer structure on success, or
81 * ERR_PTR(errcode) on failure.
82 *
83 * The interrupt threshold is independent from the overflow callback
84 * to allow users to use their own overflow interrupt handling mechanism.
85 *
86 * The function might sleep.
87 *
88 * task: the task to request recording for
89 * cpu: the cpu to request recording for
90 * base: the base pointer for the (non-pageable) buffer;
91 * size: the size of the provided buffer in bytes
92 * ovfl: pointer to a function to be called on buffer overflow;
93 * NULL if cyclic buffer requested
94 * th: the interrupt threshold in records from the end of the buffer;
95 * -1 if no interrupt threshold is requested.
96 * flags: a bit-mask of the above flags
97 */
98extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
99 void *base, size_t size,
100 bts_ovfl_callback_t ovfl,
101 size_t th, unsigned int flags);
102extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
103 bts_ovfl_callback_t ovfl,
104 size_t th, unsigned int flags);
105extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
106 void *base, size_t size,
107 pebs_ovfl_callback_t ovfl,
108 size_t th, unsigned int flags);
109extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
110 void *base, size_t size,
111 pebs_ovfl_callback_t ovfl,
112 size_t th, unsigned int flags);
113
114/*
115 * Release BTS or PEBS resources
116 * Suspend and resume BTS or PEBS tracing
117 *
118 * Must be called with irq's enabled.
119 *
120 * tracer: the tracer handle returned from ds_request_~()
121 */
122extern void ds_release_bts(struct bts_tracer *tracer);
123extern void ds_suspend_bts(struct bts_tracer *tracer);
124extern void ds_resume_bts(struct bts_tracer *tracer);
125extern void ds_release_pebs(struct pebs_tracer *tracer);
126extern void ds_suspend_pebs(struct pebs_tracer *tracer);
127extern void ds_resume_pebs(struct pebs_tracer *tracer);
128
129/*
130 * Release BTS or PEBS resources
131 * Suspend and resume BTS or PEBS tracing
132 *
133 * Cpu tracers must call this on the traced cpu.
134 * Task tracers must call ds_release_~_noirq() for themselves.
135 *
136 * May be called with irq's disabled.
137 *
138 * Returns 0 if successful;
139 * -EPERM if the cpu tracer does not trace the current cpu.
140 * -EPERM if the task tracer does not trace itself.
141 *
142 * tracer: the tracer handle returned from ds_request_~()
143 */
144extern int ds_release_bts_noirq(struct bts_tracer *tracer);
145extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
146extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
147extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
148extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
149extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
150
151
152/*
153 * The raw DS buffer state as it is used for BTS and PEBS recording.
154 *
155 * This is the low-level, arch-dependent interface for working
156 * directly on the raw trace data.
157 */
158struct ds_trace {
159 /* the number of bts/pebs records */
160 size_t n;
161 /* the size of a bts/pebs record in bytes */
162 size_t size;
163 /* pointers into the raw buffer:
164 - to the first entry */
165 void *begin;
166 /* - one beyond the last entry */
167 void *end;
168 /* - one beyond the newest entry */
169 void *top;
170 /* - the interrupt threshold */
171 void *ith;
172 /* flags given on ds_request() */
173 unsigned int flags;
174};
175
176/*
177 * An arch-independent view on branch trace data.
178 */
179enum bts_qualifier {
180 bts_invalid,
181#define BTS_INVALID bts_invalid
182
183 bts_branch,
184#define BTS_BRANCH bts_branch
185
186 bts_task_arrives,
187#define BTS_TASK_ARRIVES bts_task_arrives
188
189 bts_task_departs,
190#define BTS_TASK_DEPARTS bts_task_departs
191
192 bts_qual_bit_size = 4,
193 bts_qual_max = (1 << bts_qual_bit_size),
194};
195
196struct bts_struct {
197 __u64 qualifier;
198 union {
199 /* BTS_BRANCH */
200 struct {
201 __u64 from;
202 __u64 to;
203 } lbr;
204 /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
205 struct {
206 __u64 clock;
207 pid_t pid;
208 } event;
209 } variant;
210};
211
212
213/*
214 * The BTS state.
215 *
216 * This gives access to the raw DS state and adds functions to provide
217 * an arch-independent view of the BTS data.
218 */
219struct bts_trace {
220 struct ds_trace ds;
221
222 int (*read)(struct bts_tracer *tracer, const void *at,
223 struct bts_struct *out);
224 int (*write)(struct bts_tracer *tracer, const struct bts_struct *in);
225};
226
227
228/*
229 * The PEBS state.
230 *
231 * This gives access to the raw DS state and the PEBS-specific counter
232 * reset value.
233 */
234struct pebs_trace {
235 struct ds_trace ds;
236
237 /* the number of valid counters in the below array */
238 unsigned int counters;
239
240#define MAX_PEBS_COUNTERS 4
241 /* the counter reset value */
242 unsigned long long counter_reset[MAX_PEBS_COUNTERS];
243};
244
245
246/*
247 * Read the BTS or PEBS trace.
248 *
249 * Returns a view on the trace collected for the parameter tracer.
250 *
251 * The view remains valid as long as the traced task is not running or
252 * the tracer is suspended.
253 * Writes into the trace buffer are not reflected.
254 *
255 * tracer: the tracer handle returned from ds_request_~()
256 */
257extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer);
258extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer);
259
260
261/*
262 * Reset the write pointer of the BTS/PEBS buffer.
263 *
264 * Returns 0 on success; -Eerrno on error
265 *
266 * tracer: the tracer handle returned from ds_request_~()
267 */
268extern int ds_reset_bts(struct bts_tracer *tracer);
269extern int ds_reset_pebs(struct pebs_tracer *tracer);
270
271/*
272 * Set the PEBS counter reset value.
273 *
274 * Returns 0 on success; -Eerrno on error
275 *
276 * tracer: the tracer handle returned from ds_request_pebs()
277 * counter: the index of the counter
278 * value: the new counter reset value
279 */
280extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
281 unsigned int counter, u64 value);
282
283/*
284 * Initialization
285 */
286struct cpuinfo_x86;
287extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
288
289/*
290 * Context switch work
291 */
292extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
293
294#else /* CONFIG_X86_DS */
295
296struct cpuinfo_x86;
297static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
298static inline void ds_switch_to(struct task_struct *prev,
299 struct task_struct *next) {}
300
301#endif /* CONFIG_X86_DS */
302#endif /* _ASM_X86_DS_H */
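The deleted ds.h above carried the only documentation of the DS/BTS request interface. For orientation, a minimal sketch of how a kernel-side caller was expected to use that (now removed) interface; the buffer, threshold and function names here are illustrative, not taken from any in-tree user:

#include <linux/err.h>
#include <linux/sched.h>
#include <asm/ds.h>			/* the header deleted above */

static struct bts_tracer *tracer;

/* Overflow callback: a real caller would drain the records here. */
static void bts_overflow(struct bts_tracer *t)
{
	ds_reset_bts(t);
}

static int start_bts_example(struct task_struct *task, void *buf, size_t size)
{
	tracer = ds_request_bts_task(task, buf, size, bts_overflow,
				     1 /* interrupt threshold, in records */,
				     BTS_USER_FLAGS);
	if (IS_ERR(tracer))
		return PTR_ERR(tracer);

	return 0;
}

Per the comments in the removed header, the tracer is torn down with ds_release_bts(), and the view returned by ds_read_bts() stays valid only while the traced task is not running or the tracer is suspended.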
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 96c2e0ad04ca..88c765e16410 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -68,6 +68,8 @@ struct insn {
 	const insn_byte_t *next_byte;
 };
 
+#define MAX_INSN_SIZE 16
+
 #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
 #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
 #define X86_MODRM_RM(modrm) ((modrm) & 0x07)
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4ffa345a8ccb..547882539157 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -24,6 +24,7 @@
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
+#include <asm/insn.h>
 
 #define __ARCH_WANT_KPROBES_INSN_SLOT
 
@@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t;
 #define RELATIVEJUMP_SIZE 5
 #define RELATIVECALL_OPCODE 0xe8
 #define RELATIVE_ADDR_SIZE 4
-#define MAX_INSN_SIZE 16
 #define MAX_STACK_SIZE 64
 #define MIN_STACK_SIZE(ADDR) \
 	(((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4604e6a54d36..bc473acfa7f9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -71,11 +71,14 @@
 #define MSR_IA32_LASTINTTOIP 0x000001de
 
 /* DEBUGCTLMSR bits (others vary by model): */
-#define _DEBUGCTLMSR_LBR 0 /* last branch recording */
-#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */
-
-#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR)
-#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF)
+#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+#define DEBUGCTLMSR_TR (1UL << 6)
+#define DEBUGCTLMSR_BTS (1UL << 7)
+#define DEBUGCTLMSR_BTINT (1UL << 8)
+#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
+#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
+#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
 
 #define MSR_IA32_MC0_CTL 0x00000400
 #define MSR_IA32_MC0_STATUS 0x00000401
@@ -359,6 +362,8 @@
 #define MSR_P4_U2L_ESCR0 0x000003b0
 #define MSR_P4_U2L_ESCR1 0x000003b1
 
+#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
+
 /* Intel Core-based CPU performance counters */
 #define MSR_CORE_PERF_FIXED_CTR0 0x00000309
 #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
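The DEBUGCTLMSR bit set above grows from the two LBR/BTF bits to the full set the branch-trace-store code needs. A sketch of how those bits combine, assuming the DS buffer has already been published through MSR_IA32_DS_AREA; the helper name is illustrative:

#include <linux/types.h>
#include <asm/msr-index.h>
#include <asm/processor.h>	/* get_debugctlmsr()/update_debugctlmsr() */

static void bts_enable_example(bool trace_kernel, bool trace_user)
{
	unsigned long debugctl = get_debugctlmsr();

	/* TR + BTS route branch trace messages into the DS buffer;
	 * BTINT raises an interrupt when the buffer hits its threshold. */
	debugctl |= DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT;

	if (!trace_kernel)
		debugctl |= DEBUGCTLMSR_BTS_OFF_OS;	/* skip ring-0 branches */
	if (!trace_user)
		debugctl |= DEBUGCTLMSR_BTS_OFF_USR;	/* skip ring-3 branches */

	update_debugctlmsr(debugctl);
}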
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index db6109a885a7..254883d0c7e0 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -5,7 +5,7 @@
  * Performance event hw details:
  */
 
-#define X86_PMC_MAX_GENERIC 8
+#define X86_PMC_MAX_GENERIC 32
 #define X86_PMC_MAX_FIXED 3
 
 #define X86_PMC_IDX_GENERIC 0
@@ -18,39 +18,31 @@
 #define MSR_ARCH_PERFMON_EVENTSEL0 0x186
 #define MSR_ARCH_PERFMON_EVENTSEL1 0x187
 
-#define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21)
-#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
-
-/*
- * Includes eventsel and unit mask as well:
- */
-
+#define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL
+#define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL
+#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16)
+#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18)
+#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20)
+#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
+#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
+#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
 
-#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL
-#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL
-#define INTEL_ARCH_EDGE_MASK 0x00040000ULL
-#define INTEL_ARCH_INV_MASK 0x00800000ULL
-#define INTEL_ARCH_CNT_MASK 0xFF000000ULL
-#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
-
-/*
- * filter mask to validate fixed counter events.
- * the following filters disqualify for fixed counters:
- * - inv
- * - edge
- * - cnt-mask
- * The other filters are supported by fixed counters.
- * The any-thread option is supported starting with v3.
- */
-#define INTEL_ARCH_FIXED_MASK \
-	(INTEL_ARCH_CNT_MASK| \
-	 INTEL_ARCH_INV_MASK| \
-	 INTEL_ARCH_EDGE_MASK|\
-	 INTEL_ARCH_UNIT_MASK|\
-	 INTEL_ARCH_EVENT_MASK)
+#define AMD64_EVENTSEL_EVENT \
+	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
+#define INTEL_ARCH_EVENT_MASK \
+	(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
+
+#define X86_RAW_EVENT_MASK \
+	(ARCH_PERFMON_EVENTSEL_EVENT | \
+	 ARCH_PERFMON_EVENTSEL_UMASK | \
+	 ARCH_PERFMON_EVENTSEL_EDGE | \
+	 ARCH_PERFMON_EVENTSEL_INV | \
+	 ARCH_PERFMON_EVENTSEL_CMASK)
+#define AMD64_RAW_EVENT_MASK \
+	(X86_RAW_EVENT_MASK | \
+	 AMD64_EVENTSEL_EVENT)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
@@ -67,7 +59,7 @@
 union cpuid10_eax {
 	struct {
 		unsigned int version_id:8;
-		unsigned int num_events:8;
+		unsigned int num_counters:8;
 		unsigned int bit_width:8;
 		unsigned int mask_length:8;
 	} split;
@@ -76,7 +68,7 @@ union cpuid10_eax {
 
 union cpuid10_edx {
 	struct {
-		unsigned int num_events_fixed:4;
+		unsigned int num_counters_fixed:4;
 		unsigned int reserved:28;
 	} split;
 	unsigned int full;
@@ -136,6 +128,18 @@ extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET 0
 
+/*
+ * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
+ * This flag is otherwise unused and ABI specified to be 0, so nobody should
+ * care what we do with it.
+ */
+#define PERF_EFLAGS_EXACT (1UL << 3)
+
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
 #else
 static inline void init_hw_perf_events(void) { }
 static inline void perf_events_lapic_init(void) { }
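X86_RAW_EVENT_MASK introduced above is the filter the x86 perf code applies to user-supplied raw event encodings: only the event-selection fields pass through, while privilege and enable bits stay under kernel control. A minimal sketch (the function name is illustrative):

#include <asm/perf_event.h>

static u64 sanitize_raw_config_example(u64 user_raw)
{
	/*
	 * Keep event, umask, edge, inv and cmask from the raw value.
	 * ARCH_PERFMON_EVENTSEL_USR/OS/ENABLE are programmed by the kernel
	 * from the perf_event_attr exclude_* settings, not copied from here.
	 */
	return user_raw & X86_RAW_EVENT_MASK;
}

PERF_EFLAGS_EXACT, also added above, is the bit the PEBS fix-up code sets in the sampled regs->flags so that perf_misc_flags() can report the instruction pointer as exact.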
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
new file mode 100644
index 000000000000..b05400a542ff
--- /dev/null
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -0,0 +1,794 @@
1/*
2 * Netburst Perfomance Events (P4, old Xeon)
3 */
4
5#ifndef PERF_EVENT_P4_H
6#define PERF_EVENT_P4_H
7
8#include <linux/cpu.h>
9#include <linux/bitops.h>
10
11/*
12 * NetBurst has perfomance MSRs shared between
13 * threads if HT is turned on, ie for both logical
14 * processors (mem: in turn in Atom with HT support
15 * perf-MSRs are not shared and every thread has its
16 * own perf-MSRs set)
17 */
18#define ARCH_P4_TOTAL_ESCR (46)
19#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */
20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
21#define ARCH_P4_MAX_CCCR (18)
22#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2)
23
24#define P4_ESCR_EVENT_MASK 0x7e000000U
25#define P4_ESCR_EVENT_SHIFT 25
26#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U
27#define P4_ESCR_EVENTMASK_SHIFT 9
28#define P4_ESCR_TAG_MASK 0x000001e0U
29#define P4_ESCR_TAG_SHIFT 5
30#define P4_ESCR_TAG_ENABLE 0x00000010U
31#define P4_ESCR_T0_OS 0x00000008U
32#define P4_ESCR_T0_USR 0x00000004U
33#define P4_ESCR_T1_OS 0x00000002U
34#define P4_ESCR_T1_USR 0x00000001U
35
36#define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT)
37#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
38#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
39
40/* Non HT mask */
41#define P4_ESCR_MASK \
42 (P4_ESCR_EVENT_MASK | \
43 P4_ESCR_EVENTMASK_MASK | \
44 P4_ESCR_TAG_MASK | \
45 P4_ESCR_TAG_ENABLE | \
46 P4_ESCR_T0_OS | \
47 P4_ESCR_T0_USR)
48
49/* HT mask */
50#define P4_ESCR_MASK_HT \
51 (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
52
53#define P4_CCCR_OVF 0x80000000U
54#define P4_CCCR_CASCADE 0x40000000U
55#define P4_CCCR_OVF_PMI_T0 0x04000000U
56#define P4_CCCR_OVF_PMI_T1 0x08000000U
57#define P4_CCCR_FORCE_OVF 0x02000000U
58#define P4_CCCR_EDGE 0x01000000U
59#define P4_CCCR_THRESHOLD_MASK 0x00f00000U
60#define P4_CCCR_THRESHOLD_SHIFT 20
61#define P4_CCCR_COMPLEMENT 0x00080000U
62#define P4_CCCR_COMPARE 0x00040000U
63#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U
64#define P4_CCCR_ESCR_SELECT_SHIFT 13
65#define P4_CCCR_ENABLE 0x00001000U
66#define P4_CCCR_THREAD_SINGLE 0x00010000U
67#define P4_CCCR_THREAD_BOTH 0x00020000U
68#define P4_CCCR_THREAD_ANY 0x00030000U
69#define P4_CCCR_RESERVED 0x00000fffU
70
71#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
72#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
73
74/* Custom bits in reerved CCCR area */
75#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU
76
77
78/* Non HT mask */
79#define P4_CCCR_MASK \
80 (P4_CCCR_OVF | \
81 P4_CCCR_CASCADE | \
82 P4_CCCR_OVF_PMI_T0 | \
83 P4_CCCR_FORCE_OVF | \
84 P4_CCCR_EDGE | \
85 P4_CCCR_THRESHOLD_MASK | \
86 P4_CCCR_COMPLEMENT | \
87 P4_CCCR_COMPARE | \
88 P4_CCCR_ESCR_SELECT_MASK | \
89 P4_CCCR_ENABLE)
90
91/* HT mask */
92#define P4_CCCR_MASK_HT (P4_CCCR_MASK | P4_CCCR_THREAD_ANY)
93
94#define P4_GEN_ESCR_EMASK(class, name, bit) \
95 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
96#define P4_ESCR_EMASK_BIT(class, name) class##__##name
97
98/*
99 * config field is 64bit width and consists of
100 * HT << 63 | ESCR << 32 | CCCR
101 * where HT is HyperThreading bit (since ESCR
102 * has it reserved we may use it for own purpose)
103 *
104 * note that this is NOT the addresses of respective
105 * ESCR and CCCR but rather an only packed value should
106 * be unpacked and written to a proper addresses
107 *
108 * the base idea is to pack as much info as
109 * possible
110 */
111#define p4_config_pack_escr(v) (((u64)(v)) << 32)
112#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL)
113#define p4_config_unpack_escr(v) (((u64)(v)) >> 32)
114#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL)
115
116#define p4_config_unpack_emask(v) \
117 ({ \
118 u32 t = p4_config_unpack_escr((v)); \
119 t = t & P4_ESCR_EVENTMASK_MASK; \
120 t = t >> P4_ESCR_EVENTMASK_SHIFT; \
121 t; \
122 })
123
124#define p4_config_unpack_event(v) \
125 ({ \
126 u32 t = p4_config_unpack_escr((v)); \
127 t = t & P4_ESCR_EVENT_MASK; \
128 t = t >> P4_ESCR_EVENT_SHIFT; \
129 t; \
130 })
131
132#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
133
134#define P4_CONFIG_HT_SHIFT 63
135#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
136
137static inline bool p4_is_event_cascaded(u64 config)
138{
139 u32 cccr = p4_config_unpack_cccr(config);
140 return !!(cccr & P4_CCCR_CASCADE);
141}
142
143static inline int p4_ht_config_thread(u64 config)
144{
145 return !!(config & P4_CONFIG_HT);
146}
147
148static inline u64 p4_set_ht_bit(u64 config)
149{
150 return config | P4_CONFIG_HT;
151}
152
153static inline u64 p4_clear_ht_bit(u64 config)
154{
155 return config & ~P4_CONFIG_HT;
156}
157
158static inline int p4_ht_active(void)
159{
160#ifdef CONFIG_SMP
161 return smp_num_siblings > 1;
162#endif
163 return 0;
164}
165
166static inline int p4_ht_thread(int cpu)
167{
168#ifdef CONFIG_SMP
169 if (smp_num_siblings == 2)
170 return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
171#endif
172 return 0;
173}
174
175static inline int p4_should_swap_ts(u64 config, int cpu)
176{
177 return p4_ht_config_thread(config) ^ p4_ht_thread(cpu);
178}
179
180static inline u32 p4_default_cccr_conf(int cpu)
181{
182 /*
183 * Note that P4_CCCR_THREAD_ANY is "required" on
184 * non-HT machines (on HT machines we count TS events
185 * regardless the state of second logical processor
186 */
187 u32 cccr = P4_CCCR_THREAD_ANY;
188
189 if (!p4_ht_thread(cpu))
190 cccr |= P4_CCCR_OVF_PMI_T0;
191 else
192 cccr |= P4_CCCR_OVF_PMI_T1;
193
194 return cccr;
195}
196
197static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
198{
199 u32 escr = 0;
200
201 if (!p4_ht_thread(cpu)) {
202 if (!exclude_os)
203 escr |= P4_ESCR_T0_OS;
204 if (!exclude_usr)
205 escr |= P4_ESCR_T0_USR;
206 } else {
207 if (!exclude_os)
208 escr |= P4_ESCR_T1_OS;
209 if (!exclude_usr)
210 escr |= P4_ESCR_T1_USR;
211 }
212
213 return escr;
214}
215
216enum P4_EVENTS {
217 P4_EVENT_TC_DELIVER_MODE,
218 P4_EVENT_BPU_FETCH_REQUEST,
219 P4_EVENT_ITLB_REFERENCE,
220 P4_EVENT_MEMORY_CANCEL,
221 P4_EVENT_MEMORY_COMPLETE,
222 P4_EVENT_LOAD_PORT_REPLAY,
223 P4_EVENT_STORE_PORT_REPLAY,
224 P4_EVENT_MOB_LOAD_REPLAY,
225 P4_EVENT_PAGE_WALK_TYPE,
226 P4_EVENT_BSQ_CACHE_REFERENCE,
227 P4_EVENT_IOQ_ALLOCATION,
228 P4_EVENT_IOQ_ACTIVE_ENTRIES,
229 P4_EVENT_FSB_DATA_ACTIVITY,
230 P4_EVENT_BSQ_ALLOCATION,
231 P4_EVENT_BSQ_ACTIVE_ENTRIES,
232 P4_EVENT_SSE_INPUT_ASSIST,
233 P4_EVENT_PACKED_SP_UOP,
234 P4_EVENT_PACKED_DP_UOP,
235 P4_EVENT_SCALAR_SP_UOP,
236 P4_EVENT_SCALAR_DP_UOP,
237 P4_EVENT_64BIT_MMX_UOP,
238 P4_EVENT_128BIT_MMX_UOP,
239 P4_EVENT_X87_FP_UOP,
240 P4_EVENT_TC_MISC,
241 P4_EVENT_GLOBAL_POWER_EVENTS,
242 P4_EVENT_TC_MS_XFER,
243 P4_EVENT_UOP_QUEUE_WRITES,
244 P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE,
245 P4_EVENT_RETIRED_BRANCH_TYPE,
246 P4_EVENT_RESOURCE_STALL,
247 P4_EVENT_WC_BUFFER,
248 P4_EVENT_B2B_CYCLES,
249 P4_EVENT_BNR,
250 P4_EVENT_SNOOP,
251 P4_EVENT_RESPONSE,
252 P4_EVENT_FRONT_END_EVENT,
253 P4_EVENT_EXECUTION_EVENT,
254 P4_EVENT_REPLAY_EVENT,
255 P4_EVENT_INSTR_RETIRED,
256 P4_EVENT_UOPS_RETIRED,
257 P4_EVENT_UOP_TYPE,
258 P4_EVENT_BRANCH_RETIRED,
259 P4_EVENT_MISPRED_BRANCH_RETIRED,
260 P4_EVENT_X87_ASSIST,
261 P4_EVENT_MACHINE_CLEAR,
262 P4_EVENT_INSTR_COMPLETED,
263};
264
265#define P4_OPCODE(event) event##_OPCODE
266#define P4_OPCODE_ESEL(opcode) ((opcode & 0x00ff) >> 0)
267#define P4_OPCODE_EVNT(opcode) ((opcode & 0xff00) >> 8)
268#define P4_OPCODE_PACK(event, sel) (((event) << 8) | sel)
269
270/*
271 * Comments below the event represent ESCR restriction
272 * for this event and counter index per ESCR
273 *
274 * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early
275 * processor builds (family 0FH, models 01H-02H). These MSRs
276 * are not available on later versions, so that we don't use
277 * them completely
278 *
279 * Also note that CCCR1 do not have P4_CCCR_ENABLE bit properly
280 * working so that we should not use this CCCR and respective
281 * counter as result
282 */
283enum P4_EVENT_OPCODES {
284 P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) = P4_OPCODE_PACK(0x01, 0x01),
285 /*
286 * MSR_P4_TC_ESCR0: 4, 5
287 * MSR_P4_TC_ESCR1: 6, 7
288 */
289
290 P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST) = P4_OPCODE_PACK(0x03, 0x00),
291 /*
292 * MSR_P4_BPU_ESCR0: 0, 1
293 * MSR_P4_BPU_ESCR1: 2, 3
294 */
295
296 P4_OPCODE(P4_EVENT_ITLB_REFERENCE) = P4_OPCODE_PACK(0x18, 0x03),
297 /*
298 * MSR_P4_ITLB_ESCR0: 0, 1
299 * MSR_P4_ITLB_ESCR1: 2, 3
300 */
301
302 P4_OPCODE(P4_EVENT_MEMORY_CANCEL) = P4_OPCODE_PACK(0x02, 0x05),
303 /*
304 * MSR_P4_DAC_ESCR0: 8, 9
305 * MSR_P4_DAC_ESCR1: 10, 11
306 */
307
308 P4_OPCODE(P4_EVENT_MEMORY_COMPLETE) = P4_OPCODE_PACK(0x08, 0x02),
309 /*
310 * MSR_P4_SAAT_ESCR0: 8, 9
311 * MSR_P4_SAAT_ESCR1: 10, 11
312 */
313
314 P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY) = P4_OPCODE_PACK(0x04, 0x02),
315 /*
316 * MSR_P4_SAAT_ESCR0: 8, 9
317 * MSR_P4_SAAT_ESCR1: 10, 11
318 */
319
320 P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY) = P4_OPCODE_PACK(0x05, 0x02),
321 /*
322 * MSR_P4_SAAT_ESCR0: 8, 9
323 * MSR_P4_SAAT_ESCR1: 10, 11
324 */
325
326 P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY) = P4_OPCODE_PACK(0x03, 0x02),
327 /*
328 * MSR_P4_MOB_ESCR0: 0, 1
329 * MSR_P4_MOB_ESCR1: 2, 3
330 */
331
332 P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE) = P4_OPCODE_PACK(0x01, 0x04),
333 /*
334 * MSR_P4_PMH_ESCR0: 0, 1
335 * MSR_P4_PMH_ESCR1: 2, 3
336 */
337
338 P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE) = P4_OPCODE_PACK(0x0c, 0x07),
339 /*
340 * MSR_P4_BSU_ESCR0: 0, 1
341 * MSR_P4_BSU_ESCR1: 2, 3
342 */
343
344 P4_OPCODE(P4_EVENT_IOQ_ALLOCATION) = P4_OPCODE_PACK(0x03, 0x06),
345 /*
346 * MSR_P4_FSB_ESCR0: 0, 1
347 * MSR_P4_FSB_ESCR1: 2, 3
348 */
349
350 P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x1a, 0x06),
351 /*
352 * MSR_P4_FSB_ESCR1: 2, 3
353 */
354
355 P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY) = P4_OPCODE_PACK(0x17, 0x06),
356 /*
357 * MSR_P4_FSB_ESCR0: 0, 1
358 * MSR_P4_FSB_ESCR1: 2, 3
359 */
360
361 P4_OPCODE(P4_EVENT_BSQ_ALLOCATION) = P4_OPCODE_PACK(0x05, 0x07),
362 /*
363 * MSR_P4_BSU_ESCR0: 0, 1
364 */
365
366 P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x06, 0x07),
367 /*
368 * NOTE: no ESCR name in docs, it's guessed
369 * MSR_P4_BSU_ESCR1: 2, 3
370 */
371
372 P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST) = P4_OPCODE_PACK(0x34, 0x01),
373 /*
374 * MSR_P4_FIRM_ESCR0: 8, 9
375 * MSR_P4_FIRM_ESCR1: 10, 11
376 */
377
378 P4_OPCODE(P4_EVENT_PACKED_SP_UOP) = P4_OPCODE_PACK(0x08, 0x01),
379 /*
380 * MSR_P4_FIRM_ESCR0: 8, 9
381 * MSR_P4_FIRM_ESCR1: 10, 11
382 */
383
384 P4_OPCODE(P4_EVENT_PACKED_DP_UOP) = P4_OPCODE_PACK(0x0c, 0x01),
385 /*
386 * MSR_P4_FIRM_ESCR0: 8, 9
387 * MSR_P4_FIRM_ESCR1: 10, 11
388 */
389
390 P4_OPCODE(P4_EVENT_SCALAR_SP_UOP) = P4_OPCODE_PACK(0x0a, 0x01),
391 /*
392 * MSR_P4_FIRM_ESCR0: 8, 9
393 * MSR_P4_FIRM_ESCR1: 10, 11
394 */
395
396 P4_OPCODE(P4_EVENT_SCALAR_DP_UOP) = P4_OPCODE_PACK(0x0e, 0x01),
397 /*
398 * MSR_P4_FIRM_ESCR0: 8, 9
399 * MSR_P4_FIRM_ESCR1: 10, 11
400 */
401
402 P4_OPCODE(P4_EVENT_64BIT_MMX_UOP) = P4_OPCODE_PACK(0x02, 0x01),
403 /*
404 * MSR_P4_FIRM_ESCR0: 8, 9
405 * MSR_P4_FIRM_ESCR1: 10, 11
406 */
407
408 P4_OPCODE(P4_EVENT_128BIT_MMX_UOP) = P4_OPCODE_PACK(0x1a, 0x01),
409 /*
410 * MSR_P4_FIRM_ESCR0: 8, 9
411 * MSR_P4_FIRM_ESCR1: 10, 11
412 */
413
414 P4_OPCODE(P4_EVENT_X87_FP_UOP) = P4_OPCODE_PACK(0x04, 0x01),
415 /*
416 * MSR_P4_FIRM_ESCR0: 8, 9
417 * MSR_P4_FIRM_ESCR1: 10, 11
418 */
419
420 P4_OPCODE(P4_EVENT_TC_MISC) = P4_OPCODE_PACK(0x06, 0x01),
421 /*
422 * MSR_P4_TC_ESCR0: 4, 5
423 * MSR_P4_TC_ESCR1: 6, 7
424 */
425
426 P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS) = P4_OPCODE_PACK(0x13, 0x06),
427 /*
428 * MSR_P4_FSB_ESCR0: 0, 1
429 * MSR_P4_FSB_ESCR1: 2, 3
430 */
431
432 P4_OPCODE(P4_EVENT_TC_MS_XFER) = P4_OPCODE_PACK(0x05, 0x00),
433 /*
434 * MSR_P4_MS_ESCR0: 4, 5
435 * MSR_P4_MS_ESCR1: 6, 7
436 */
437
438 P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES) = P4_OPCODE_PACK(0x09, 0x00),
439 /*
440 * MSR_P4_MS_ESCR0: 4, 5
441 * MSR_P4_MS_ESCR1: 6, 7
442 */
443
444 P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x05, 0x02),
445 /*
446 * MSR_P4_TBPU_ESCR0: 4, 5
447 * MSR_P4_TBPU_ESCR1: 6, 7
448 */
449
450 P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x04, 0x02),
451 /*
452 * MSR_P4_TBPU_ESCR0: 4, 5
453 * MSR_P4_TBPU_ESCR1: 6, 7
454 */
455
456 P4_OPCODE(P4_EVENT_RESOURCE_STALL) = P4_OPCODE_PACK(0x01, 0x01),
457 /*
458 * MSR_P4_ALF_ESCR0: 12, 13, 16
459 * MSR_P4_ALF_ESCR1: 14, 15, 17
460 */
461
462 P4_OPCODE(P4_EVENT_WC_BUFFER) = P4_OPCODE_PACK(0x05, 0x05),
463 /*
464 * MSR_P4_DAC_ESCR0: 8, 9
465 * MSR_P4_DAC_ESCR1: 10, 11
466 */
467
468 P4_OPCODE(P4_EVENT_B2B_CYCLES) = P4_OPCODE_PACK(0x16, 0x03),
469 /*
470 * MSR_P4_FSB_ESCR0: 0, 1
471 * MSR_P4_FSB_ESCR1: 2, 3
472 */
473
474 P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03),
475 /*
476 * MSR_P4_FSB_ESCR0: 0, 1
477 * MSR_P4_FSB_ESCR1: 2, 3
478 */
479
480 P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03),
481 /*
482 * MSR_P4_FSB_ESCR0: 0, 1
483 * MSR_P4_FSB_ESCR1: 2, 3
484 */
485
486 P4_OPCODE(P4_EVENT_RESPONSE) = P4_OPCODE_PACK(0x04, 0x03),
487 /*
488 * MSR_P4_FSB_ESCR0: 0, 1
489 * MSR_P4_FSB_ESCR1: 2, 3
490 */
491
492 P4_OPCODE(P4_EVENT_FRONT_END_EVENT) = P4_OPCODE_PACK(0x08, 0x05),
493 /*
494 * MSR_P4_CRU_ESCR2: 12, 13, 16
495 * MSR_P4_CRU_ESCR3: 14, 15, 17
496 */
497
498 P4_OPCODE(P4_EVENT_EXECUTION_EVENT) = P4_OPCODE_PACK(0x0c, 0x05),
499 /*
500 * MSR_P4_CRU_ESCR2: 12, 13, 16
501 * MSR_P4_CRU_ESCR3: 14, 15, 17
502 */
503
504 P4_OPCODE(P4_EVENT_REPLAY_EVENT) = P4_OPCODE_PACK(0x09, 0x05),
505 /*
506 * MSR_P4_CRU_ESCR2: 12, 13, 16
507 * MSR_P4_CRU_ESCR3: 14, 15, 17
508 */
509
510 P4_OPCODE(P4_EVENT_INSTR_RETIRED) = P4_OPCODE_PACK(0x02, 0x04),
511 /*
512 * MSR_P4_CRU_ESCR0: 12, 13, 16
513 * MSR_P4_CRU_ESCR1: 14, 15, 17
514 */
515
516 P4_OPCODE(P4_EVENT_UOPS_RETIRED) = P4_OPCODE_PACK(0x01, 0x04),
517 /*
518 * MSR_P4_CRU_ESCR0: 12, 13, 16
519 * MSR_P4_CRU_ESCR1: 14, 15, 17
520 */
521
522 P4_OPCODE(P4_EVENT_UOP_TYPE) = P4_OPCODE_PACK(0x02, 0x02),
523 /*
524 * MSR_P4_RAT_ESCR0: 12, 13, 16
525 * MSR_P4_RAT_ESCR1: 14, 15, 17
526 */
527
528 P4_OPCODE(P4_EVENT_BRANCH_RETIRED) = P4_OPCODE_PACK(0x06, 0x05),
529 /*
530 * MSR_P4_CRU_ESCR2: 12, 13, 16
531 * MSR_P4_CRU_ESCR3: 14, 15, 17
532 */
533
534 P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED) = P4_OPCODE_PACK(0x03, 0x04),
535 /*
536 * MSR_P4_CRU_ESCR0: 12, 13, 16
537 * MSR_P4_CRU_ESCR1: 14, 15, 17
538 */
539
540 P4_OPCODE(P4_EVENT_X87_ASSIST) = P4_OPCODE_PACK(0x03, 0x05),
541 /*
542 * MSR_P4_CRU_ESCR2: 12, 13, 16
543 * MSR_P4_CRU_ESCR3: 14, 15, 17
544 */
545
546 P4_OPCODE(P4_EVENT_MACHINE_CLEAR) = P4_OPCODE_PACK(0x02, 0x05),
547 /*
548 * MSR_P4_CRU_ESCR2: 12, 13, 16
549 * MSR_P4_CRU_ESCR3: 14, 15, 17
550 */
551
552 P4_OPCODE(P4_EVENT_INSTR_COMPLETED) = P4_OPCODE_PACK(0x07, 0x04),
553 /*
554 * MSR_P4_CRU_ESCR0: 12, 13, 16
555 * MSR_P4_CRU_ESCR1: 14, 15, 17
556 */
557};
558
559/*
560 * a caller should use P4_ESCR_EMASK_NAME helper to
561 * pick the EventMask needed, for example
562 *
563 * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD)
564 */
565enum P4_ESCR_EMASKS {
566 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
567 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DB, 1),
568 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DI, 2),
569 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BD, 3),
570 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BB, 4),
571 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BI, 5),
572 P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, ID, 6),
573
574 P4_GEN_ESCR_EMASK(P4_EVENT_BPU_FETCH_REQUEST, TCMISS, 0),
575
576 P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT, 0),
577 P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, MISS, 1),
578 P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT_UK, 2),
579
580 P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL, 2),
581 P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, 64K_CONF, 3),
582
583 P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, LSC, 0),
584 P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, SSC, 1),
585
586 P4_GEN_ESCR_EMASK(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD, 1),
587
588 P4_GEN_ESCR_EMASK(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST, 1),
589
590 P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STA, 1),
591 P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STD, 3),
592 P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA, 4),
593 P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR, 5),
594
595 P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, DTMISS, 0),
596 P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, ITMISS, 1),
597
598 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0),
599 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1),
600 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2),
601 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3),
602 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4),
603 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5),
604 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8),
605 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9),
606 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10),
607
608 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, DEFAULT, 0),
609 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_READ, 5),
610 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE, 6),
611 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_UC, 7),
612 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WC, 8),
613 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WT, 9),
614 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WP, 10),
615 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WB, 11),
616 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OWN, 13),
617 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OTHER, 14),
618 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, PREFETCH, 15),
619
620 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT, 0),
621 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ, 5),
622 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6),
623 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC, 7),
624 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC, 8),
625 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT, 9),
626 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP, 10),
627 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB, 11),
628 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN, 13),
629 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER, 14),
630 P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH, 15),
631
632 P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV, 0),
633 P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN, 1),
634 P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER, 2),
635 P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV, 3),
636 P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN, 4),
637 P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER, 5),
638
639 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0, 0),
640 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1, 1),
641 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0, 2),
642 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1, 3),
643 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE, 5),
644 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6),
645 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7),
646 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8),
647 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9),
648 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10),
649 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0, 11),
650 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1, 12),
651 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2, 13),
652
653 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0),
654 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1),
655 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2),
656 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3),
657 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5),
658 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6),
659 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7),
660 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8),
661 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9),
662 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10),
663 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11),
664 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12),
665 P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13),
666
667 P4_GEN_ESCR_EMASK(P4_EVENT_SSE_INPUT_ASSIST, ALL, 15),
668
669 P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_SP_UOP, ALL, 15),
670
671 P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_DP_UOP, ALL, 15),
672
673 P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_SP_UOP, ALL, 15),
674
675 P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_DP_UOP, ALL, 15),
676
677 P4_GEN_ESCR_EMASK(P4_EVENT_64BIT_MMX_UOP, ALL, 15),
678
679 P4_GEN_ESCR_EMASK(P4_EVENT_128BIT_MMX_UOP, ALL, 15),
680
681 P4_GEN_ESCR_EMASK(P4_EVENT_X87_FP_UOP, ALL, 15),
682
683 P4_GEN_ESCR_EMASK(P4_EVENT_TC_MISC, FLUSH, 4),
684
685 P4_GEN_ESCR_EMASK(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING, 0),
686
687 P4_GEN_ESCR_EMASK(P4_EVENT_TC_MS_XFER, CISC, 0),
688
689 P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0),
690 P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1),
691 P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM, 2),
692
693 P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1),
694 P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2),
695 P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3),
696 P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4),
697
698 P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL, 1),
699 P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CALL, 2),
700 P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN, 3),
701 P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT, 4),
702
703 P4_GEN_ESCR_EMASK(P4_EVENT_RESOURCE_STALL, SBFULL, 5),
704
705 P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0),
706 P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1),
707
708 P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, NBOGUS, 0),
709 P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, BOGUS, 1),
710
711 P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS0, 0),
712 P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS1, 1),
713 P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS2, 2),
714 P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS3, 3),
715 P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS0, 4),
716 P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS1, 5),
717 P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS2, 6),
718 P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS3, 7),
719
720 P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, NBOGUS, 0),
721 P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, BOGUS, 1),
722
723 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG, 0),
724 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSTAG, 1),
725 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSNTAG, 2),
726 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSTAG, 3),
727
728 P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, NBOGUS, 0),
729 P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, BOGUS, 1),
730
731 P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1),
732 P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2),
733
734 P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNP, 0),
735 P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNM, 1),
736 P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTP, 2),
737 P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTM, 3),
738
739 P4_GEN_ESCR_EMASK(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS, 0),
740
741 P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSU, 0),
742 P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSO, 1),
743 P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAO, 2),
744 P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAU, 3),
745 P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, PREA, 4),
746
747 P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, CLEAR, 0),
748 P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, MOCLEAR, 1),
749 P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, SMCLEAR, 2),
750
751 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, NBOGUS, 0),
752 P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
753};
754
755/* P4 PEBS: stale for a while */
756#define P4_PEBS_METRIC_MASK 0x00001fffU
757#define P4_PEBS_UOB_TAG 0x01000000U
758#define P4_PEBS_ENABLE 0x02000000U
759
760/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */
761#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001
762#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002
763#define P4_PEBS__dtlb_load_miss_retired 0x3000004
764#define P4_PEBS__dtlb_store_miss_retired 0x3000004
765#define P4_PEBS__dtlb_all_miss_retired 0x3000004
766#define P4_PEBS__tagged_mispred_branch 0x3018000
767#define P4_PEBS__mob_load_replay_retired 0x3000200
768#define P4_PEBS__split_load_retired 0x3000400
769#define P4_PEBS__split_store_retired 0x3000400
770
771#define P4_VERT__1stl_cache_load_miss_retired 0x0000001
772#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001
773#define P4_VERT__dtlb_load_miss_retired 0x0000001
774#define P4_VERT__dtlb_store_miss_retired 0x0000002
775#define P4_VERT__dtlb_all_miss_retired 0x0000003
776#define P4_VERT__tagged_mispred_branch 0x0000010
777#define P4_VERT__mob_load_replay_retired 0x0000001
778#define P4_VERT__split_load_retired 0x0000001
779#define P4_VERT__split_store_retired 0x0000002
780
781enum P4_CACHE_EVENTS {
782 P4_CACHE__NONE,
783
784 P4_CACHE__1stl_cache_load_miss_retired,
785 P4_CACHE__2ndl_cache_load_miss_retired,
786 P4_CACHE__dtlb_load_miss_retired,
787 P4_CACHE__dtlb_store_miss_retired,
788 P4_CACHE__itlb_reference_hit,
789 P4_CACHE__itlb_reference_miss,
790
791 P4_CACHE__MAX
792};
793
794#endif /* PERF_EVENT_P4_H */
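Given the config layout documented in the header above (ESCR bits in the upper 32 bits, CCCR bits in the lower 32 bits, the HT marker in bit 63), a sketch of packing one Netburst event; the ESCR/CCCR selector pairing is illustrative here, since the real driver takes it from its event binding tables:

#include <asm/perf_event_p4.h>

static u64 p4_pack_example(int cpu)
{
	u32 escr, cccr;

	/* Event code and event mask go into the ESCR half... */
	escr = P4_ESCR_EVENT(P4_OPCODE_EVNT(P4_OPCODE(P4_EVENT_INSTR_RETIRED))) |
	       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
	       p4_default_escr_conf(cpu, 0, 0);

	/* ...the ESCR selector and control bits into the CCCR half. */
	cccr = P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(P4_EVENT_INSTR_RETIRED))) |
	       p4_default_cccr_conf(cpu);

	return p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
}

Unpacking goes the other way through p4_config_unpack_escr()/p4_config_unpack_cccr(), and p4_clear_ht_bit() strips the HT marker before the values are written to the hardware.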
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b753ea59703a..32428b410b55 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -21,7 +21,6 @@ struct mm_struct;
 #include <asm/msr.h>
 #include <asm/desc_defs.h>
 #include <asm/nops.h>
-#include <asm/ds.h>
 
 #include <linux/personality.h>
 #include <linux/cpumask.h>
@@ -29,6 +28,7 @@ struct mm_struct;
 #include <linux/threads.h>
 #include <linux/math64.h>
 #include <linux/init.h>
+#include <linux/err.h>
 
 #define HBP_NUM 4
 /*
@@ -473,10 +473,6 @@ struct thread_struct {
 	unsigned long iopl;
 	/* Max allowed port in the bitmap, in bytes: */
 	unsigned io_bitmap_max;
-/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
-	unsigned long debugctlmsr;
-	/* Debug Store context; see asm/ds.h */
-	struct ds_context *ds_ctx;
 };
 
 static inline unsigned long native_get_debugreg(int regno)
@@ -803,7 +799,7 @@ extern void cpu_init(void);
 
 static inline unsigned long get_debugctlmsr(void)
 {
-    unsigned long debugctlmsr = 0;
+	unsigned long debugctlmsr = 0;
 
 #ifndef CONFIG_X86_DEBUGCTLMSR
 	if (boot_cpu_data.x86 < 6)
@@ -811,21 +807,6 @@ static inline unsigned long get_debugctlmsr(void)
 #endif
 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 
-	return debugctlmsr;
-}
-
-static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
-{
-	u64 debugctlmsr = 0;
-	u32 val1, val2;
-
-#ifndef CONFIG_X86_DEBUGCTLMSR
-	if (boot_cpu_data.x86 < 6)
-		return 0;
-#endif
-	rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
-	debugctlmsr = val1 | ((u64)val2 << 32);
-
 	return debugctlmsr;
 }
 
@@ -838,18 +819,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
-static inline void update_debugctlmsr_on_cpu(int cpu,
-					     unsigned long debugctlmsr)
-{
-#ifndef CONFIG_X86_DEBUGCTLMSR
-	if (boot_cpu_data.x86 < 6)
-		return;
-#endif
-	wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
-		     (u32)((u64)debugctlmsr),
-		     (u32)((u64)debugctlmsr >> 32));
-}
-
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h
index 86723035a515..52b098a6eebb 100644
--- a/arch/x86/include/asm/ptrace-abi.h
+++ b/arch/x86/include/asm/ptrace-abi.h
@@ -82,61 +82,6 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
-
-/* configuration/status structure used in PTRACE_BTS_CONFIG and
-   PTRACE_BTS_STATUS commands.
-*/
-struct ptrace_bts_config {
-	/* requested or actual size of BTS buffer in bytes */
-	__u32 size;
-	/* bitmask of below flags */
-	__u32 flags;
-	/* buffer overflow signal */
-	__u32 signal;
-	/* actual size of bts_struct in bytes */
-	__u32 bts_size;
-};
-#endif /* __ASSEMBLY__ */
-
-#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */
-#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */
-#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow
-				   instead of wrapping around */
-#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */
-
-#define PTRACE_BTS_CONFIG 40
-/* Configure branch trace recording.
-   ADDR points to a struct ptrace_bts_config.
-   DATA gives the size of that buffer.
-   A new buffer is allocated, if requested in the flags.
-   An overflow signal may only be requested for new buffers.
-   Returns the number of bytes read.
-*/
-#define PTRACE_BTS_STATUS 41
-/* Return the current configuration in a struct ptrace_bts_config
-   pointed to by ADDR; DATA gives the size of that buffer.
-   Returns the number of bytes written.
-*/
-#define PTRACE_BTS_SIZE 42
-/* Return the number of available BTS records for draining.
-   DATA and ADDR are ignored.
-*/
-#define PTRACE_BTS_GET 43
-/* Get a single BTS record.
-   DATA defines the index into the BTS array, where 0 is the newest
-   entry, and higher indices refer to older entries.
-   ADDR is pointing to struct bts_struct (see asm/ds.h).
-*/
-#define PTRACE_BTS_CLEAR 44
-/* Clear the BTS buffer.
-   DATA and ADDR are ignored.
-*/
-#define PTRACE_BTS_DRAIN 45
-/* Read all available BTS records and clear the buffer.
-   ADDR points to an array of struct bts_struct.
-   DATA gives the size of that buffer.
-   BTS records are read from oldest to newest.
-   Returns number of BTS records drained.
-*/
+#endif
 
 #endif /* _ASM_X86_PTRACE_ABI_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 69a686a7dff0..78cd1ea94500 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -289,12 +289,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			struct user_desc __user *info, int can_allocate);
 
-#ifdef CONFIG_X86_PTRACE_BTS
-extern void ptrace_bts_untrace(struct task_struct *tsk);
-
-#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk)
-#endif /* CONFIG_X86_PTRACE_BTS */
-
 #endif /* __KERNEL__ */
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e0d28901e969..d017ed5502e2 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,8 +92,7 @@ struct thread_info {
 #define TIF_IO_BITMAP 22 /* uses I/O bitmap */
 #define TIF_FREEZE 23 /* is freezing for suspend */
 #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
-#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
-#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
+#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
 #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
 #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
 
@@ -115,8 +114,7 @@ struct thread_info {
115#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) 114#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
116#define _TIF_FREEZE (1 << TIF_FREEZE) 115#define _TIF_FREEZE (1 << TIF_FREEZE)
117#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 116#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
118#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) 117#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
119#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
120#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) 118#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
121#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) 119#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
122 120
@@ -147,7 +145,7 @@ struct thread_info {
147 145
148/* flags to check in __switch_to() */ 146/* flags to check in __switch_to() */
149#define _TIF_WORK_CTXSW \ 147#define _TIF_WORK_CTXSW \
150 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) 148 (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
151 149
152#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) 150#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
153#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) 151#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
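
TIF_BLOCKSTEP replaces the two DS-era flags in the context-switch work mask above, so the per-task state is now a single thread flag rather than shadowed MSR values in thread_struct. A minimal sketch of the switch-time handling this implies, assuming the usual x86 helpers (test_tsk_thread_flag, get_debugctlmsr, update_debugctlmsr, DEBUGCTLMSR_BTF); shown only for illustration.

    /* Toggle the branch-trap bit only when the flag differs between tasks. */
    static void example_switch_blockstep(struct task_struct *prev_p,
                                         struct task_struct *next_p)
    {
            if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
                test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
                    unsigned long debugctl = get_debugctlmsr();

                    debugctl &= ~DEBUGCTLMSR_BTF;
                    if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
                            debugctl |= DEBUGCTLMSR_BTF;

                    update_debugctlmsr(debugctl);
            }
    }
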
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4c58352209e0..e77b22083721 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -47,8 +47,6 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
47obj-y += process.o 47obj-y += process.o
48obj-y += i387.o xsave.o 48obj-y += i387.o xsave.o
49obj-y += ptrace.o 49obj-y += ptrace.o
50obj-$(CONFIG_X86_DS) += ds.o
51obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o
52obj-$(CONFIG_X86_32) += tls.o 50obj-$(CONFIG_X86_32) += tls.o
53obj-$(CONFIG_IA32_EMULATION) += tls.o 51obj-$(CONFIG_IA32_EMULATION) += tls.o
54obj-y += step.o 52obj-y += step.o
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7e1cca13af35..d72377c41c76 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -12,7 +12,6 @@
12#include <asm/processor.h> 12#include <asm/processor.h>
13#include <asm/pgtable.h> 13#include <asm/pgtable.h>
14#include <asm/msr.h> 14#include <asm/msr.h>
15#include <asm/ds.h>
16#include <asm/bugs.h> 15#include <asm/bugs.h>
17#include <asm/cpu.h> 16#include <asm/cpu.h>
18 17
@@ -367,7 +366,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
367 set_cpu_cap(c, X86_FEATURE_BTS); 366 set_cpu_cap(c, X86_FEATURE_BTS);
368 if (!(l1 & (1<<12))) 367 if (!(l1 & (1<<12)))
369 set_cpu_cap(c, X86_FEATURE_PEBS); 368 set_cpu_cap(c, X86_FEATURE_PEBS);
370 ds_init_intel(c);
371 } 369 }
372 370
373 if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) 371 if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index db5bdc8addf8..2ea78abf69d9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,46 +31,51 @@
31#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/compat.h> 32#include <asm/compat.h>
33 33
34static u64 perf_event_mask __read_mostly; 34#if 0
35#undef wrmsrl
36#define wrmsrl(msr, val) \
37do { \
38 trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
39 (unsigned long)(val)); \
40 native_write_msr((msr), (u32)((u64)(val)), \
41 (u32)((u64)(val) >> 32)); \
42} while (0)
43#endif
35 44
36/* The maximal number of PEBS events: */ 45/*
37#define MAX_PEBS_EVENTS 4 46 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
47 */
48static unsigned long
49copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
50{
51 unsigned long offset, addr = (unsigned long)from;
52 int type = in_nmi() ? KM_NMI : KM_IRQ0;
53 unsigned long size, len = 0;
54 struct page *page;
55 void *map;
56 int ret;
38 57
39/* The size of a BTS record in bytes: */ 58 do {
40#define BTS_RECORD_SIZE 24 59 ret = __get_user_pages_fast(addr, 1, 0, &page);
60 if (!ret)
61 break;
41 62
42/* The size of a per-cpu BTS buffer in bytes: */ 63 offset = addr & (PAGE_SIZE - 1);
43#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) 64 size = min(PAGE_SIZE - offset, n - len);
44 65
45/* The BTS overflow threshold in bytes from the end of the buffer: */ 66 map = kmap_atomic(page, type);
46#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) 67 memcpy(to, map+offset, size);
68 kunmap_atomic(map, type);
69 put_page(page);
47 70
71 len += size;
72 to += size;
73 addr += size;
48 74
49/* 75 } while (len < n);
50 * Bits in the debugctlmsr controlling branch tracing.
51 */
52#define X86_DEBUGCTL_TR (1 << 6)
53#define X86_DEBUGCTL_BTS (1 << 7)
54#define X86_DEBUGCTL_BTINT (1 << 8)
55#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9)
56#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10)
57 76
58/* 77 return len;
59 * A debug store configuration. 78}
60 *
61 * We only support architectures that use 64bit fields.
62 */
63struct debug_store {
64 u64 bts_buffer_base;
65 u64 bts_index;
66 u64 bts_absolute_maximum;
67 u64 bts_interrupt_threshold;
68 u64 pebs_buffer_base;
69 u64 pebs_index;
70 u64 pebs_absolute_maximum;
71 u64 pebs_interrupt_threshold;
72 u64 pebs_event_reset[MAX_PEBS_EVENTS];
73};
74 79
75struct event_constraint { 80struct event_constraint {
76 union { 81 union {
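
copy_from_user_nmi() above pins the user page with __get_user_pages_fast() and copies through a temporary atomic mapping, so it can run where a faulting copy_from_user() cannot. A sketch of the kind of caller it exists for, a user-stack frame walker driven from the PMI handler; the frame layout and helper names mirror this file, and the sketch is illustrative only.

    struct stack_frame {
            const void __user       *next_frame;
            unsigned long           return_address;
    };

    static void example_walk_user_stack(struct pt_regs *regs,
                                        struct perf_callchain_entry *entry)
    {
            const void __user *fp = (const void __user *)regs->bp;

            while (fp && entry->nr < PERF_MAX_STACK_DEPTH) {
                    struct stack_frame frame = { NULL, 0 };

                    /* A short copy means the page was not resident; stop. */
                    if (copy_from_user_nmi(&frame, fp, sizeof(frame)) != sizeof(frame))
                            break;

                    callchain_store(entry, frame.return_address);
                    fp = frame.next_frame;
            }
    }
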
@@ -89,18 +94,39 @@ struct amd_nb {
89 struct event_constraint event_constraints[X86_PMC_IDX_MAX]; 94 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
90}; 95};
91 96
97#define MAX_LBR_ENTRIES 16
98
92struct cpu_hw_events { 99struct cpu_hw_events {
100 /*
101 * Generic x86 PMC bits
102 */
93 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ 103 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
94 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 104 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
95 unsigned long interrupts;
96 int enabled; 105 int enabled;
97 struct debug_store *ds;
98 106
99 int n_events; 107 int n_events;
100 int n_added; 108 int n_added;
101 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ 109 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
102 u64 tags[X86_PMC_IDX_MAX]; 110 u64 tags[X86_PMC_IDX_MAX];
103 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ 111 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
112
113 /*
114 * Intel DebugStore bits
115 */
116 struct debug_store *ds;
117 u64 pebs_enabled;
118
119 /*
120 * Intel LBR bits
121 */
122 int lbr_users;
123 void *lbr_context;
124 struct perf_branch_stack lbr_stack;
125 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
126
127 /*
128 * AMD specific bits
129 */
104 struct amd_nb *amd_nb; 130 struct amd_nb *amd_nb;
105}; 131};
106 132
@@ -114,11 +140,31 @@ struct cpu_hw_events {
114#define EVENT_CONSTRAINT(c, n, m) \ 140#define EVENT_CONSTRAINT(c, n, m) \
115 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) 141 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
116 142
143/*
144 * Constraint on the Event code.
145 */
117#define INTEL_EVENT_CONSTRAINT(c, n) \ 146#define INTEL_EVENT_CONSTRAINT(c, n) \
118 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) 147 EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
119 148
149/*
150 * Constraint on the Event code + UMask + fixed-mask
151 *
152 * filter mask to validate fixed counter events.
153 * the following filters disqualify for fixed counters:
154 * - inv
155 * - edge
156 * - cnt-mask
157 * The other filters are supported by fixed counters.
158 * The any-thread option is supported starting with v3.
159 */
120#define FIXED_EVENT_CONSTRAINT(c, n) \ 160#define FIXED_EVENT_CONSTRAINT(c, n) \
121 EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) 161 EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
162
163/*
164 * Constraint on the Event code + UMask
165 */
166#define PEBS_EVENT_CONSTRAINT(c, n) \
167 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
122 168
123#define EVENT_CONSTRAINT_END \ 169#define EVENT_CONSTRAINT_END \
124 EVENT_CONSTRAINT(0, 0, 0) 170 EVENT_CONSTRAINT(0, 0, 0)
@@ -126,32 +172,43 @@ struct cpu_hw_events {
126#define for_each_event_constraint(e, c) \ 172#define for_each_event_constraint(e, c) \
127 for ((e) = (c); (e)->cmask; (e)++) 173 for ((e) = (c); (e)->cmask; (e)++)
128 174
175union perf_capabilities {
176 struct {
177 u64 lbr_format : 6;
178 u64 pebs_trap : 1;
179 u64 pebs_arch_reg : 1;
180 u64 pebs_format : 4;
181 u64 smm_freeze : 1;
182 };
183 u64 capabilities;
184};
185
129/* 186/*
130 * struct x86_pmu - generic x86 pmu 187 * struct x86_pmu - generic x86 pmu
131 */ 188 */
132struct x86_pmu { 189struct x86_pmu {
190 /*
191 * Generic x86 PMC bits
192 */
133 const char *name; 193 const char *name;
134 int version; 194 int version;
135 int (*handle_irq)(struct pt_regs *); 195 int (*handle_irq)(struct pt_regs *);
136 void (*disable_all)(void); 196 void (*disable_all)(void);
137 void (*enable_all)(void); 197 void (*enable_all)(int added);
138 void (*enable)(struct perf_event *); 198 void (*enable)(struct perf_event *);
139 void (*disable)(struct perf_event *); 199 void (*disable)(struct perf_event *);
200 int (*hw_config)(struct perf_event *event);
201 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
140 unsigned eventsel; 202 unsigned eventsel;
141 unsigned perfctr; 203 unsigned perfctr;
142 u64 (*event_map)(int); 204 u64 (*event_map)(int);
143 u64 (*raw_event)(u64);
144 int max_events; 205 int max_events;
145 int num_events; 206 int num_counters;
146 int num_events_fixed; 207 int num_counters_fixed;
147 int event_bits; 208 int cntval_bits;
148 u64 event_mask; 209 u64 cntval_mask;
149 int apic; 210 int apic;
150 u64 max_period; 211 u64 max_period;
151 u64 intel_ctrl;
152 void (*enable_bts)(u64 config);
153 void (*disable_bts)(void);
154
155 struct event_constraint * 212 struct event_constraint *
156 (*get_event_constraints)(struct cpu_hw_events *cpuc, 213 (*get_event_constraints)(struct cpu_hw_events *cpuc,
157 struct perf_event *event); 214 struct perf_event *event);
@@ -159,11 +216,32 @@ struct x86_pmu {
159 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 216 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
160 struct perf_event *event); 217 struct perf_event *event);
161 struct event_constraint *event_constraints; 218 struct event_constraint *event_constraints;
219 void (*quirks)(void);
162 220
163 int (*cpu_prepare)(int cpu); 221 int (*cpu_prepare)(int cpu);
164 void (*cpu_starting)(int cpu); 222 void (*cpu_starting)(int cpu);
165 void (*cpu_dying)(int cpu); 223 void (*cpu_dying)(int cpu);
166 void (*cpu_dead)(int cpu); 224 void (*cpu_dead)(int cpu);
225
226 /*
227 * Intel Arch Perfmon v2+
228 */
229 u64 intel_ctrl;
230 union perf_capabilities intel_cap;
231
232 /*
233 * Intel DebugStore bits
234 */
235 int bts, pebs;
236 int pebs_record_size;
237 void (*drain_pebs)(struct pt_regs *regs);
238 struct event_constraint *pebs_constraints;
239
240 /*
241 * Intel LBR
242 */
243 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
244 int lbr_nr; /* hardware stack size */
167}; 245};
168 246
169static struct x86_pmu x86_pmu __read_mostly; 247static struct x86_pmu x86_pmu __read_mostly;
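
The new intel_cap field overlays the IA32_PERF_CAPABILITIES MSR, which is how the PEBS record format, the LBR format and the trap behaviour are discovered at init time. A sketch of how the union would typically be filled in, assuming the standard PDCM feature bit and MSR name; shown only as an illustration.

    static void example_read_capabilities(void)
    {
            u64 capabilities;

            if (!boot_cpu_has(X86_FEATURE_PDCM))
                    return;

            rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
            x86_pmu.intel_cap.capabilities = capabilities;

            /*
             * e.g. intel_cap.pebs_format then selects the PEBS record layout
             * and intel_cap.pebs_trap says whether PEBS signals via a trap.
             */
    }
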
@@ -198,7 +276,7 @@ static u64
198x86_perf_event_update(struct perf_event *event) 276x86_perf_event_update(struct perf_event *event)
199{ 277{
200 struct hw_perf_event *hwc = &event->hw; 278 struct hw_perf_event *hwc = &event->hw;
201 int shift = 64 - x86_pmu.event_bits; 279 int shift = 64 - x86_pmu.cntval_bits;
202 u64 prev_raw_count, new_raw_count; 280 u64 prev_raw_count, new_raw_count;
203 int idx = hwc->idx; 281 int idx = hwc->idx;
204 s64 delta; 282 s64 delta;
@@ -241,33 +319,32 @@ again:
241static atomic_t active_events; 319static atomic_t active_events;
242static DEFINE_MUTEX(pmc_reserve_mutex); 320static DEFINE_MUTEX(pmc_reserve_mutex);
243 321
322#ifdef CONFIG_X86_LOCAL_APIC
323
244static bool reserve_pmc_hardware(void) 324static bool reserve_pmc_hardware(void)
245{ 325{
246#ifdef CONFIG_X86_LOCAL_APIC
247 int i; 326 int i;
248 327
249 if (nmi_watchdog == NMI_LOCAL_APIC) 328 if (nmi_watchdog == NMI_LOCAL_APIC)
250 disable_lapic_nmi_watchdog(); 329 disable_lapic_nmi_watchdog();
251 330
252 for (i = 0; i < x86_pmu.num_events; i++) { 331 for (i = 0; i < x86_pmu.num_counters; i++) {
253 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 332 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
254 goto perfctr_fail; 333 goto perfctr_fail;
255 } 334 }
256 335
257 for (i = 0; i < x86_pmu.num_events; i++) { 336 for (i = 0; i < x86_pmu.num_counters; i++) {
258 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 337 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
259 goto eventsel_fail; 338 goto eventsel_fail;
260 } 339 }
261#endif
262 340
263 return true; 341 return true;
264 342
265#ifdef CONFIG_X86_LOCAL_APIC
266eventsel_fail: 343eventsel_fail:
267 for (i--; i >= 0; i--) 344 for (i--; i >= 0; i--)
268 release_evntsel_nmi(x86_pmu.eventsel + i); 345 release_evntsel_nmi(x86_pmu.eventsel + i);
269 346
270 i = x86_pmu.num_events; 347 i = x86_pmu.num_counters;
271 348
272perfctr_fail: 349perfctr_fail:
273 for (i--; i >= 0; i--) 350 for (i--; i >= 0; i--)
@@ -277,128 +354,36 @@ perfctr_fail:
277 enable_lapic_nmi_watchdog(); 354 enable_lapic_nmi_watchdog();
278 355
279 return false; 356 return false;
280#endif
281} 357}
282 358
283static void release_pmc_hardware(void) 359static void release_pmc_hardware(void)
284{ 360{
285#ifdef CONFIG_X86_LOCAL_APIC
286 int i; 361 int i;
287 362
288 for (i = 0; i < x86_pmu.num_events; i++) { 363 for (i = 0; i < x86_pmu.num_counters; i++) {
289 release_perfctr_nmi(x86_pmu.perfctr + i); 364 release_perfctr_nmi(x86_pmu.perfctr + i);
290 release_evntsel_nmi(x86_pmu.eventsel + i); 365 release_evntsel_nmi(x86_pmu.eventsel + i);
291 } 366 }
292 367
293 if (nmi_watchdog == NMI_LOCAL_APIC) 368 if (nmi_watchdog == NMI_LOCAL_APIC)
294 enable_lapic_nmi_watchdog(); 369 enable_lapic_nmi_watchdog();
295#endif
296}
297
298static inline bool bts_available(void)
299{
300 return x86_pmu.enable_bts != NULL;
301}
302
303static void init_debug_store_on_cpu(int cpu)
304{
305 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
306
307 if (!ds)
308 return;
309
310 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
311 (u32)((u64)(unsigned long)ds),
312 (u32)((u64)(unsigned long)ds >> 32));
313}
314
315static void fini_debug_store_on_cpu(int cpu)
316{
317 if (!per_cpu(cpu_hw_events, cpu).ds)
318 return;
319
320 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
321}
322
323static void release_bts_hardware(void)
324{
325 int cpu;
326
327 if (!bts_available())
328 return;
329
330 get_online_cpus();
331
332 for_each_online_cpu(cpu)
333 fini_debug_store_on_cpu(cpu);
334
335 for_each_possible_cpu(cpu) {
336 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
337
338 if (!ds)
339 continue;
340
341 per_cpu(cpu_hw_events, cpu).ds = NULL;
342
343 kfree((void *)(unsigned long)ds->bts_buffer_base);
344 kfree(ds);
345 }
346
347 put_online_cpus();
348} 370}
349 371
350static int reserve_bts_hardware(void) 372#else
351{
352 int cpu, err = 0;
353
354 if (!bts_available())
355 return 0;
356
357 get_online_cpus();
358
359 for_each_possible_cpu(cpu) {
360 struct debug_store *ds;
361 void *buffer;
362
363 err = -ENOMEM;
364 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
365 if (unlikely(!buffer))
366 break;
367
368 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
369 if (unlikely(!ds)) {
370 kfree(buffer);
371 break;
372 }
373
374 ds->bts_buffer_base = (u64)(unsigned long)buffer;
375 ds->bts_index = ds->bts_buffer_base;
376 ds->bts_absolute_maximum =
377 ds->bts_buffer_base + BTS_BUFFER_SIZE;
378 ds->bts_interrupt_threshold =
379 ds->bts_absolute_maximum - BTS_OVFL_TH;
380 373
381 per_cpu(cpu_hw_events, cpu).ds = ds; 374static bool reserve_pmc_hardware(void) { return true; }
382 err = 0; 375static void release_pmc_hardware(void) {}
383 }
384 376
385 if (err) 377#endif
386 release_bts_hardware();
387 else {
388 for_each_online_cpu(cpu)
389 init_debug_store_on_cpu(cpu);
390 }
391
392 put_online_cpus();
393 378
394 return err; 379static int reserve_ds_buffers(void);
395} 380static void release_ds_buffers(void);
396 381
397static void hw_perf_event_destroy(struct perf_event *event) 382static void hw_perf_event_destroy(struct perf_event *event)
398{ 383{
399 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { 384 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
400 release_pmc_hardware(); 385 release_pmc_hardware();
401 release_bts_hardware(); 386 release_ds_buffers();
402 mutex_unlock(&pmc_reserve_mutex); 387 mutex_unlock(&pmc_reserve_mutex);
403 } 388 }
404} 389}
@@ -441,6 +426,28 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
441 return 0; 426 return 0;
442} 427}
443 428
429static int x86_pmu_hw_config(struct perf_event *event)
430{
431 /*
432 * Generate PMC IRQs:
433 * (keep 'enabled' bit clear for now)
434 */
435 event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
436
437 /*
438 * Count user and OS events unless requested not to
439 */
440 if (!event->attr.exclude_user)
441 event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
442 if (!event->attr.exclude_kernel)
443 event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
444
445 if (event->attr.type == PERF_TYPE_RAW)
446 event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
447
448 return 0;
449}
450
444/* 451/*
445 * Setup the hardware configuration for a given attr_type 452 * Setup the hardware configuration for a given attr_type
446 */ 453 */
@@ -460,8 +467,11 @@ static int __hw_perf_event_init(struct perf_event *event)
460 if (atomic_read(&active_events) == 0) { 467 if (atomic_read(&active_events) == 0) {
461 if (!reserve_pmc_hardware()) 468 if (!reserve_pmc_hardware())
462 err = -EBUSY; 469 err = -EBUSY;
463 else 470 else {
464 err = reserve_bts_hardware(); 471 err = reserve_ds_buffers();
472 if (err)
473 release_pmc_hardware();
474 }
465 } 475 }
466 if (!err) 476 if (!err)
467 atomic_inc(&active_events); 477 atomic_inc(&active_events);
@@ -472,23 +482,14 @@ static int __hw_perf_event_init(struct perf_event *event)
472 482
473 event->destroy = hw_perf_event_destroy; 483 event->destroy = hw_perf_event_destroy;
474 484
475 /*
476 * Generate PMC IRQs:
477 * (keep 'enabled' bit clear for now)
478 */
479 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
480
481 hwc->idx = -1; 485 hwc->idx = -1;
482 hwc->last_cpu = -1; 486 hwc->last_cpu = -1;
483 hwc->last_tag = ~0ULL; 487 hwc->last_tag = ~0ULL;
484 488
485 /* 489 /* Processor specifics */
486 * Count user and OS events unless requested not to. 490 err = x86_pmu.hw_config(event);
487 */ 491 if (err)
488 if (!attr->exclude_user) 492 return err;
489 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
490 if (!attr->exclude_kernel)
491 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
492 493
493 if (!hwc->sample_period) { 494 if (!hwc->sample_period) {
494 hwc->sample_period = x86_pmu.max_period; 495 hwc->sample_period = x86_pmu.max_period;
@@ -505,16 +506,8 @@ static int __hw_perf_event_init(struct perf_event *event)
505 return -EOPNOTSUPP; 506 return -EOPNOTSUPP;
506 } 507 }
507 508
508 /* 509 if (attr->type == PERF_TYPE_RAW)
509 * Raw hw_event type provide the config in the hw_event structure
510 */
511 if (attr->type == PERF_TYPE_RAW) {
512 hwc->config |= x86_pmu.raw_event(attr->config);
513 if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
514 perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
515 return -EACCES;
516 return 0; 510 return 0;
517 }
518 511
519 if (attr->type == PERF_TYPE_HW_CACHE) 512 if (attr->type == PERF_TYPE_HW_CACHE)
520 return set_ext_hw_attr(hwc, attr); 513 return set_ext_hw_attr(hwc, attr);
@@ -539,11 +532,11 @@ static int __hw_perf_event_init(struct perf_event *event)
539 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && 532 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
540 (hwc->sample_period == 1)) { 533 (hwc->sample_period == 1)) {
541 /* BTS is not supported by this architecture. */ 534 /* BTS is not supported by this architecture. */
542 if (!bts_available()) 535 if (!x86_pmu.bts)
543 return -EOPNOTSUPP; 536 return -EOPNOTSUPP;
544 537
545 /* BTS is currently only allowed for user-mode. */ 538 /* BTS is currently only allowed for user-mode. */
546 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) 539 if (!attr->exclude_kernel)
547 return -EOPNOTSUPP; 540 return -EOPNOTSUPP;
548 } 541 }
549 542
@@ -557,7 +550,7 @@ static void x86_pmu_disable_all(void)
557 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 550 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
558 int idx; 551 int idx;
559 552
560 for (idx = 0; idx < x86_pmu.num_events; idx++) { 553 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
561 u64 val; 554 u64 val;
562 555
563 if (!test_bit(idx, cpuc->active_mask)) 556 if (!test_bit(idx, cpuc->active_mask))
@@ -587,12 +580,12 @@ void hw_perf_disable(void)
587 x86_pmu.disable_all(); 580 x86_pmu.disable_all();
588} 581}
589 582
590static void x86_pmu_enable_all(void) 583static void x86_pmu_enable_all(int added)
591{ 584{
592 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 585 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
593 int idx; 586 int idx;
594 587
595 for (idx = 0; idx < x86_pmu.num_events; idx++) { 588 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
596 struct perf_event *event = cpuc->events[idx]; 589 struct perf_event *event = cpuc->events[idx];
597 u64 val; 590 u64 val;
598 591
@@ -667,14 +660,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
667 * assign events to counters starting with most 660 * assign events to counters starting with most
668 * constrained events. 661 * constrained events.
669 */ 662 */
670 wmax = x86_pmu.num_events; 663 wmax = x86_pmu.num_counters;
671 664
672 /* 665 /*
673 * when fixed event counters are present, 666 * when fixed event counters are present,
674 * wmax is incremented by 1 to account 667 * wmax is incremented by 1 to account
675 * for one more choice 668 * for one more choice
676 */ 669 */
677 if (x86_pmu.num_events_fixed) 670 if (x86_pmu.num_counters_fixed)
678 wmax++; 671 wmax++;
679 672
680 for (w = 1, num = n; num && w <= wmax; w++) { 673 for (w = 1, num = n; num && w <= wmax; w++) {
@@ -724,7 +717,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
724 struct perf_event *event; 717 struct perf_event *event;
725 int n, max_count; 718 int n, max_count;
726 719
727 max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; 720 max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
728 721
729 /* current number of events already accepted */ 722 /* current number of events already accepted */
730 n = cpuc->n_events; 723 n = cpuc->n_events;
@@ -795,7 +788,7 @@ void hw_perf_enable(void)
795 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 788 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
796 struct perf_event *event; 789 struct perf_event *event;
797 struct hw_perf_event *hwc; 790 struct hw_perf_event *hwc;
798 int i; 791 int i, added = cpuc->n_added;
799 792
800 if (!x86_pmu_initialized()) 793 if (!x86_pmu_initialized())
801 return; 794 return;
@@ -847,19 +840,20 @@ void hw_perf_enable(void)
847 cpuc->enabled = 1; 840 cpuc->enabled = 1;
848 barrier(); 841 barrier();
849 842
850 x86_pmu.enable_all(); 843 x86_pmu.enable_all(added);
851} 844}
852 845
853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) 846static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
854{ 847{
855 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 848 wrmsrl(hwc->config_base + hwc->idx,
856 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); 849 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
857} 850}
858 851
859static inline void x86_pmu_disable_event(struct perf_event *event) 852static inline void x86_pmu_disable_event(struct perf_event *event)
860{ 853{
861 struct hw_perf_event *hwc = &event->hw; 854 struct hw_perf_event *hwc = &event->hw;
862 (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); 855
856 wrmsrl(hwc->config_base + hwc->idx, hwc->config);
863} 857}
864 858
865static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 859static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -874,7 +868,7 @@ x86_perf_event_set_period(struct perf_event *event)
874 struct hw_perf_event *hwc = &event->hw; 868 struct hw_perf_event *hwc = &event->hw;
875 s64 left = atomic64_read(&hwc->period_left); 869 s64 left = atomic64_read(&hwc->period_left);
876 s64 period = hwc->sample_period; 870 s64 period = hwc->sample_period;
877 int err, ret = 0, idx = hwc->idx; 871 int ret = 0, idx = hwc->idx;
878 872
879 if (idx == X86_PMC_IDX_FIXED_BTS) 873 if (idx == X86_PMC_IDX_FIXED_BTS)
880 return 0; 874 return 0;
@@ -912,8 +906,8 @@ x86_perf_event_set_period(struct perf_event *event)
912 */ 906 */
913 atomic64_set(&hwc->prev_count, (u64)-left); 907 atomic64_set(&hwc->prev_count, (u64)-left);
914 908
915 err = checking_wrmsrl(hwc->event_base + idx, 909 wrmsrl(hwc->event_base + idx,
916 (u64)(-left) & x86_pmu.event_mask); 910 (u64)(-left) & x86_pmu.cntval_mask);
917 911
918 perf_event_update_userpage(event); 912 perf_event_update_userpage(event);
919 913
@@ -950,7 +944,7 @@ static int x86_pmu_enable(struct perf_event *event)
950 if (n < 0) 944 if (n < 0)
951 return n; 945 return n;
952 946
953 ret = x86_schedule_events(cpuc, n, assign); 947 ret = x86_pmu.schedule_events(cpuc, n, assign);
954 if (ret) 948 if (ret)
955 return ret; 949 return ret;
956 /* 950 /*
@@ -991,11 +985,12 @@ static void x86_pmu_unthrottle(struct perf_event *event)
991void perf_event_print_debug(void) 985void perf_event_print_debug(void)
992{ 986{
993 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; 987 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
988 u64 pebs;
994 struct cpu_hw_events *cpuc; 989 struct cpu_hw_events *cpuc;
995 unsigned long flags; 990 unsigned long flags;
996 int cpu, idx; 991 int cpu, idx;
997 992
998 if (!x86_pmu.num_events) 993 if (!x86_pmu.num_counters)
999 return; 994 return;
1000 995
1001 local_irq_save(flags); 996 local_irq_save(flags);
@@ -1008,16 +1003,18 @@ void perf_event_print_debug(void)
1008 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 1003 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1009 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); 1004 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1010 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); 1005 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1006 rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
1011 1007
1012 pr_info("\n"); 1008 pr_info("\n");
1013 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); 1009 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
1014 pr_info("CPU#%d: status: %016llx\n", cpu, status); 1010 pr_info("CPU#%d: status: %016llx\n", cpu, status);
1015 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); 1011 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1016 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); 1012 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1013 pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
1017 } 1014 }
1018 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); 1015 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1019 1016
1020 for (idx = 0; idx < x86_pmu.num_events; idx++) { 1017 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1021 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1018 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1022 rdmsrl(x86_pmu.perfctr + idx, pmc_count); 1019 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
1023 1020
@@ -1030,7 +1027,7 @@ void perf_event_print_debug(void)
1030 pr_info("CPU#%d: gen-PMC%d left: %016llx\n", 1027 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
1031 cpu, idx, prev_left); 1028 cpu, idx, prev_left);
1032 } 1029 }
1033 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { 1030 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1034 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); 1031 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1035 1032
1036 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", 1033 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1095,7 +1092,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1095 1092
1096 cpuc = &__get_cpu_var(cpu_hw_events); 1093 cpuc = &__get_cpu_var(cpu_hw_events);
1097 1094
1098 for (idx = 0; idx < x86_pmu.num_events; idx++) { 1095 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1099 if (!test_bit(idx, cpuc->active_mask)) 1096 if (!test_bit(idx, cpuc->active_mask))
1100 continue; 1097 continue;
1101 1098
@@ -1103,7 +1100,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1103 hwc = &event->hw; 1100 hwc = &event->hw;
1104 1101
1105 val = x86_perf_event_update(event); 1102 val = x86_perf_event_update(event);
1106 if (val & (1ULL << (x86_pmu.event_bits - 1))) 1103 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
1107 continue; 1104 continue;
1108 1105
1109 /* 1106 /*
@@ -1146,7 +1143,6 @@ void set_perf_event_pending(void)
1146 1143
1147void perf_events_lapic_init(void) 1144void perf_events_lapic_init(void)
1148{ 1145{
1149#ifdef CONFIG_X86_LOCAL_APIC
1150 if (!x86_pmu.apic || !x86_pmu_initialized()) 1146 if (!x86_pmu.apic || !x86_pmu_initialized())
1151 return; 1147 return;
1152 1148
@@ -1154,7 +1150,6 @@ void perf_events_lapic_init(void)
1154 * Always use NMI for PMU 1150 * Always use NMI for PMU
1155 */ 1151 */
1156 apic_write(APIC_LVTPC, APIC_DM_NMI); 1152 apic_write(APIC_LVTPC, APIC_DM_NMI);
1157#endif
1158} 1153}
1159 1154
1160static int __kprobes 1155static int __kprobes
@@ -1178,9 +1173,7 @@ perf_event_nmi_handler(struct notifier_block *self,
1178 1173
1179 regs = args->regs; 1174 regs = args->regs;
1180 1175
1181#ifdef CONFIG_X86_LOCAL_APIC
1182 apic_write(APIC_LVTPC, APIC_DM_NMI); 1176 apic_write(APIC_LVTPC, APIC_DM_NMI);
1183#endif
1184 /* 1177 /*
1185 * Can't rely on the handled return value to say it was our NMI, two 1178 * Can't rely on the handled return value to say it was our NMI, two
1186 * events could trigger 'simultaneously' raising two back-to-back NMIs. 1179 * events could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1274,12 +1267,15 @@ int hw_perf_group_sched_in(struct perf_event *leader,
1274 int assign[X86_PMC_IDX_MAX]; 1267 int assign[X86_PMC_IDX_MAX];
1275 int n0, n1, ret; 1268 int n0, n1, ret;
1276 1269
1270 if (!x86_pmu_initialized())
1271 return 0;
1272
1277 /* n0 = total number of events */ 1273 /* n0 = total number of events */
1278 n0 = collect_events(cpuc, leader, true); 1274 n0 = collect_events(cpuc, leader, true);
1279 if (n0 < 0) 1275 if (n0 < 0)
1280 return n0; 1276 return n0;
1281 1277
1282 ret = x86_schedule_events(cpuc, n0, assign); 1278 ret = x86_pmu.schedule_events(cpuc, n0, assign);
1283 if (ret) 1279 if (ret)
1284 return ret; 1280 return ret;
1285 1281
@@ -1329,6 +1325,9 @@ undo:
1329 1325
1330#include "perf_event_amd.c" 1326#include "perf_event_amd.c"
1331#include "perf_event_p6.c" 1327#include "perf_event_p6.c"
1328#include "perf_event_p4.c"
1329#include "perf_event_intel_lbr.c"
1330#include "perf_event_intel_ds.c"
1332#include "perf_event_intel.c" 1331#include "perf_event_intel.c"
1333 1332
1334static int __cpuinit 1333static int __cpuinit
@@ -1402,48 +1401,50 @@ void __init init_hw_perf_events(void)
1402 1401
1403 pr_cont("%s PMU driver.\n", x86_pmu.name); 1402 pr_cont("%s PMU driver.\n", x86_pmu.name);
1404 1403
1405 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { 1404 if (x86_pmu.quirks)
1405 x86_pmu.quirks();
1406
1407 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1406 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", 1408 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
1407 x86_pmu.num_events, X86_PMC_MAX_GENERIC); 1409 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1408 x86_pmu.num_events = X86_PMC_MAX_GENERIC; 1410 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1409 } 1411 }
1410 perf_event_mask = (1 << x86_pmu.num_events) - 1; 1412 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1411 perf_max_events = x86_pmu.num_events; 1413 perf_max_events = x86_pmu.num_counters;
1412 1414
1413 if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { 1415 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1414 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1416 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
1415 x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); 1417 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1416 x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; 1418 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1417 } 1419 }
1418 1420
1419 perf_event_mask |= 1421 x86_pmu.intel_ctrl |=
1420 ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; 1422 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1421 x86_pmu.intel_ctrl = perf_event_mask;
1422 1423
1423 perf_events_lapic_init(); 1424 perf_events_lapic_init();
1424 register_die_notifier(&perf_event_nmi_notifier); 1425 register_die_notifier(&perf_event_nmi_notifier);
1425 1426
1426 unconstrained = (struct event_constraint) 1427 unconstrained = (struct event_constraint)
1427 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 1428 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1428 0, x86_pmu.num_events); 1429 0, x86_pmu.num_counters);
1429 1430
1430 if (x86_pmu.event_constraints) { 1431 if (x86_pmu.event_constraints) {
1431 for_each_event_constraint(c, x86_pmu.event_constraints) { 1432 for_each_event_constraint(c, x86_pmu.event_constraints) {
1432 if (c->cmask != INTEL_ARCH_FIXED_MASK) 1433 if (c->cmask != X86_RAW_EVENT_MASK)
1433 continue; 1434 continue;
1434 1435
1435 c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; 1436 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
1436 c->weight += x86_pmu.num_events; 1437 c->weight += x86_pmu.num_counters;
1437 } 1438 }
1438 } 1439 }
1439 1440
1440 pr_info("... version: %d\n", x86_pmu.version); 1441 pr_info("... version: %d\n", x86_pmu.version);
1441 pr_info("... bit width: %d\n", x86_pmu.event_bits); 1442 pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
1442 pr_info("... generic registers: %d\n", x86_pmu.num_events); 1443 pr_info("... generic registers: %d\n", x86_pmu.num_counters);
1443 pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); 1444 pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
1444 pr_info("... max period: %016Lx\n", x86_pmu.max_period); 1445 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
1445 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); 1446 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1446 pr_info("... event mask: %016Lx\n", perf_event_mask); 1447 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1447 1448
1448 perf_cpu_notifier(x86_pmu_notifier); 1449 perf_cpu_notifier(x86_pmu_notifier);
1449} 1450}
@@ -1463,6 +1464,32 @@ static const struct pmu pmu = {
1463}; 1464};
1464 1465
1465/* 1466/*
1467 * validate that we can schedule this event
1468 */
1469static int validate_event(struct perf_event *event)
1470{
1471 struct cpu_hw_events *fake_cpuc;
1472 struct event_constraint *c;
1473 int ret = 0;
1474
1475 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1476 if (!fake_cpuc)
1477 return -ENOMEM;
1478
1479 c = x86_pmu.get_event_constraints(fake_cpuc, event);
1480
1481 if (!c || !c->weight)
1482 ret = -ENOSPC;
1483
1484 if (x86_pmu.put_event_constraints)
1485 x86_pmu.put_event_constraints(fake_cpuc, event);
1486
1487 kfree(fake_cpuc);
1488
1489 return ret;
1490}
1491
1492/*
1466 * validate a single event group 1493 * validate a single event group
1467 * 1494 *
1468 * validation include: 1495 * validation include:
@@ -1502,7 +1529,7 @@ static int validate_group(struct perf_event *event)
1502 1529
1503 fake_cpuc->n_events = n; 1530 fake_cpuc->n_events = n;
1504 1531
1505 ret = x86_schedule_events(fake_cpuc, n, NULL); 1532 ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
1506 1533
1507out_free: 1534out_free:
1508 kfree(fake_cpuc); 1535 kfree(fake_cpuc);
@@ -1527,6 +1554,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1527 1554
1528 if (event->group_leader != event) 1555 if (event->group_leader != event)
1529 err = validate_group(event); 1556 err = validate_group(event);
1557 else
1558 err = validate_event(event);
1530 1559
1531 event->pmu = tmp; 1560 event->pmu = tmp;
1532 } 1561 }
@@ -1574,8 +1603,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1574{ 1603{
1575 struct perf_callchain_entry *entry = data; 1604 struct perf_callchain_entry *entry = data;
1576 1605
1577 if (reliable) 1606 callchain_store(entry, addr);
1578 callchain_store(entry, addr);
1579} 1607}
1580 1608
1581static const struct stacktrace_ops backtrace_ops = { 1609static const struct stacktrace_ops backtrace_ops = {
@@ -1597,41 +1625,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1597 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1625 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1598} 1626}
1599 1627
1600/*
1601 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1602 */
1603static unsigned long
1604copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1605{
1606 unsigned long offset, addr = (unsigned long)from;
1607 int type = in_nmi() ? KM_NMI : KM_IRQ0;
1608 unsigned long size, len = 0;
1609 struct page *page;
1610 void *map;
1611 int ret;
1612
1613 do {
1614 ret = __get_user_pages_fast(addr, 1, 0, &page);
1615 if (!ret)
1616 break;
1617
1618 offset = addr & (PAGE_SIZE - 1);
1619 size = min(PAGE_SIZE - offset, n - len);
1620
1621 map = kmap_atomic(page, type);
1622 memcpy(to, map+offset, size);
1623 kunmap_atomic(map, type);
1624 put_page(page);
1625
1626 len += size;
1627 to += size;
1628 addr += size;
1629
1630 } while (len < n);
1631
1632 return len;
1633}
1634
1635#ifdef CONFIG_COMPAT 1628#ifdef CONFIG_COMPAT
1636static inline int 1629static inline int
1637perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) 1630perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1727,6 +1720,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1727{ 1720{
1728 struct perf_callchain_entry *entry; 1721 struct perf_callchain_entry *entry;
1729 1722
1723 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1724 /* TODO: We don't support guest os callchain now */
1725 return NULL;
1726 }
1727
1730 if (in_nmi()) 1728 if (in_nmi())
1731 entry = &__get_cpu_var(pmc_nmi_entry); 1729 entry = &__get_cpu_var(pmc_nmi_entry);
1732 else 1730 else
@@ -1750,3 +1748,29 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
1750 regs->cs = __KERNEL_CS; 1748 regs->cs = __KERNEL_CS;
1751 local_save_flags(regs->flags); 1749 local_save_flags(regs->flags);
1752} 1750}
1751
1752unsigned long perf_instruction_pointer(struct pt_regs *regs)
1753{
1754 unsigned long ip;
1755 if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
1756 ip = perf_guest_cbs->get_guest_ip();
1757 else
1758 ip = instruction_pointer(regs);
1759 return ip;
1760}
1761
1762unsigned long perf_misc_flags(struct pt_regs *regs)
1763{
1764 int misc = 0;
1765 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1766 misc |= perf_guest_cbs->is_user_mode() ?
1767 PERF_RECORD_MISC_GUEST_USER :
1768 PERF_RECORD_MISC_GUEST_KERNEL;
1769 } else
1770 misc |= user_mode(regs) ? PERF_RECORD_MISC_USER :
1771 PERF_RECORD_MISC_KERNEL;
1772 if (regs->flags & PERF_EFLAGS_EXACT)
1773 misc |= PERF_RECORD_MISC_EXACT;
1774
1775 return misc;
1776}
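
perf_instruction_pointer() and perf_misc_flags() above defer to perf_guest_cbs whenever a guest is running, so samples taken while a vcpu is in guest mode are tagged with the guest instruction pointer and GUEST_USER/GUEST_KERNEL. A sketch of the hypervisor side that makes this work; the callback names follow the perf_guest_cbs uses above, while the registration helper and the stub bodies are assumptions made only for illustration.

    static int example_is_in_guest(void)
    {
            return 0;       /* would test whether this CPU is in vcpu context */
    }

    static int example_is_user_mode(void)
    {
            return 0;       /* would test whether the guest is running at CPL 3 */
    }

    static unsigned long example_get_guest_ip(void)
    {
            return 0;       /* would return the saved guest instruction pointer */
    }

    static struct perf_guest_info_callbacks example_guest_cbs = {
            .is_in_guest    = example_is_in_guest,
            .is_user_mode   = example_is_user_mode,
            .get_guest_ip   = example_get_guest_ip,
    };

    /* Registered once at hypervisor init time. */
    static int __init example_register_guest_cbs(void)
    {
            return perf_register_guest_info_callbacks(&example_guest_cbs);
    }
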
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index db6f7d4056e1..611df11ba15e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -2,7 +2,7 @@
2 2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock); 3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4 4
5static __initconst u64 amd_hw_cache_event_ids 5static __initconst const u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX] 6 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX] 7 [PERF_COUNT_HW_CACHE_OP_MAX]
8 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 8 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -111,22 +111,19 @@ static u64 amd_pmu_event_map(int hw_event)
111 return amd_perfmon_event_map[hw_event]; 111 return amd_perfmon_event_map[hw_event];
112} 112}
113 113
114static u64 amd_pmu_raw_event(u64 hw_event) 114static int amd_pmu_hw_config(struct perf_event *event)
115{ 115{
116#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL 116 int ret = x86_pmu_hw_config(event);
117#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL 117
118#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL 118 if (ret)
119#define K7_EVNTSEL_INV_MASK 0x000800000ULL 119 return ret;
120#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL 120
121 121 if (event->attr.type != PERF_TYPE_RAW)
122#define K7_EVNTSEL_MASK \ 122 return 0;
123 (K7_EVNTSEL_EVENT_MASK | \ 123
124 K7_EVNTSEL_UNIT_MASK | \ 124 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
125 K7_EVNTSEL_EDGE_MASK | \ 125
126 K7_EVNTSEL_INV_MASK | \ 126 return 0;
127 K7_EVNTSEL_REG_MASK)
128
129 return hw_event & K7_EVNTSEL_MASK;
130} 127}
131 128
132/* 129/*
@@ -165,7 +162,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
165 * be removed on one CPU at a time AND PMU is disabled 162 * be removed on one CPU at a time AND PMU is disabled
166 * when we come here 163 * when we come here
167 */ 164 */
168 for (i = 0; i < x86_pmu.num_events; i++) { 165 for (i = 0; i < x86_pmu.num_counters; i++) {
169 if (nb->owners[i] == event) { 166 if (nb->owners[i] == event) {
170 cmpxchg(nb->owners+i, event, NULL); 167 cmpxchg(nb->owners+i, event, NULL);
171 break; 168 break;
@@ -215,7 +212,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
215 struct hw_perf_event *hwc = &event->hw; 212 struct hw_perf_event *hwc = &event->hw;
216 struct amd_nb *nb = cpuc->amd_nb; 213 struct amd_nb *nb = cpuc->amd_nb;
217 struct perf_event *old = NULL; 214 struct perf_event *old = NULL;
218 int max = x86_pmu.num_events; 215 int max = x86_pmu.num_counters;
219 int i, j, k = -1; 216 int i, j, k = -1;
220 217
221 /* 218 /*
@@ -293,7 +290,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
293 /* 290 /*
294 * initialize all possible NB constraints 291 * initialize all possible NB constraints
295 */ 292 */
296 for (i = 0; i < x86_pmu.num_events; i++) { 293 for (i = 0; i < x86_pmu.num_counters; i++) {
297 __set_bit(i, nb->event_constraints[i].idxmsk); 294 __set_bit(i, nb->event_constraints[i].idxmsk);
298 nb->event_constraints[i].weight = 1; 295 nb->event_constraints[i].weight = 1;
299 } 296 }
@@ -371,21 +368,22 @@ static void amd_pmu_cpu_dead(int cpu)
371 raw_spin_unlock(&amd_nb_lock); 368 raw_spin_unlock(&amd_nb_lock);
372} 369}
373 370
374static __initconst struct x86_pmu amd_pmu = { 371static __initconst const struct x86_pmu amd_pmu = {
375 .name = "AMD", 372 .name = "AMD",
376 .handle_irq = x86_pmu_handle_irq, 373 .handle_irq = x86_pmu_handle_irq,
377 .disable_all = x86_pmu_disable_all, 374 .disable_all = x86_pmu_disable_all,
378 .enable_all = x86_pmu_enable_all, 375 .enable_all = x86_pmu_enable_all,
379 .enable = x86_pmu_enable_event, 376 .enable = x86_pmu_enable_event,
380 .disable = x86_pmu_disable_event, 377 .disable = x86_pmu_disable_event,
378 .hw_config = amd_pmu_hw_config,
379 .schedule_events = x86_schedule_events,
381 .eventsel = MSR_K7_EVNTSEL0, 380 .eventsel = MSR_K7_EVNTSEL0,
382 .perfctr = MSR_K7_PERFCTR0, 381 .perfctr = MSR_K7_PERFCTR0,
383 .event_map = amd_pmu_event_map, 382 .event_map = amd_pmu_event_map,
384 .raw_event = amd_pmu_raw_event,
385 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 383 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
386 .num_events = 4, 384 .num_counters = 4,
387 .event_bits = 48, 385 .cntval_bits = 48,
388 .event_mask = (1ULL << 48) - 1, 386 .cntval_mask = (1ULL << 48) - 1,
389 .apic = 1, 387 .apic = 1,
390 /* use highest bit to detect overflow */ 388 /* use highest bit to detect overflow */
391 .max_period = (1ULL << 47) - 1, 389 .max_period = (1ULL << 47) - 1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9c794ac87837..a099df96f916 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -88,7 +88,7 @@ static u64 intel_pmu_event_map(int hw_event)
88 return intel_perfmon_event_map[hw_event]; 88 return intel_perfmon_event_map[hw_event];
89} 89}
90 90
91static __initconst u64 westmere_hw_cache_event_ids 91static __initconst const u64 westmere_hw_cache_event_ids
92 [PERF_COUNT_HW_CACHE_MAX] 92 [PERF_COUNT_HW_CACHE_MAX]
93 [PERF_COUNT_HW_CACHE_OP_MAX] 93 [PERF_COUNT_HW_CACHE_OP_MAX]
94 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 94 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -179,7 +179,7 @@ static __initconst u64 westmere_hw_cache_event_ids
179 }, 179 },
180}; 180};
181 181
182static __initconst u64 nehalem_hw_cache_event_ids 182static __initconst const u64 nehalem_hw_cache_event_ids
183 [PERF_COUNT_HW_CACHE_MAX] 183 [PERF_COUNT_HW_CACHE_MAX]
184 [PERF_COUNT_HW_CACHE_OP_MAX] 184 [PERF_COUNT_HW_CACHE_OP_MAX]
185 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 185 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -270,7 +270,7 @@ static __initconst u64 nehalem_hw_cache_event_ids
270 }, 270 },
271}; 271};
272 272
273static __initconst u64 core2_hw_cache_event_ids 273static __initconst const u64 core2_hw_cache_event_ids
274 [PERF_COUNT_HW_CACHE_MAX] 274 [PERF_COUNT_HW_CACHE_MAX]
275 [PERF_COUNT_HW_CACHE_OP_MAX] 275 [PERF_COUNT_HW_CACHE_OP_MAX]
276 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 276 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -361,7 +361,7 @@ static __initconst u64 core2_hw_cache_event_ids
361 }, 361 },
362}; 362};
363 363
364static __initconst u64 atom_hw_cache_event_ids 364static __initconst const u64 atom_hw_cache_event_ids
365 [PERF_COUNT_HW_CACHE_MAX] 365 [PERF_COUNT_HW_CACHE_MAX]
366 [PERF_COUNT_HW_CACHE_OP_MAX] 366 [PERF_COUNT_HW_CACHE_OP_MAX]
367 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 367 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -452,60 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids
452 }, 452 },
453}; 453};
454 454
455static u64 intel_pmu_raw_event(u64 hw_event)
456{
457#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
458#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
459#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
460#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
461#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
462
463#define CORE_EVNTSEL_MASK \
464 (INTEL_ARCH_EVTSEL_MASK | \
465 INTEL_ARCH_UNIT_MASK | \
466 INTEL_ARCH_EDGE_MASK | \
467 INTEL_ARCH_INV_MASK | \
468 INTEL_ARCH_CNT_MASK)
469
470 return hw_event & CORE_EVNTSEL_MASK;
471}
472
473static void intel_pmu_enable_bts(u64 config)
474{
475 unsigned long debugctlmsr;
476
477 debugctlmsr = get_debugctlmsr();
478
479 debugctlmsr |= X86_DEBUGCTL_TR;
480 debugctlmsr |= X86_DEBUGCTL_BTS;
481 debugctlmsr |= X86_DEBUGCTL_BTINT;
482
483 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
484 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
485
486 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
487 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
488
489 update_debugctlmsr(debugctlmsr);
490}
491
492static void intel_pmu_disable_bts(void)
493{
494 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
495 unsigned long debugctlmsr;
496
497 if (!cpuc->ds)
498 return;
499
500 debugctlmsr = get_debugctlmsr();
501
502 debugctlmsr &=
503 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
504 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
505
506 update_debugctlmsr(debugctlmsr);
507}
508
509static void intel_pmu_disable_all(void) 455static void intel_pmu_disable_all(void)
510{ 456{
511 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 457 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -514,12 +460,17 @@ static void intel_pmu_disable_all(void)
514 460
515 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) 461 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
516 intel_pmu_disable_bts(); 462 intel_pmu_disable_bts();
463
464 intel_pmu_pebs_disable_all();
465 intel_pmu_lbr_disable_all();
517} 466}
518 467
519static void intel_pmu_enable_all(void) 468static void intel_pmu_enable_all(int added)
520{ 469{
521 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 470 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
522 471
472 intel_pmu_pebs_enable_all();
473 intel_pmu_lbr_enable_all();
523 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 474 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
524 475
525 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 476 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -533,6 +484,41 @@ static void intel_pmu_enable_all(void)
533 } 484 }
534} 485}
535 486
487/*
488 * Workaround for:
489 * Intel Errata AAK100 (model 26)
490 * Intel Errata AAP53 (model 30)
491 * Intel Errata BD53 (model 44)
492 *
493 * These chips need to be 'reset' when adding counters by programming
494 * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
495 * either in sequence on the same PMC or on different PMCs.
496 */
497static void intel_pmu_nhm_enable_all(int added)
498{
499 if (added) {
500 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
501 int i;
502
503 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
504 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
505 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
506
507 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
508 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
509
510 for (i = 0; i < 3; i++) {
511 struct perf_event *event = cpuc->events[i];
512
513 if (!event)
514 continue;
515
516 __x86_pmu_enable_event(&event->hw);
517 }
518 }
519 intel_pmu_enable_all(added);
520}
521
536static inline u64 intel_pmu_get_status(void) 522static inline u64 intel_pmu_get_status(void)
537{ 523{
538 u64 status; 524 u64 status;
@@ -547,8 +533,7 @@ static inline void intel_pmu_ack_status(u64 ack)
547 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 533 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
548} 534}
549 535
550static inline void 536static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
551intel_pmu_disable_fixed(struct hw_perf_event *hwc)
552{ 537{
553 int idx = hwc->idx - X86_PMC_IDX_FIXED; 538 int idx = hwc->idx - X86_PMC_IDX_FIXED;
554 u64 ctrl_val, mask; 539 u64 ctrl_val, mask;
@@ -557,71 +542,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc)
557 542
558 rdmsrl(hwc->config_base, ctrl_val); 543 rdmsrl(hwc->config_base, ctrl_val);
559 ctrl_val &= ~mask; 544 ctrl_val &= ~mask;
560 (void)checking_wrmsrl(hwc->config_base, ctrl_val); 545 wrmsrl(hwc->config_base, ctrl_val);
561}
562
563static void intel_pmu_drain_bts_buffer(void)
564{
565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
566 struct debug_store *ds = cpuc->ds;
567 struct bts_record {
568 u64 from;
569 u64 to;
570 u64 flags;
571 };
572 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
573 struct bts_record *at, *top;
574 struct perf_output_handle handle;
575 struct perf_event_header header;
576 struct perf_sample_data data;
577 struct pt_regs regs;
578
579 if (!event)
580 return;
581
582 if (!ds)
583 return;
584
585 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
586 top = (struct bts_record *)(unsigned long)ds->bts_index;
587
588 if (top <= at)
589 return;
590
591 ds->bts_index = ds->bts_buffer_base;
592
593 perf_sample_data_init(&data, 0);
594
595 data.period = event->hw.last_period;
596 regs.ip = 0;
597
598 /*
599 * Prepare a generic sample, i.e. fill in the invariant fields.
600 * We will overwrite the from and to address before we output
601 * the sample.
602 */
603 perf_prepare_sample(&header, &data, event, &regs);
604
605 if (perf_output_begin(&handle, event,
606 header.size * (top - at), 1, 1))
607 return;
608
609 for (; at < top; at++) {
610 data.ip = at->from;
611 data.addr = at->to;
612
613 perf_output_sample(&handle, &header, &data, event);
614 }
615
616 perf_output_end(&handle);
617
618 /* There's new data available. */
619 event->hw.interrupts++;
620 event->pending_kill = POLL_IN;
621} 546}
622 547
623static inline void 548static void intel_pmu_disable_event(struct perf_event *event)
624intel_pmu_disable_event(struct perf_event *event)
625{ 549{
626 struct hw_perf_event *hwc = &event->hw; 550 struct hw_perf_event *hwc = &event->hw;
627 551
@@ -637,14 +561,15 @@ intel_pmu_disable_event(struct perf_event *event)
637 } 561 }
638 562
639 x86_pmu_disable_event(event); 563 x86_pmu_disable_event(event);
564
565 if (unlikely(event->attr.precise))
566 intel_pmu_pebs_disable(event);
640} 567}
641 568
642static inline void 569static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
643intel_pmu_enable_fixed(struct hw_perf_event *hwc)
644{ 570{
645 int idx = hwc->idx - X86_PMC_IDX_FIXED; 571 int idx = hwc->idx - X86_PMC_IDX_FIXED;
646 u64 ctrl_val, bits, mask; 572 u64 ctrl_val, bits, mask;
647 int err;
648 573
649 /* 574 /*
650 * Enable IRQ generation (0x8), 575 * Enable IRQ generation (0x8),
@@ -669,7 +594,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc)
669 rdmsrl(hwc->config_base, ctrl_val); 594 rdmsrl(hwc->config_base, ctrl_val);
670 ctrl_val &= ~mask; 595 ctrl_val &= ~mask;
671 ctrl_val |= bits; 596 ctrl_val |= bits;
672 err = checking_wrmsrl(hwc->config_base, ctrl_val); 597 wrmsrl(hwc->config_base, ctrl_val);
673} 598}
674 599
675static void intel_pmu_enable_event(struct perf_event *event) 600static void intel_pmu_enable_event(struct perf_event *event)
@@ -689,6 +614,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
689 return; 614 return;
690 } 615 }
691 616
617 if (unlikely(event->attr.precise))
618 intel_pmu_pebs_enable(event);
619
692 __x86_pmu_enable_event(hwc); 620 __x86_pmu_enable_event(hwc);
693} 621}
694 622
@@ -708,20 +636,20 @@ static void intel_pmu_reset(void)
708 unsigned long flags; 636 unsigned long flags;
709 int idx; 637 int idx;
710 638
711 if (!x86_pmu.num_events) 639 if (!x86_pmu.num_counters)
712 return; 640 return;
713 641
714 local_irq_save(flags); 642 local_irq_save(flags);
715 643
716 printk("clearing PMU state on CPU#%d\n", smp_processor_id()); 644 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
717 645
718 for (idx = 0; idx < x86_pmu.num_events; idx++) { 646 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
719 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); 647 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
720 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); 648 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
721 } 649 }
722 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { 650 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
723 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 651 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
724 } 652
725 if (ds) 653 if (ds)
726 ds->bts_index = ds->bts_buffer_base; 654 ds->bts_index = ds->bts_buffer_base;
727 655
@@ -747,7 +675,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
747 intel_pmu_drain_bts_buffer(); 675 intel_pmu_drain_bts_buffer();
748 status = intel_pmu_get_status(); 676 status = intel_pmu_get_status();
749 if (!status) { 677 if (!status) {
750 intel_pmu_enable_all(); 678 intel_pmu_enable_all(0);
751 return 0; 679 return 0;
752 } 680 }
753 681
@@ -762,6 +690,15 @@ again:
762 690
763 inc_irq_stat(apic_perf_irqs); 691 inc_irq_stat(apic_perf_irqs);
764 ack = status; 692 ack = status;
693
694 intel_pmu_lbr_read();
695
696 /*
697 * PEBS overflow sets bit 62 in the global status register
698 */
699 if (__test_and_clear_bit(62, (unsigned long *)&status))
700 x86_pmu.drain_pebs(regs);
701
765 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 702 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
766 struct perf_event *event = cpuc->events[bit]; 703 struct perf_event *event = cpuc->events[bit];
767 704
@@ -787,26 +724,22 @@ again:
787 goto again; 724 goto again;
788 725
789done: 726done:
790 intel_pmu_enable_all(); 727 intel_pmu_enable_all(0);
791 return 1; 728 return 1;
792} 729}
793 730
794static struct event_constraint bts_constraint =
795 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
796
797static struct event_constraint * 731static struct event_constraint *
798intel_special_constraints(struct perf_event *event) 732intel_bts_constraints(struct perf_event *event)
799{ 733{
800 unsigned int hw_event; 734 struct hw_perf_event *hwc = &event->hw;
801 735 unsigned int hw_event, bts_event;
802 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
803 736
804 if (unlikely((hw_event == 737 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
805 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && 738 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
806 (event->hw.sample_period == 1))) {
807 739
740 if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
808 return &bts_constraint; 741 return &bts_constraint;
809 } 742
810 return NULL; 743 return NULL;
811} 744}
812 745
@@ -815,24 +748,53 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
815{ 748{
816 struct event_constraint *c; 749 struct event_constraint *c;
817 750
818 c = intel_special_constraints(event); 751 c = intel_bts_constraints(event);
752 if (c)
753 return c;
754
755 c = intel_pebs_constraints(event);
819 if (c) 756 if (c)
820 return c; 757 return c;
821 758
822 return x86_get_event_constraints(cpuc, event); 759 return x86_get_event_constraints(cpuc, event);
823} 760}
824 761
825static __initconst struct x86_pmu core_pmu = { 762static int intel_pmu_hw_config(struct perf_event *event)
763{
764 int ret = x86_pmu_hw_config(event);
765
766 if (ret)
767 return ret;
768
769 if (event->attr.type != PERF_TYPE_RAW)
770 return 0;
771
772 if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
773 return 0;
774
775 if (x86_pmu.version < 3)
776 return -EINVAL;
777
778 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
779 return -EACCES;
780
781 event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
782
783 return 0;
784}
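A minimal user-space sketch of the request this check gates: a raw event with the AnyThread bit (bit 21 of IA32_PERFEVTSELx, i.e. ARCH_PERFMON_EVENTSEL_ANY) set. The 0x003c raw encoding (unhalted core cycles) and the syscall invocation are illustrative assumptions, not part of this patch; per the checks above the kernel rejects the request with -EINVAL on perfmon versions below 3 and with -EACCES when perf is paranoid and the caller lacks CAP_SYS_ADMIN.

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;
        long fd;

        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_RAW;
        /* event 0x3c, umask 0x00 (core cycles), plus the AnyThread bit */
        attr.config = 0x003c | (1ULL << 21);

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                perror("perf_event_open");      /* -EINVAL or -EACCES, see above */
        else
                puts("ANY-thread raw event accepted");
        return 0;
}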
785
786static __initconst const struct x86_pmu core_pmu = {
826 .name = "core", 787 .name = "core",
827 .handle_irq = x86_pmu_handle_irq, 788 .handle_irq = x86_pmu_handle_irq,
828 .disable_all = x86_pmu_disable_all, 789 .disable_all = x86_pmu_disable_all,
829 .enable_all = x86_pmu_enable_all, 790 .enable_all = x86_pmu_enable_all,
830 .enable = x86_pmu_enable_event, 791 .enable = x86_pmu_enable_event,
831 .disable = x86_pmu_disable_event, 792 .disable = x86_pmu_disable_event,
793 .hw_config = x86_pmu_hw_config,
794 .schedule_events = x86_schedule_events,
832 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 795 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
833 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 796 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
834 .event_map = intel_pmu_event_map, 797 .event_map = intel_pmu_event_map,
835 .raw_event = intel_pmu_raw_event,
836 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 798 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
837 .apic = 1, 799 .apic = 1,
838 /* 800 /*
@@ -845,17 +807,32 @@ static __initconst struct x86_pmu core_pmu = {
845 .event_constraints = intel_core_event_constraints, 807 .event_constraints = intel_core_event_constraints,
846}; 808};
847 809
848static __initconst struct x86_pmu intel_pmu = { 810static void intel_pmu_cpu_starting(int cpu)
811{
812 init_debug_store_on_cpu(cpu);
813 /*
814 * Deal with CPUs that don't clear their LBRs on power-up.
815 */
816 intel_pmu_lbr_reset();
817}
818
819static void intel_pmu_cpu_dying(int cpu)
820{
821 fini_debug_store_on_cpu(cpu);
822}
823
824static __initconst const struct x86_pmu intel_pmu = {
849 .name = "Intel", 825 .name = "Intel",
850 .handle_irq = intel_pmu_handle_irq, 826 .handle_irq = intel_pmu_handle_irq,
851 .disable_all = intel_pmu_disable_all, 827 .disable_all = intel_pmu_disable_all,
852 .enable_all = intel_pmu_enable_all, 828 .enable_all = intel_pmu_enable_all,
853 .enable = intel_pmu_enable_event, 829 .enable = intel_pmu_enable_event,
854 .disable = intel_pmu_disable_event, 830 .disable = intel_pmu_disable_event,
831 .hw_config = intel_pmu_hw_config,
832 .schedule_events = x86_schedule_events,
855 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 833 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
856 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 834 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
857 .event_map = intel_pmu_event_map, 835 .event_map = intel_pmu_event_map,
858 .raw_event = intel_pmu_raw_event,
859 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 836 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
860 .apic = 1, 837 .apic = 1,
861 /* 838 /*
@@ -864,14 +841,38 @@ static __initconst struct x86_pmu intel_pmu = {
864 * the generic event period: 841 * the generic event period:
865 */ 842 */
866 .max_period = (1ULL << 31) - 1, 843 .max_period = (1ULL << 31) - 1,
867 .enable_bts = intel_pmu_enable_bts,
868 .disable_bts = intel_pmu_disable_bts,
869 .get_event_constraints = intel_get_event_constraints, 844 .get_event_constraints = intel_get_event_constraints,
870 845
871 .cpu_starting = init_debug_store_on_cpu, 846 .cpu_starting = intel_pmu_cpu_starting,
872 .cpu_dying = fini_debug_store_on_cpu, 847 .cpu_dying = intel_pmu_cpu_dying,
873}; 848};
874 849
850static void intel_clovertown_quirks(void)
851{
852 /*
853 * PEBS is unreliable due to:
854 *
855 * AJ67 - PEBS may experience CPL leaks
856 * AJ68 - PEBS PMI may be delayed by one event
857 * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
858 * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
859 *
860 * AJ67 could be worked around by restricting the OS/USR flags.
861 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
862 *
863 * AJ106 could possibly be worked around by not allowing LBR
864 * usage from PEBS, including the fixup.
865 * AJ68 could possibly be worked around by always programming
866 * a pebs_event_reset[0] value and coping with the lost events.
867 *
868 * But taken together it might just make sense to not enable PEBS on
869 * these chips.
870 */
871 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
872 x86_pmu.pebs = 0;
873 x86_pmu.pebs_constraints = NULL;
874}
875
875static __init int intel_pmu_init(void) 876static __init int intel_pmu_init(void)
876{ 877{
877 union cpuid10_edx edx; 878 union cpuid10_edx edx;
@@ -881,12 +882,13 @@ static __init int intel_pmu_init(void)
881 int version; 882 int version;
882 883
883 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 884 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
884 /* check for P6 processor family */ 885 switch (boot_cpu_data.x86) {
885 if (boot_cpu_data.x86 == 6) { 886 case 0x6:
886 return p6_pmu_init(); 887 return p6_pmu_init();
887 } else { 888 case 0xf:
889 return p4_pmu_init();
890 }
888 return -ENODEV; 891 return -ENODEV;
889 }
890 } 892 }
891 893
892 /* 894 /*
@@ -904,16 +906,28 @@ static __init int intel_pmu_init(void)
904 x86_pmu = intel_pmu; 906 x86_pmu = intel_pmu;
905 907
906 x86_pmu.version = version; 908 x86_pmu.version = version;
907 x86_pmu.num_events = eax.split.num_events; 909 x86_pmu.num_counters = eax.split.num_counters;
908 x86_pmu.event_bits = eax.split.bit_width; 910 x86_pmu.cntval_bits = eax.split.bit_width;
909 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; 911 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
910 912
911 /* 913 /*
912 * Quirk: v2 perfmon does not report fixed-purpose events, so 914 * Quirk: v2 perfmon does not report fixed-purpose events, so
913 * assume at least 3 events: 915 * assume at least 3 events:
914 */ 916 */
915 if (version > 1) 917 if (version > 1)
916 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); 918 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
919
920 /*
921 * v2 and above have a perf capabilities MSR
922 */
923 if (version > 1) {
924 u64 capabilities;
925
926 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
927 x86_pmu.intel_cap.capabilities = capabilities;
928 }
929
930 intel_ds_init();
917 931
918 /* 932 /*
919 * Install the hw-cache-events table: 933 * Install the hw-cache-events table:
@@ -924,12 +938,15 @@ static __init int intel_pmu_init(void)
924 break; 938 break;
925 939
926 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 940 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
941 x86_pmu.quirks = intel_clovertown_quirks;
927 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 942 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
928 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 943 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
929 case 29: /* six-core 45 nm xeon "Dunnington" */ 944 case 29: /* six-core 45 nm xeon "Dunnington" */
930 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, 945 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
931 sizeof(hw_cache_event_ids)); 946 sizeof(hw_cache_event_ids));
932 947
948 intel_pmu_lbr_init_core();
949
933 x86_pmu.event_constraints = intel_core2_event_constraints; 950 x86_pmu.event_constraints = intel_core2_event_constraints;
934 pr_cont("Core2 events, "); 951 pr_cont("Core2 events, ");
935 break; 952 break;
@@ -940,13 +957,19 @@ static __init int intel_pmu_init(void)
940 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 957 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
941 sizeof(hw_cache_event_ids)); 958 sizeof(hw_cache_event_ids));
942 959
960 intel_pmu_lbr_init_nhm();
961
943 x86_pmu.event_constraints = intel_nehalem_event_constraints; 962 x86_pmu.event_constraints = intel_nehalem_event_constraints;
944 pr_cont("Nehalem/Corei7 events, "); 963 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
964 pr_cont("Nehalem events, ");
945 break; 965 break;
966
946 case 28: /* Atom */ 967 case 28: /* Atom */
947 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 968 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
948 sizeof(hw_cache_event_ids)); 969 sizeof(hw_cache_event_ids));
949 970
971 intel_pmu_lbr_init_atom();
972
950 x86_pmu.event_constraints = intel_gen_event_constraints; 973 x86_pmu.event_constraints = intel_gen_event_constraints;
951 pr_cont("Atom events, "); 974 pr_cont("Atom events, ");
952 break; 975 break;
@@ -956,7 +979,10 @@ static __init int intel_pmu_init(void)
956 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 979 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
957 sizeof(hw_cache_event_ids)); 980 sizeof(hw_cache_event_ids));
958 981
982 intel_pmu_lbr_init_nhm();
983
959 x86_pmu.event_constraints = intel_westmere_event_constraints; 984 x86_pmu.event_constraints = intel_westmere_event_constraints;
985 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
960 pr_cont("Westmere events, "); 986 pr_cont("Westmere events, ");
961 break; 987 break;
962 988
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
new file mode 100644
index 000000000000..ec8b2e12e104
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -0,0 +1,664 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/* The maximal number of PEBS events: */
4#define MAX_PEBS_EVENTS 4
5
6/* The size of a BTS record in bytes: */
7#define BTS_RECORD_SIZE 24
8
9#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
10#define PEBS_BUFFER_SIZE PAGE_SIZE
11
12/*
13 * pebs_record_32 for p4 and core is not supported
14
15struct pebs_record_32 {
16 u32 flags, ip;
17 u32 ax, bc, cx, dx;
18 u32 si, di, bp, sp;
19};
20
21 */
22
23struct pebs_record_core {
24 u64 flags, ip;
25 u64 ax, bx, cx, dx;
26 u64 si, di, bp, sp;
27 u64 r8, r9, r10, r11;
28 u64 r12, r13, r14, r15;
29};
30
31struct pebs_record_nhm {
32 u64 flags, ip;
33 u64 ax, bx, cx, dx;
34 u64 si, di, bp, sp;
35 u64 r8, r9, r10, r11;
36 u64 r12, r13, r14, r15;
37 u64 status, dla, dse, lat;
38};
39
40/*
41 * A debug store configuration.
42 *
43 * We only support architectures that use 64bit fields.
44 */
45struct debug_store {
46 u64 bts_buffer_base;
47 u64 bts_index;
48 u64 bts_absolute_maximum;
49 u64 bts_interrupt_threshold;
50 u64 pebs_buffer_base;
51 u64 pebs_index;
52 u64 pebs_absolute_maximum;
53 u64 pebs_interrupt_threshold;
54 u64 pebs_event_reset[MAX_PEBS_EVENTS];
55};
56
57static void init_debug_store_on_cpu(int cpu)
58{
59 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
60
61 if (!ds)
62 return;
63
64 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
65 (u32)((u64)(unsigned long)ds),
66 (u32)((u64)(unsigned long)ds >> 32));
67}
68
69static void fini_debug_store_on_cpu(int cpu)
70{
71 if (!per_cpu(cpu_hw_events, cpu).ds)
72 return;
73
74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
75}
76
77static void release_ds_buffers(void)
78{
79 int cpu;
80
81 if (!x86_pmu.bts && !x86_pmu.pebs)
82 return;
83
84 get_online_cpus();
85
86 for_each_online_cpu(cpu)
87 fini_debug_store_on_cpu(cpu);
88
89 for_each_possible_cpu(cpu) {
90 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
91
92 if (!ds)
93 continue;
94
95 per_cpu(cpu_hw_events, cpu).ds = NULL;
96
97 kfree((void *)(unsigned long)ds->pebs_buffer_base);
98 kfree((void *)(unsigned long)ds->bts_buffer_base);
99 kfree(ds);
100 }
101
102 put_online_cpus();
103}
104
105static int reserve_ds_buffers(void)
106{
107 int cpu, err = 0;
108
109 if (!x86_pmu.bts && !x86_pmu.pebs)
110 return 0;
111
112 get_online_cpus();
113
114 for_each_possible_cpu(cpu) {
115 struct debug_store *ds;
116 void *buffer;
117 int max, thresh;
118
119 err = -ENOMEM;
120 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
121 if (unlikely(!ds))
122 break;
123 per_cpu(cpu_hw_events, cpu).ds = ds;
124
125 if (x86_pmu.bts) {
126 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
127 if (unlikely(!buffer))
128 break;
129
130 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
131 thresh = max / 16;
132
133 ds->bts_buffer_base = (u64)(unsigned long)buffer;
134 ds->bts_index = ds->bts_buffer_base;
135 ds->bts_absolute_maximum = ds->bts_buffer_base +
136 max * BTS_RECORD_SIZE;
137 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
138 thresh * BTS_RECORD_SIZE;
139 }
140
141 if (x86_pmu.pebs) {
142 buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
143 if (unlikely(!buffer))
144 break;
145
146 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
147
148 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
149 ds->pebs_index = ds->pebs_buffer_base;
150 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
151 max * x86_pmu.pebs_record_size;
152 /*
153 * Always use single record PEBS
154 */
155 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
156 x86_pmu.pebs_record_size;
157 }
158
159 err = 0;
160 }
161
162 if (err)
163 release_ds_buffers();
164 else {
165 for_each_online_cpu(cpu)
166 init_debug_store_on_cpu(cpu);
167 }
168
169 put_online_cpus();
170
171 return err;
172}
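For a feel of the sizing above, a stand-alone sketch of the arithmetic, assuming 4 KiB pages (the record sizes follow from the structs defined at the top of this file):

#include <stdio.h>

#define PAGE_SIZE        4096UL                 /* assumed for this sketch */
#define BTS_RECORD_SIZE  24UL
#define BTS_BUFFER_SIZE  (PAGE_SIZE << 4)       /* 64 KiB */
#define PEBS_BUFFER_SIZE PAGE_SIZE

int main(void)
{
        unsigned long bts_max    = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;  /* 2730 */
        unsigned long bts_thresh = bts_max / 16;                       /* 170 */

        printf("BTS:  %lu records, PMI threshold %lu records before the end\n",
               bts_max, bts_thresh);
        /* sizeof(struct pebs_record_core) == 144, pebs_record_nhm == 176 */
        printf("PEBS: %lu core records or %lu nhm records, threshold at 1\n",
               PEBS_BUFFER_SIZE / 144, PEBS_BUFFER_SIZE / 176);
        return 0;
}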
173
174/*
175 * BTS
176 */
177
178static struct event_constraint bts_constraint =
179 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
180
181static void intel_pmu_enable_bts(u64 config)
182{
183 unsigned long debugctlmsr;
184
185 debugctlmsr = get_debugctlmsr();
186
187 debugctlmsr |= DEBUGCTLMSR_TR;
188 debugctlmsr |= DEBUGCTLMSR_BTS;
189 debugctlmsr |= DEBUGCTLMSR_BTINT;
190
191 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
192 debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
193
194 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
195 debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
196
197 update_debugctlmsr(debugctlmsr);
198}
199
200static void intel_pmu_disable_bts(void)
201{
202 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
203 unsigned long debugctlmsr;
204
205 if (!cpuc->ds)
206 return;
207
208 debugctlmsr = get_debugctlmsr();
209
210 debugctlmsr &=
211 ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
212 DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
213
214 update_debugctlmsr(debugctlmsr);
215}
216
217static void intel_pmu_drain_bts_buffer(void)
218{
219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
220 struct debug_store *ds = cpuc->ds;
221 struct bts_record {
222 u64 from;
223 u64 to;
224 u64 flags;
225 };
226 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
227 struct bts_record *at, *top;
228 struct perf_output_handle handle;
229 struct perf_event_header header;
230 struct perf_sample_data data;
231 struct pt_regs regs;
232
233 if (!event)
234 return;
235
236 if (!ds)
237 return;
238
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
240 top = (struct bts_record *)(unsigned long)ds->bts_index;
241
242 if (top <= at)
243 return;
244
245 ds->bts_index = ds->bts_buffer_base;
246
247 perf_sample_data_init(&data, 0);
248 data.period = event->hw.last_period;
249 regs.ip = 0;
250
251 /*
252 * Prepare a generic sample, i.e. fill in the invariant fields.
253 * We will overwrite the from and to address before we output
254 * the sample.
255 */
256 perf_prepare_sample(&header, &data, event, &regs);
257
258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
259 return;
260
261 for (; at < top; at++) {
262 data.ip = at->from;
263 data.addr = at->to;
264
265 perf_output_sample(&handle, &header, &data, event);
266 }
267
268 perf_output_end(&handle);
269
270 /* There's new data available. */
271 event->hw.interrupts++;
272 event->pending_kill = POLL_IN;
273}
274
275/*
276 * PEBS
277 */
278
279static struct event_constraint intel_core_pebs_events[] = {
280 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
281 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
282 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
283 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
284 PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
285 PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
286 PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
287 PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
288 PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
289 EVENT_CONSTRAINT_END
290};
291
292static struct event_constraint intel_nehalem_pebs_events[] = {
293 PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
294 PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
295 PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
296 PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */
297 PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
298 PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
299 PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
300 PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
301 PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
302 EVENT_CONSTRAINT_END
303};
304
305static struct event_constraint *
306intel_pebs_constraints(struct perf_event *event)
307{
308 struct event_constraint *c;
309
310 if (!event->attr.precise)
311 return NULL;
312
313 if (x86_pmu.pebs_constraints) {
314 for_each_event_constraint(c, x86_pmu.pebs_constraints) {
315 if ((event->hw.config & c->cmask) == c->code)
316 return c;
317 }
318 }
319
320 return &emptyconstraint;
321}
322
323static void intel_pmu_pebs_enable(struct perf_event *event)
324{
325 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
326 struct hw_perf_event *hwc = &event->hw;
327
328 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
329
330 cpuc->pebs_enabled |= 1ULL << hwc->idx;
331 WARN_ON_ONCE(cpuc->enabled);
332
333 if (x86_pmu.intel_cap.pebs_trap)
334 intel_pmu_lbr_enable(event);
335}
336
337static void intel_pmu_pebs_disable(struct perf_event *event)
338{
339 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
340 struct hw_perf_event *hwc = &event->hw;
341
342 cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
343 if (cpuc->enabled)
344 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
345
346 hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
347
348 if (x86_pmu.intel_cap.pebs_trap)
349 intel_pmu_lbr_disable(event);
350}
351
352static void intel_pmu_pebs_enable_all(void)
353{
354 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
355
356 if (cpuc->pebs_enabled)
357 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
358}
359
360static void intel_pmu_pebs_disable_all(void)
361{
362 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
363
364 if (cpuc->pebs_enabled)
365 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
366}
367
368#include <asm/insn.h>
369
370static inline bool kernel_ip(unsigned long ip)
371{
372#ifdef CONFIG_X86_32
373 return ip > PAGE_OFFSET;
374#else
375 return (long)ip < 0;
376#endif
377}
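The 64-bit branch of kernel_ip() above relies on kernel text living in the upper canonical half of the address space, so the sign bit of the address is set. A stand-alone check, compiled for x86-64, with purely illustrative addresses:

#include <stdio.h>

int main(void)
{
        unsigned long user_ip   = 0x0000000000401000UL; /* typical user text */
        unsigned long kernel_ip = 0xffffffff81000000UL; /* typical kernel text */

        printf("user:   kernel_ip=%d\n", (long)user_ip < 0);    /* 0 */
        printf("kernel: kernel_ip=%d\n", (long)kernel_ip < 0);  /* 1 */
        return 0;
}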
378
379static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
380{
381 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
382 unsigned long from = cpuc->lbr_entries[0].from;
383 unsigned long old_to, to = cpuc->lbr_entries[0].to;
384 unsigned long ip = regs->ip;
385
386 /*
387 * We don't need to fix up if the PEBS assist is fault-like
388 */
389 if (!x86_pmu.intel_cap.pebs_trap)
390 return 1;
391
392 /*
393 * No LBR entry, no basic block, no rewinding
394 */
395 if (!cpuc->lbr_stack.nr || !from || !to)
396 return 0;
397
398 /*
399 * Basic blocks should never cross user/kernel boundaries
400 */
401 if (kernel_ip(ip) != kernel_ip(to))
402 return 0;
403
404 /*
405 * unsigned math, either ip is before the start (impossible) or
406 * the basic block is larger than 1 page (sanity)
407 */
408 if ((ip - to) > PAGE_SIZE)
409 return 0;
410
411 /*
412 * We sampled a branch insn, rewind using the LBR stack
413 */
414 if (ip == to) {
415 regs->ip = from;
416 return 1;
417 }
418
419 do {
420 struct insn insn;
421 u8 buf[MAX_INSN_SIZE];
422 void *kaddr;
423
424 old_to = to;
425 if (!kernel_ip(ip)) {
426 int bytes, size = MAX_INSN_SIZE;
427
428 bytes = copy_from_user_nmi(buf, (void __user *)to, size);
429 if (bytes != size)
430 return 0;
431
432 kaddr = buf;
433 } else
434 kaddr = (void *)to;
435
436 kernel_insn_init(&insn, kaddr);
437 insn_get_length(&insn);
438 to += insn.length;
439 } while (to < ip);
440
441 if (to == ip) {
442 regs->ip = old_to;
443 return 1;
444 }
445
446 /*
447 * Even though we decoded the basic block, the instruction stream
448 * never matched the given IP; either the TO or the IP got corrupted.
449 */
450 return 0;
451}
452
453static int intel_pmu_save_and_restart(struct perf_event *event);
454
455static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
456{
457 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
458 struct debug_store *ds = cpuc->ds;
459 struct perf_event *event = cpuc->events[0]; /* PMC0 only */
460 struct pebs_record_core *at, *top;
461 struct perf_sample_data data;
462 struct perf_raw_record raw;
463 struct pt_regs regs;
464 int n;
465
466 if (!ds || !x86_pmu.pebs)
467 return;
468
469 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
470 top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
471
472 /*
473 * Whatever else happens, drain the thing
474 */
475 ds->pebs_index = ds->pebs_buffer_base;
476
477 if (!test_bit(0, cpuc->active_mask))
478 return;
479
480 WARN_ON_ONCE(!event);
481
482 if (!event->attr.precise)
483 return;
484
485 n = top - at;
486 if (n <= 0)
487 return;
488
489 if (!intel_pmu_save_and_restart(event))
490 return;
491
492 /*
493 * Should not happen, we program the threshold at 1 and do not
494 * set a reset value.
495 */
496 WARN_ON_ONCE(n > 1);
497 at += n - 1;
498
499 perf_sample_data_init(&data, 0);
500 data.period = event->hw.last_period;
501
502 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
503 raw.size = x86_pmu.pebs_record_size;
504 raw.data = at;
505 data.raw = &raw;
506 }
507
508 /*
509 * We use the interrupt regs as a base because the PEBS record
510 * does not contain a full regs set, specifically it seems to
511 * lack segment descriptors, which get used by things like
512 * user_mode().
513 *
514 * In the simple case fix up only the IP, BP and SP regs, so that
515 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN function properly.
516 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
517 */
518 regs = *iregs;
519 regs.ip = at->ip;
520 regs.bp = at->bp;
521 regs.sp = at->sp;
522
523 if (intel_pmu_pebs_fixup_ip(&regs))
524 regs.flags |= PERF_EFLAGS_EXACT;
525 else
526 regs.flags &= ~PERF_EFLAGS_EXACT;
527
528 if (perf_event_overflow(event, 1, &data, &regs))
529 x86_pmu_stop(event);
530}
531
532static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
533{
534 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
535 struct debug_store *ds = cpuc->ds;
536 struct pebs_record_nhm *at, *top;
537 struct perf_sample_data data;
538 struct perf_event *event = NULL;
539 struct perf_raw_record raw;
540 struct pt_regs regs;
541 u64 status = 0;
542 int bit, n;
543
544 if (!ds || !x86_pmu.pebs)
545 return;
546
547 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
548 top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
549
550 ds->pebs_index = ds->pebs_buffer_base;
551
552 n = top - at;
553 if (n <= 0)
554 return;
555
556 /*
557 * Should not happen, we program the threshold at 1 and do not
558 * set a reset value.
559 */
560 WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
561
562 for ( ; at < top; at++) {
563 for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
564 event = cpuc->events[bit];
565 if (!test_bit(bit, cpuc->active_mask))
566 continue;
567
568 WARN_ON_ONCE(!event);
569
570 if (!event->attr.precise)
571 continue;
572
573 if (__test_and_set_bit(bit, (unsigned long *)&status))
574 continue;
575
576 break;
577 }
578
579 if (!event || bit >= MAX_PEBS_EVENTS)
580 continue;
581
582 if (!intel_pmu_save_and_restart(event))
583 continue;
584
585 perf_sample_data_init(&data, 0);
586 data.period = event->hw.last_period;
587
588 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
589 raw.size = x86_pmu.pebs_record_size;
590 raw.data = at;
591 data.raw = &raw;
592 }
593
594 /*
595 * See the comment in intel_pmu_drain_pebs_core()
596 */
597 regs = *iregs;
598 regs.ip = at->ip;
599 regs.bp = at->bp;
600 regs.sp = at->sp;
601
602 if (intel_pmu_pebs_fixup_ip(&regs))
603 regs.flags |= PERF_EFLAGS_EXACT;
604 else
605 regs.flags &= ~PERF_EFLAGS_EXACT;
606
607 if (perf_event_overflow(event, 1, &data, &regs))
608 x86_pmu_stop(event);
609 }
610}
611
612/*
613 * BTS, PEBS probe and setup
614 */
615
616static void intel_ds_init(void)
617{
618 /*
619 * No support for 32bit formats
620 */
621 if (!boot_cpu_has(X86_FEATURE_DTES64))
622 return;
623
624 x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
625 x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
626 if (x86_pmu.pebs) {
627 char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
628 int format = x86_pmu.intel_cap.pebs_format;
629
630 switch (format) {
631 case 0:
632 printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
633 x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
634 x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
635 x86_pmu.pebs_constraints = intel_core_pebs_events;
636 break;
637
638 case 1:
639 printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
640 x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
641 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
642 x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
643 break;
644
645 default:
646 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
647 x86_pmu.pebs = 0;
648 break;
649 }
650 }
651}
652
653#else /* CONFIG_CPU_SUP_INTEL */
654
655static int reserve_ds_buffers(void)
656{
657 return 0;
658}
659
660static void release_ds_buffers(void)
661{
662}
663
664#endif /* CONFIG_CPU_SUP_INTEL */
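The x86_pmu.intel_cap bits that intel_ds_init() consumes come from the MSR_IA32_PERF_CAPABILITIES read done in intel_pmu_init() above. A stand-alone sketch of the field extraction; the bit layout here (LBR format in bits 5:0, PEBS trap in bit 6, PEBS record format in bits 11:8) mirrors the capabilities union used elsewhere in the x86 perf code, and the raw value is illustrative only:

#include <stdio.h>
#include <stdint.h>

struct intel_caps {
        unsigned lbr_format;    /* bits  5:0 */
        unsigned pebs_trap;     /* bit     6 */
        unsigned pebs_format;   /* bits 11:8 */
};

static struct intel_caps decode_caps(uint64_t msr)
{
        struct intel_caps c = {
                .lbr_format  =  msr        & 0x3f,
                .pebs_trap   = (msr >>  6) & 0x1,
                .pebs_format = (msr >>  8) & 0xf,
        };
        return c;
}

int main(void)
{
        struct intel_caps c = decode_caps(0x1c3);       /* illustrative value */

        /* prints lbr_format=3 pebs_trap=1 pebs_format=1 */
        printf("lbr_format=%u pebs_trap=%u pebs_format=%u\n",
               c.lbr_format, c.pebs_trap, c.pebs_format);
        return 0;
}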
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
new file mode 100644
index 000000000000..d202c1bece1a
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -0,0 +1,218 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3enum {
4 LBR_FORMAT_32 = 0x00,
5 LBR_FORMAT_LIP = 0x01,
6 LBR_FORMAT_EIP = 0x02,
7 LBR_FORMAT_EIP_FLAGS = 0x03,
8};
9
10/*
11 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
12 * otherwise it becomes nearly impossible to get a reliable stack.
13 */
14
15static void __intel_pmu_lbr_enable(void)
16{
17 u64 debugctl;
18
19 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
20 debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
21 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
22}
23
24static void __intel_pmu_lbr_disable(void)
25{
26 u64 debugctl;
27
28 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
29 debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
30 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
31}
32
33static void intel_pmu_lbr_reset_32(void)
34{
35 int i;
36
37 for (i = 0; i < x86_pmu.lbr_nr; i++)
38 wrmsrl(x86_pmu.lbr_from + i, 0);
39}
40
41static void intel_pmu_lbr_reset_64(void)
42{
43 int i;
44
45 for (i = 0; i < x86_pmu.lbr_nr; i++) {
46 wrmsrl(x86_pmu.lbr_from + i, 0);
47 wrmsrl(x86_pmu.lbr_to + i, 0);
48 }
49}
50
51static void intel_pmu_lbr_reset(void)
52{
53 if (!x86_pmu.lbr_nr)
54 return;
55
56 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
57 intel_pmu_lbr_reset_32();
58 else
59 intel_pmu_lbr_reset_64();
60}
61
62static void intel_pmu_lbr_enable(struct perf_event *event)
63{
64 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
65
66 if (!x86_pmu.lbr_nr)
67 return;
68
69 WARN_ON_ONCE(cpuc->enabled);
70
71 /*
72 * Reset the LBR stack if we changed task context to
73 * avoid data leaks.
74 */
75
76 if (event->ctx->task && cpuc->lbr_context != event->ctx) {
77 intel_pmu_lbr_reset();
78 cpuc->lbr_context = event->ctx;
79 }
80
81 cpuc->lbr_users++;
82}
83
84static void intel_pmu_lbr_disable(struct perf_event *event)
85{
86 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
87
88 if (!x86_pmu.lbr_nr)
89 return;
90
91 cpuc->lbr_users--;
92 WARN_ON_ONCE(cpuc->lbr_users < 0);
93
94 if (cpuc->enabled && !cpuc->lbr_users)
95 __intel_pmu_lbr_disable();
96}
97
98static void intel_pmu_lbr_enable_all(void)
99{
100 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
101
102 if (cpuc->lbr_users)
103 __intel_pmu_lbr_enable();
104}
105
106static void intel_pmu_lbr_disable_all(void)
107{
108 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
109
110 if (cpuc->lbr_users)
111 __intel_pmu_lbr_disable();
112}
113
114static inline u64 intel_pmu_lbr_tos(void)
115{
116 u64 tos;
117
118 rdmsrl(x86_pmu.lbr_tos, tos);
119
120 return tos;
121}
122
123static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
124{
125 unsigned long mask = x86_pmu.lbr_nr - 1;
126 u64 tos = intel_pmu_lbr_tos();
127 int i;
128
129 for (i = 0; i < x86_pmu.lbr_nr; i++) {
130 unsigned long lbr_idx = (tos - i) & mask;
131 union {
132 struct {
133 u32 from;
134 u32 to;
135 };
136 u64 lbr;
137 } msr_lastbranch;
138
139 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
140
141 cpuc->lbr_entries[i].from = msr_lastbranch.from;
142 cpuc->lbr_entries[i].to = msr_lastbranch.to;
143 cpuc->lbr_entries[i].flags = 0;
144 }
145 cpuc->lbr_stack.nr = i;
146}
147
148#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
149
150/*
151 * Due to lack of segmentation in Linux the effective address (offset)
152 * is the same as the linear address, allowing us to merge the LIP and EIP
153 * LBR formats.
154 */
155static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
156{
157 unsigned long mask = x86_pmu.lbr_nr - 1;
158 int lbr_format = x86_pmu.intel_cap.lbr_format;
159 u64 tos = intel_pmu_lbr_tos();
160 int i;
161
162 for (i = 0; i < x86_pmu.lbr_nr; i++) {
163 unsigned long lbr_idx = (tos - i) & mask;
164 u64 from, to, flags = 0;
165
166 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
167 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
168
169 if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
170 flags = !!(from & LBR_FROM_FLAG_MISPRED);
171 from = (u64)((((s64)from) << 1) >> 1);
172 }
173
174 cpuc->lbr_entries[i].from = from;
175 cpuc->lbr_entries[i].to = to;
176 cpuc->lbr_entries[i].flags = flags;
177 }
178 cpuc->lbr_stack.nr = i;
179}
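The EIP_FLAGS handling above overloads bit 63 of the FROM address with the mispredict flag; the shift-left/arithmetic-shift-right pair strips the flag while sign-extending bit 62, which restores a canonical kernel address. A stand-alone check with an illustrative address:

#include <stdio.h>
#include <stdint.h>

#define LBR_FROM_FLAG_MISPRED   (1ULL << 63)

static uint64_t strip_mispred(uint64_t raw, int *mispred)
{
        *mispred = !!(raw & LBR_FROM_FLAG_MISPRED);
        return (uint64_t)(((int64_t)raw << 1) >> 1);    /* sign-extend bit 62 */
}

int main(void)
{
        int m;
        uint64_t from;

        /* bits 62:0 of a kernel-text address, flag clear vs. flag set */
        from = strip_mispred(0x7fffffff81234567ULL, &m);
        printf("mispred=%d from=%#llx\n", m, (unsigned long long)from);
        from = strip_mispred(0xffffffff81234567ULL, &m);
        printf("mispred=%d from=%#llx\n", m, (unsigned long long)from);
        /* both lines report from=0xffffffff81234567 */
        return 0;
}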
180
181static void intel_pmu_lbr_read(void)
182{
183 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
184
185 if (!cpuc->lbr_users)
186 return;
187
188 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
189 intel_pmu_lbr_read_32(cpuc);
190 else
191 intel_pmu_lbr_read_64(cpuc);
192}
193
194static void intel_pmu_lbr_init_core(void)
195{
196 x86_pmu.lbr_nr = 4;
197 x86_pmu.lbr_tos = 0x01c9;
198 x86_pmu.lbr_from = 0x40;
199 x86_pmu.lbr_to = 0x60;
200}
201
202static void intel_pmu_lbr_init_nhm(void)
203{
204 x86_pmu.lbr_nr = 16;
205 x86_pmu.lbr_tos = 0x01c9;
206 x86_pmu.lbr_from = 0x680;
207 x86_pmu.lbr_to = 0x6c0;
208}
209
210static void intel_pmu_lbr_init_atom(void)
211{
212 x86_pmu.lbr_nr = 8;
213 x86_pmu.lbr_tos = 0x01c9;
214 x86_pmu.lbr_from = 0x40;
215 x86_pmu.lbr_to = 0x60;
216}
217
218#endif /* CONFIG_CPU_SUP_INTEL */
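All three init functions set lbr_nr to a power of two, which is what lets intel_pmu_lbr_read_32/_64 walk the stack most-recent-first with a simple mask. A stand-alone sketch with an illustrative top-of-stack value:

#include <stdio.h>

int main(void)
{
        unsigned long lbr_nr = 16, mask = lbr_nr - 1;   /* Nehalem-sized stack */
        unsigned long tos = 5;                          /* illustrative TOS */
        unsigned long i;

        for (i = 0; i < lbr_nr; i++)
                printf("%lu ", (tos - i) & mask);       /* 5 4 3 2 1 0 15 14 ... */
        printf("\n");
        return 0;
}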
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
new file mode 100644
index 000000000000..15367cce66bd
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -0,0 +1,834 @@
1/*
2 * Netburst Performance Events (P4, old Xeon)
3 *
4 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
5 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
6 *
7 * For licensing details see kernel-base/COPYING
8 */
9
10#ifdef CONFIG_CPU_SUP_INTEL
11
12#include <asm/perf_event_p4.h>
13
14#define P4_CNTR_LIMIT 3
15/*
16 * array indices: 0,1 - HT threads, used on CPUs with HT enabled
17 */
18struct p4_event_bind {
19 unsigned int opcode; /* Event code and ESCR selector */
20 unsigned int escr_msr[2]; /* ESCR MSR for this event */
21 unsigned char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
22};
23
24struct p4_cache_event_bind {
25 unsigned int metric_pebs;
26 unsigned int metric_vert;
27};
28
29#define P4_GEN_CACHE_EVENT_BIND(name) \
30 [P4_CACHE__##name] = { \
31 .metric_pebs = P4_PEBS__##name, \
32 .metric_vert = P4_VERT__##name, \
33 }
34
35static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
36 P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
37 P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
38 P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
39 P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
40};
41
42/*
43 * Note that we don't use CCCR1 here; there is an
44 * exception for P4_BSQ_ALLOCATION, but we simply have
45 * no workaround for it.
46 *
47 * Consider this binding as the resources a particular
48 * event may borrow; it doesn't contain the EventMask,
49 * Tags and friends -- those are left to the caller.
50 */
51static struct p4_event_bind p4_event_bind_map[] = {
52 [P4_EVENT_TC_DELIVER_MODE] = {
53 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
54 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
55 .cntr = { {4, 5, -1}, {6, 7, -1} },
56 },
57 [P4_EVENT_BPU_FETCH_REQUEST] = {
58 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
59 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
60 .cntr = { {0, -1, -1}, {2, -1, -1} },
61 },
62 [P4_EVENT_ITLB_REFERENCE] = {
63 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
64 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
65 .cntr = { {0, -1, -1}, {2, -1, -1} },
66 },
67 [P4_EVENT_MEMORY_CANCEL] = {
68 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
69 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
70 .cntr = { {8, 9, -1}, {10, 11, -1} },
71 },
72 [P4_EVENT_MEMORY_COMPLETE] = {
73 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
74 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
75 .cntr = { {8, 9, -1}, {10, 11, -1} },
76 },
77 [P4_EVENT_LOAD_PORT_REPLAY] = {
78 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
79 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
80 .cntr = { {8, 9, -1}, {10, 11, -1} },
81 },
82 [P4_EVENT_STORE_PORT_REPLAY] = {
83 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
84 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
85 .cntr = { {8, 9, -1}, {10, 11, -1} },
86 },
87 [P4_EVENT_MOB_LOAD_REPLAY] = {
88 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
89 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
90 .cntr = { {0, -1, -1}, {2, -1, -1} },
91 },
92 [P4_EVENT_PAGE_WALK_TYPE] = {
93 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
94 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
95 .cntr = { {0, -1, -1}, {2, -1, -1} },
96 },
97 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
98 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
99 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
100 .cntr = { {0, -1, -1}, {2, -1, -1} },
101 },
102 [P4_EVENT_IOQ_ALLOCATION] = {
103 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
104 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
105 .cntr = { {0, -1, -1}, {2, -1, -1} },
106 },
107 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
108 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
109 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
110 .cntr = { {2, -1, -1}, {3, -1, -1} },
111 },
112 [P4_EVENT_FSB_DATA_ACTIVITY] = {
113 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
114 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
115 .cntr = { {0, -1, -1}, {2, -1, -1} },
116 },
117 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
118 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
119 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
120 .cntr = { {0, -1, -1}, {1, -1, -1} },
121 },
122 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
123 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
124 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
125 .cntr = { {2, -1, -1}, {3, -1, -1} },
126 },
127 [P4_EVENT_SSE_INPUT_ASSIST] = {
128 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
129 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
130 .cntr = { {8, 9, -1}, {10, 11, -1} },
131 },
132 [P4_EVENT_PACKED_SP_UOP] = {
133 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
134 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
135 .cntr = { {8, 9, -1}, {10, 11, -1} },
136 },
137 [P4_EVENT_PACKED_DP_UOP] = {
138 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
139 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
140 .cntr = { {8, 9, -1}, {10, 11, -1} },
141 },
142 [P4_EVENT_SCALAR_SP_UOP] = {
143 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
144 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
145 .cntr = { {8, 9, -1}, {10, 11, -1} },
146 },
147 [P4_EVENT_SCALAR_DP_UOP] = {
148 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
149 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
150 .cntr = { {8, 9, -1}, {10, 11, -1} },
151 },
152 [P4_EVENT_64BIT_MMX_UOP] = {
153 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
154 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
155 .cntr = { {8, 9, -1}, {10, 11, -1} },
156 },
157 [P4_EVENT_128BIT_MMX_UOP] = {
158 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
159 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
160 .cntr = { {8, 9, -1}, {10, 11, -1} },
161 },
162 [P4_EVENT_X87_FP_UOP] = {
163 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
164 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
165 .cntr = { {8, 9, -1}, {10, 11, -1} },
166 },
167 [P4_EVENT_TC_MISC] = {
168 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
169 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
170 .cntr = { {4, 5, -1}, {6, 7, -1} },
171 },
172 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
173 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
174 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
175 .cntr = { {0, -1, -1}, {2, -1, -1} },
176 },
177 [P4_EVENT_TC_MS_XFER] = {
178 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
179 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
180 .cntr = { {4, 5, -1}, {6, 7, -1} },
181 },
182 [P4_EVENT_UOP_QUEUE_WRITES] = {
183 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
184 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
185 .cntr = { {4, 5, -1}, {6, 7, -1} },
186 },
187 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
188 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
189 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
190 .cntr = { {4, 5, -1}, {6, 7, -1} },
191 },
192 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
193 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
194 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
195 .cntr = { {4, 5, -1}, {6, 7, -1} },
196 },
197 [P4_EVENT_RESOURCE_STALL] = {
198 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
199 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
200 .cntr = { {12, 13, 16}, {14, 15, 17} },
201 },
202 [P4_EVENT_WC_BUFFER] = {
203 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
204 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
205 .cntr = { {8, 9, -1}, {10, 11, -1} },
206 },
207 [P4_EVENT_B2B_CYCLES] = {
208 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
209 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
210 .cntr = { {0, -1, -1}, {2, -1, -1} },
211 },
212 [P4_EVENT_BNR] = {
213 .opcode = P4_OPCODE(P4_EVENT_BNR),
214 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
215 .cntr = { {0, -1, -1}, {2, -1, -1} },
216 },
217 [P4_EVENT_SNOOP] = {
218 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
219 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
220 .cntr = { {0, -1, -1}, {2, -1, -1} },
221 },
222 [P4_EVENT_RESPONSE] = {
223 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
224 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
225 .cntr = { {0, -1, -1}, {2, -1, -1} },
226 },
227 [P4_EVENT_FRONT_END_EVENT] = {
228 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
229 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
230 .cntr = { {12, 13, 16}, {14, 15, 17} },
231 },
232 [P4_EVENT_EXECUTION_EVENT] = {
233 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
234 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
235 .cntr = { {12, 13, 16}, {14, 15, 17} },
236 },
237 [P4_EVENT_REPLAY_EVENT] = {
238 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
239 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
240 .cntr = { {12, 13, 16}, {14, 15, 17} },
241 },
242 [P4_EVENT_INSTR_RETIRED] = {
243 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
244 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
245 .cntr = { {12, 13, 16}, {14, 15, 17} },
246 },
247 [P4_EVENT_UOPS_RETIRED] = {
248 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
249 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
250 .cntr = { {12, 13, 16}, {14, 15, 17} },
251 },
252 [P4_EVENT_UOP_TYPE] = {
253 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
254 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
255 .cntr = { {12, 13, 16}, {14, 15, 17} },
256 },
257 [P4_EVENT_BRANCH_RETIRED] = {
258 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
259 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
260 .cntr = { {12, 13, 16}, {14, 15, 17} },
261 },
262 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
263 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
264 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
265 .cntr = { {12, 13, 16}, {14, 15, 17} },
266 },
267 [P4_EVENT_X87_ASSIST] = {
268 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
269 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
270 .cntr = { {12, 13, 16}, {14, 15, 17} },
271 },
272 [P4_EVENT_MACHINE_CLEAR] = {
273 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
274 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
275 .cntr = { {12, 13, 16}, {14, 15, 17} },
276 },
277 [P4_EVENT_INSTR_COMPLETED] = {
278 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
279 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
280 .cntr = { {12, 13, 16}, {14, 15, 17} },
281 },
282};
283
284#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \
285 p4_config_pack_escr(P4_ESCR_EVENT(event) | \
286 P4_ESCR_EMASK_BIT(event, bit)) | \
287 p4_config_pack_cccr(cache_event | \
288 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
289
290static __initconst const u64 p4_hw_cache_event_ids
291 [PERF_COUNT_HW_CACHE_MAX]
292 [PERF_COUNT_HW_CACHE_OP_MAX]
293 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
294{
295 [ C(L1D ) ] = {
296 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0,
298 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
299 P4_CACHE__1stl_cache_load_miss_retired),
300 },
301 },
302 [ C(LL ) ] = {
303 [ C(OP_READ) ] = {
304 [ C(RESULT_ACCESS) ] = 0x0,
305 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
306 P4_CACHE__2ndl_cache_load_miss_retired),
307 },
308 },
309 [ C(DTLB) ] = {
310 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x0,
312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
313 P4_CACHE__dtlb_load_miss_retired),
314 },
315 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0,
317 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
318 P4_CACHE__dtlb_store_miss_retired),
319 },
320 },
321 [ C(ITLB) ] = {
322 [ C(OP_READ) ] = {
323 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
324 P4_CACHE__itlb_reference_hit),
325 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
326 P4_CACHE__itlb_reference_miss),
327 },
328 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1,
330 [ C(RESULT_MISS) ] = -1,
331 },
332 [ C(OP_PREFETCH) ] = {
333 [ C(RESULT_ACCESS) ] = -1,
334 [ C(RESULT_MISS) ] = -1,
335 },
336 },
337};
338
339static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
340 /* non-halted CPU clocks */
341 [PERF_COUNT_HW_CPU_CYCLES] =
342 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
343 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
344
345 /*
346 * retired instructions
347 * for the sake of simplicity we don't use the FSB tagging
348 */
349 [PERF_COUNT_HW_INSTRUCTIONS] =
350 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
351 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
352 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
353
354 /* cache hits */
355 [PERF_COUNT_HW_CACHE_REFERENCES] =
356 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
359 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
360 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
361 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
362 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
363
364 /* cache misses */
365 [PERF_COUNT_HW_CACHE_MISSES] =
366 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
367 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
368 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
369 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
370
371 /* branch instructions retired */
372 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
373 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
374 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
375 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
376 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
377 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
378
379 /* mispredicted branches retired */
380 [PERF_COUNT_HW_BRANCH_MISSES] =
381 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
382 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
383
384 /* bus ready clocks (cpu is driving #DRDY_DRV or #DRDY_OWN): */
385 [PERF_COUNT_HW_BUS_CYCLES] =
386 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
387 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
388 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
389 p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
390};
391
392static struct p4_event_bind *p4_config_get_bind(u64 config)
393{
394 unsigned int evnt = p4_config_unpack_event(config);
395 struct p4_event_bind *bind = NULL;
396
397 if (evnt < ARRAY_SIZE(p4_event_bind_map))
398 bind = &p4_event_bind_map[evnt];
399
400 return bind;
401}
402
403static u64 p4_pmu_event_map(int hw_event)
404{
405 struct p4_event_bind *bind;
406 unsigned int esel;
407 u64 config;
408
409 if (hw_event >= ARRAY_SIZE(p4_general_events)) {
410 printk_once(KERN_ERR "P4 PMU: Bad index: %i\n", hw_event);
411 return 0;
412 }
413
414 config = p4_general_events[hw_event];
415 bind = p4_config_get_bind(config);
416 esel = P4_OPCODE_ESEL(bind->opcode);
417 config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
418
419 return config;
420}
421
422static int p4_hw_config(struct perf_event *event)
423{
424 int cpu = raw_smp_processor_id();
425 u32 escr, cccr;
426
427 /*
428 * The reason we read the cpu this early: if we get scheduled for the
429 * first time on the same cpu, we will not need to swap the thread
430 * specific flags in the config (and will save some cpu cycles)
431 */
432
433 cccr = p4_default_cccr_conf(cpu);
434 escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
435 event->attr.exclude_user);
436 event->hw.config = p4_config_pack_escr(escr) |
437 p4_config_pack_cccr(cccr);
438
439 if (p4_ht_active() && p4_ht_thread(cpu))
440 event->hw.config = p4_set_ht_bit(event->hw.config);
441
442 if (event->attr.type != PERF_TYPE_RAW)
443 return 0;
444
445 /*
446 * We don't control raw events, so it's up to the caller
447 * to pass sane values (and we don't count the thread number
448 * on an HT machine, but we do allow HT-compatible specifics
449 * to be passed on)
450 *
451 * XXX: HT wide things should check perf_paranoid_cpu() &&
452 * CAP_SYS_ADMIN
453 */
454 event->hw.config |= event->attr.config &
455 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
456 p4_config_pack_cccr(P4_CCCR_MASK_HT));
457
458 return 0;
459}
460
461static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
462{
463 unsigned long dummy;
464
465 rdmsrl(hwc->config_base + hwc->idx, dummy);
466 if (dummy & P4_CCCR_OVF) {
467 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
468 ((u64)dummy) & ~P4_CCCR_OVF);
469 }
470}
471
472static inline void p4_pmu_disable_event(struct perf_event *event)
473{
474 struct hw_perf_event *hwc = &event->hw;
475
476 /*
477 * If event gets disabled while counter is in overflowed
478 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
479 * asserted again and again
480 */
481 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
482 (u64)(p4_config_unpack_cccr(hwc->config)) &
483 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
484}
485
486static void p4_pmu_disable_all(void)
487{
488 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
489 int idx;
490
491 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
492 struct perf_event *event = cpuc->events[idx];
493 if (!test_bit(idx, cpuc->active_mask))
494 continue;
495 p4_pmu_disable_event(event);
496 }
497}
498
499static void p4_pmu_enable_event(struct perf_event *event)
500{
501 struct hw_perf_event *hwc = &event->hw;
502 int thread = p4_ht_config_thread(hwc->config);
503 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
504 unsigned int idx = p4_config_unpack_event(hwc->config);
505 unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
506 struct p4_event_bind *bind;
507 struct p4_cache_event_bind *bind_cache;
508 u64 escr_addr, cccr;
509
510 bind = &p4_event_bind_map[idx];
511 escr_addr = (u64)bind->escr_msr[thread];
512
513 /*
514 * - we don't support cascaded counters yet
515 * - and counter 1 is broken (erratum)
516 */
517 WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
518 WARN_ON_ONCE(hwc->idx == 1);
519
520 /* we need a real Event value */
521 escr_conf &= ~P4_ESCR_EVENT_MASK;
522 escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
523
524 cccr = p4_config_unpack_cccr(hwc->config);
525
526 /*
527 * it could be a cache event, in which case we need to
528 * program the metrics into additional MSRs
529 */
530 BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
531 if (idx_cache > P4_CACHE__NONE &&
532 idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
533 bind_cache = &p4_cache_event_bind_map[idx_cache];
534 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
535 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
536 }
537
538 (void)checking_wrmsrl(escr_addr, escr_conf);
539 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
540 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
541}
542
543static void p4_pmu_enable_all(int added)
544{
545 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
546 int idx;
547
548 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
549 struct perf_event *event = cpuc->events[idx];
550 if (!test_bit(idx, cpuc->active_mask))
551 continue;
552 p4_pmu_enable_event(event);
553 }
554}
555
556static int p4_pmu_handle_irq(struct pt_regs *regs)
557{
558 struct perf_sample_data data;
559 struct cpu_hw_events *cpuc;
560 struct perf_event *event;
561 struct hw_perf_event *hwc;
562 int idx, handled = 0;
563 u64 val;
564
565 data.addr = 0;
566 data.raw = NULL;
567
568 cpuc = &__get_cpu_var(cpu_hw_events);
569
570 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
571
572 if (!test_bit(idx, cpuc->active_mask))
573 continue;
574
575 event = cpuc->events[idx];
576 hwc = &event->hw;
577
578 WARN_ON_ONCE(hwc->idx != idx);
579
580 /*
581 * FIXME: Redundant call, actually not needed
582 * but just to check if we're screwed
583 */
584 p4_pmu_clear_cccr_ovf(hwc);
585
586 val = x86_perf_event_update(event);
587 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
588 continue;
589
590 /*
591 * event overflow
592 */
593 handled = 1;
594 data.period = event->hw.last_period;
595
596 if (!x86_perf_event_set_period(event))
597 continue;
598 if (perf_event_overflow(event, 1, &data, regs))
599 p4_pmu_disable_event(event);
600 }
601
602 if (handled) {
603 /* p4 quirk: unmask it again */
604 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
605 inc_irq_stat(apic_perf_irqs);
606 }
607
608 return handled;
609}
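The bit test above relies on the counters being armed at -period, masked to the 40-bit counter width (as the generic x86 perf code does when it programs a period), so a counter that has not yet wrapped still has bit 39 set. A stand-alone sketch under that assumption:

#include <stdio.h>
#include <stdint.h>

#define CNTVAL_BITS     40
#define CNTVAL_MASK     ((1ULL << CNTVAL_BITS) - 1)

static int overflowed(uint64_t raw)
{
        return !(raw & (1ULL << (CNTVAL_BITS - 1)));
}

int main(void)
{
        uint64_t period = 100000;
        uint64_t armed  = (uint64_t)(-(int64_t)period) & CNTVAL_MASK;

        printf("armed at %#llx, overflowed=%d\n",
               (unsigned long long)armed, overflowed(armed));           /* 0 */
        printf("after the period expires, overflowed=%d\n",
               overflowed((armed + period) & CNTVAL_MASK));             /* 1 */
        return 0;
}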
610
611/*
612 * swap thread specific fields according to the thread
613 * we are going to run on
614 */
615static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
616{
617 u32 escr, cccr;
618
619 /*
620 * either we are lucky and continue on the same cpu, or there is no HT support
621 */
622 if (!p4_should_swap_ts(hwc->config, cpu))
623 return;
624
625 /*
626 * the event has migrated from another logical
627 * cpu, so we need to swap thread specific flags
628 */
629
630 escr = p4_config_unpack_escr(hwc->config);
631 cccr = p4_config_unpack_cccr(hwc->config);
632
633 if (p4_ht_thread(cpu)) {
634 cccr &= ~P4_CCCR_OVF_PMI_T0;
635 cccr |= P4_CCCR_OVF_PMI_T1;
636 if (escr & P4_ESCR_T0_OS) {
637 escr &= ~P4_ESCR_T0_OS;
638 escr |= P4_ESCR_T1_OS;
639 }
640 if (escr & P4_ESCR_T0_USR) {
641 escr &= ~P4_ESCR_T0_USR;
642 escr |= P4_ESCR_T1_USR;
643 }
644 hwc->config = p4_config_pack_escr(escr);
645 hwc->config |= p4_config_pack_cccr(cccr);
646 hwc->config |= P4_CONFIG_HT;
647 } else {
648 cccr &= ~P4_CCCR_OVF_PMI_T1;
649 cccr |= P4_CCCR_OVF_PMI_T0;
650 if (escr & P4_ESCR_T1_OS) {
651 escr &= ~P4_ESCR_T1_OS;
652 escr |= P4_ESCR_T0_OS;
653 }
654 if (escr & P4_ESCR_T1_USR) {
655 escr &= ~P4_ESCR_T1_USR;
656 escr |= P4_ESCR_T0_USR;
657 }
658 hwc->config = p4_config_pack_escr(escr);
659 hwc->config |= p4_config_pack_cccr(cccr);
660 hwc->config &= ~P4_CONFIG_HT;
661 }
662}
663
664/* ESCRs are not sequential in memory so we need a map */
665static const unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = {
666 MSR_P4_ALF_ESCR0, /* 0 */
667 MSR_P4_ALF_ESCR1, /* 1 */
668 MSR_P4_BPU_ESCR0, /* 2 */
669 MSR_P4_BPU_ESCR1, /* 3 */
670 MSR_P4_BSU_ESCR0, /* 4 */
671 MSR_P4_BSU_ESCR1, /* 5 */
672 MSR_P4_CRU_ESCR0, /* 6 */
673 MSR_P4_CRU_ESCR1, /* 7 */
674 MSR_P4_CRU_ESCR2, /* 8 */
675 MSR_P4_CRU_ESCR3, /* 9 */
676 MSR_P4_CRU_ESCR4, /* 10 */
677 MSR_P4_CRU_ESCR5, /* 11 */
678 MSR_P4_DAC_ESCR0, /* 12 */
679 MSR_P4_DAC_ESCR1, /* 13 */
680 MSR_P4_FIRM_ESCR0, /* 14 */
681 MSR_P4_FIRM_ESCR1, /* 15 */
682 MSR_P4_FLAME_ESCR0, /* 16 */
683 MSR_P4_FLAME_ESCR1, /* 17 */
684 MSR_P4_FSB_ESCR0, /* 18 */
685 MSR_P4_FSB_ESCR1, /* 19 */
686 MSR_P4_IQ_ESCR0, /* 20 */
687 MSR_P4_IQ_ESCR1, /* 21 */
688 MSR_P4_IS_ESCR0, /* 22 */
689 MSR_P4_IS_ESCR1, /* 23 */
690 MSR_P4_ITLB_ESCR0, /* 24 */
691 MSR_P4_ITLB_ESCR1, /* 25 */
692 MSR_P4_IX_ESCR0, /* 26 */
693 MSR_P4_IX_ESCR1, /* 27 */
694 MSR_P4_MOB_ESCR0, /* 28 */
695 MSR_P4_MOB_ESCR1, /* 29 */
696 MSR_P4_MS_ESCR0, /* 30 */
697 MSR_P4_MS_ESCR1, /* 31 */
698 MSR_P4_PMH_ESCR0, /* 32 */
699 MSR_P4_PMH_ESCR1, /* 33 */
700 MSR_P4_RAT_ESCR0, /* 34 */
701 MSR_P4_RAT_ESCR1, /* 35 */
702 MSR_P4_SAAT_ESCR0, /* 36 */
703 MSR_P4_SAAT_ESCR1, /* 37 */
704 MSR_P4_SSU_ESCR0, /* 38 */
705 MSR_P4_SSU_ESCR1, /* 39 */
706 MSR_P4_TBPU_ESCR0, /* 40 */
707 MSR_P4_TBPU_ESCR1, /* 41 */
708 MSR_P4_TC_ESCR0, /* 42 */
709 MSR_P4_TC_ESCR1, /* 43 */
710 MSR_P4_U2L_ESCR0, /* 44 */
711 MSR_P4_U2L_ESCR1, /* 45 */
712};
713
714static int p4_get_escr_idx(unsigned int addr)
715{
716 unsigned int i;
717
718 for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) {
719 if (addr == p4_escr_map[i])
720 return i;
721 }
722
723 return -1;
724}
725
726static int p4_next_cntr(int thread, unsigned long *used_mask,
727 struct p4_event_bind *bind)
728{
729 int i = 0, j;
730
731 for (i = 0; i < P4_CNTR_LIMIT; i++) {
732 j = bind->cntr[thread][i];
733 if (j == -1 || !test_bit(j, used_mask))
734 return j;
735 }
736
737 return -1;
738}
739
740static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
741{
742 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
743 unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)];
744 int cpu = raw_smp_processor_id();
745 struct hw_perf_event *hwc;
746 struct p4_event_bind *bind;
747 unsigned int i, thread, num;
748 int cntr_idx, escr_idx;
749
750 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
751 bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR);
752
753 for (i = 0, num = n; i < n; i++, num--) {
754
755 hwc = &cpuc->event_list[i]->hw;
756 thread = p4_ht_thread(cpu);
757 bind = p4_config_get_bind(hwc->config);
758 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
759
760 if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
761 cntr_idx = hwc->idx;
762 if (assign)
763 assign[i] = hwc->idx;
764 goto reserve;
765 }
766
767 cntr_idx = p4_next_cntr(thread, used_mask, bind);
768 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
769 goto done;
770
771 p4_pmu_swap_config_ts(hwc, cpu);
772 if (assign)
773 assign[i] = cntr_idx;
774reserve:
775 set_bit(cntr_idx, used_mask);
776 set_bit(escr_idx, escr_mask);
777 }
778
779done:
780 return num ? -ENOSPC : 0;
781}
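/*
 * For illustration: if two events in one group resolve to the same
 * ESCR MSR on the same thread (say both use MSR_P4_FSB_ESCR0), the
 * second event finds its escr_idx already set in escr_mask, the loop
 * bails out early and -ENOSPC is returned for the events that could
 * not be scheduled.
 */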
782
783static __initconst const struct x86_pmu p4_pmu = {
784 .name = "Netburst P4/Xeon",
785 .handle_irq = p4_pmu_handle_irq,
786 .disable_all = p4_pmu_disable_all,
787 .enable_all = p4_pmu_enable_all,
788 .enable = p4_pmu_enable_event,
789 .disable = p4_pmu_disable_event,
790 .eventsel = MSR_P4_BPU_CCCR0,
791 .perfctr = MSR_P4_BPU_PERFCTR0,
792 .event_map = p4_pmu_event_map,
793 .max_events = ARRAY_SIZE(p4_general_events),
794 .get_event_constraints = x86_get_event_constraints,
795 /*
796	 * If HT is disabled we may need to use all
797	 * ARCH_P4_MAX_CCCR counters simultaneously,
798	 * though for the moment leave it restricted,
799	 * assuming HT is on
800 */
801 .num_counters = ARCH_P4_MAX_CCCR,
802 .apic = 1,
803 .cntval_bits = 40,
804 .cntval_mask = (1ULL << 40) - 1,
805 .max_period = (1ULL << 39) - 1,
806 .hw_config = p4_hw_config,
807 .schedule_events = p4_pmu_schedule_events,
808};
809
810static __init int p4_pmu_init(void)
811{
812 unsigned int low, high;
813
814	/* If we get stripped -- indexing fails */
815 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
816
817 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
818 if (!(low & (1 << 7))) {
819 pr_cont("unsupported Netburst CPU model %d ",
820 boot_cpu_data.x86_model);
821 return -ENODEV;
822 }
823
824 memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
825 sizeof(hw_cache_event_ids));
826
827 pr_cont("Netburst events, ");
828
829 x86_pmu = p4_pmu;
830
831 return 0;
832}
833
834#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index a330485d14da..34ba07be2cda 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event)
27 */ 27 */
28#define P6_NOP_EVENT 0x0000002EULL 28#define P6_NOP_EVENT 0x0000002EULL
29 29
30static u64 p6_pmu_raw_event(u64 hw_event)
31{
32#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
33#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
34#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
35#define P6_EVNTSEL_INV_MASK 0x00800000ULL
36#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
37
38#define P6_EVNTSEL_MASK \
39 (P6_EVNTSEL_EVENT_MASK | \
40 P6_EVNTSEL_UNIT_MASK | \
41 P6_EVNTSEL_EDGE_MASK | \
42 P6_EVNTSEL_INV_MASK | \
43 P6_EVNTSEL_REG_MASK)
44
45 return hw_event & P6_EVNTSEL_MASK;
46}
47
48static struct event_constraint p6_event_constraints[] = 30static struct event_constraint p6_event_constraints[] =
49{ 31{
50 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ 32 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
@@ -66,7 +48,7 @@ static void p6_pmu_disable_all(void)
66 wrmsrl(MSR_P6_EVNTSEL0, val); 48 wrmsrl(MSR_P6_EVNTSEL0, val);
67} 49}
68 50
69static void p6_pmu_enable_all(void) 51static void p6_pmu_enable_all(int added)
70{ 52{
71 unsigned long val; 53 unsigned long val;
72 54
@@ -102,22 +84,23 @@ static void p6_pmu_enable_event(struct perf_event *event)
102 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); 84 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
103} 85}
104 86
105static __initconst struct x86_pmu p6_pmu = { 87static __initconst const struct x86_pmu p6_pmu = {
106 .name = "p6", 88 .name = "p6",
107 .handle_irq = x86_pmu_handle_irq, 89 .handle_irq = x86_pmu_handle_irq,
108 .disable_all = p6_pmu_disable_all, 90 .disable_all = p6_pmu_disable_all,
109 .enable_all = p6_pmu_enable_all, 91 .enable_all = p6_pmu_enable_all,
110 .enable = p6_pmu_enable_event, 92 .enable = p6_pmu_enable_event,
111 .disable = p6_pmu_disable_event, 93 .disable = p6_pmu_disable_event,
94 .hw_config = x86_pmu_hw_config,
95 .schedule_events = x86_schedule_events,
112 .eventsel = MSR_P6_EVNTSEL0, 96 .eventsel = MSR_P6_EVNTSEL0,
113 .perfctr = MSR_P6_PERFCTR0, 97 .perfctr = MSR_P6_PERFCTR0,
114 .event_map = p6_pmu_event_map, 98 .event_map = p6_pmu_event_map,
115 .raw_event = p6_pmu_raw_event,
116 .max_events = ARRAY_SIZE(p6_perfmon_event_map), 99 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
117 .apic = 1, 100 .apic = 1,
118 .max_period = (1ULL << 31) - 1, 101 .max_period = (1ULL << 31) - 1,
119 .version = 0, 102 .version = 0,
120 .num_events = 2, 103 .num_counters = 2,
121 /* 104 /*
122 * Events have 40 bits implemented. However they are designed such 105 * Events have 40 bits implemented. However they are designed such
123 * that bits [32-39] are sign extensions of bit 31. As such the 106 * that bits [32-39] are sign extensions of bit 31. As such the
@@ -125,8 +108,8 @@ static __initconst struct x86_pmu p6_pmu = {
125 * 108 *
126 * See IA-32 Intel Architecture Software developer manual Vol 3B 109 * See IA-32 Intel Architecture Software developer manual Vol 3B
127 */ 110 */
128 .event_bits = 32, 111 .cntval_bits = 32,
129 .event_mask = (1ULL << 32) - 1, 112 .cntval_mask = (1ULL << 32) - 1,
130 .get_event_constraints = x86_get_event_constraints, 113 .get_event_constraints = x86_get_event_constraints,
131 .event_constraints = p6_event_constraints, 114 .event_constraints = p6_event_constraints,
132}; 115};
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
deleted file mode 100644
index 1c47390dd0e5..000000000000
--- a/arch/x86/kernel/ds.c
+++ /dev/null
@@ -1,1437 +0,0 @@
1/*
2 * Debug Store support
3 *
4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS).
7 *
8 * It manages:
9 * - DS and BTS hardware configuration
10 * - buffer overflow handling (to be done)
11 * - buffer access
12 *
13 * It does not do:
14 * - security checking (is the caller allowed to trace the task)
15 * - buffer allocation (memory accounting)
16 *
17 *
18 * Copyright (C) 2007-2009 Intel Corporation.
19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
20 */
21
22#include <linux/kernel.h>
23#include <linux/string.h>
24#include <linux/errno.h>
25#include <linux/sched.h>
26#include <linux/slab.h>
27#include <linux/mm.h>
28#include <linux/trace_clock.h>
29
30#include <asm/ds.h>
31
32#include "ds_selftest.h"
33
34/*
35 * The configuration for a particular DS hardware implementation:
36 */
37struct ds_configuration {
38 /* The name of the configuration: */
39 const char *name;
40
41 /* The size of pointer-typed fields in DS, BTS, and PEBS: */
42 unsigned char sizeof_ptr_field;
43
44 /* The size of a BTS/PEBS record in bytes: */
45 unsigned char sizeof_rec[2];
46
47 /* The number of pebs counter reset values in the DS structure. */
48 unsigned char nr_counter_reset;
49
50 /* Control bit-masks indexed by enum ds_feature: */
51 unsigned long ctl[dsf_ctl_max];
52};
53static struct ds_configuration ds_cfg __read_mostly;
54
55
56/* Maximal size of a DS configuration: */
57#define MAX_SIZEOF_DS 0x80
58
59/* Maximal size of a BTS record: */
60#define MAX_SIZEOF_BTS (3 * 8)
61
62/* BTS and PEBS buffer alignment: */
63#define DS_ALIGNMENT (1 << 3)
64
65/* Number of buffer pointers in DS: */
66#define NUM_DS_PTR_FIELDS 8
67
68/* Size of a pebs reset value in DS: */
69#define PEBS_RESET_FIELD_SIZE 8
70
71/* Mask of control bits in the DS MSR register: */
72#define BTS_CONTROL \
73 ( ds_cfg.ctl[dsf_bts] | \
74 ds_cfg.ctl[dsf_bts_kernel] | \
75 ds_cfg.ctl[dsf_bts_user] | \
76 ds_cfg.ctl[dsf_bts_overflow] )
77
78/*
79 * A BTS or PEBS tracer.
80 *
81 * This holds the configuration of the tracer and serves as a handle
82 * to identify tracers.
83 */
84struct ds_tracer {
85 /* The DS context (partially) owned by this tracer. */
86 struct ds_context *context;
87 /* The buffer provided on ds_request() and its size in bytes. */
88 void *buffer;
89 size_t size;
90};
91
92struct bts_tracer {
93 /* The common DS part: */
94 struct ds_tracer ds;
95
96 /* The trace including the DS configuration: */
97 struct bts_trace trace;
98
99 /* Buffer overflow notification function: */
100 bts_ovfl_callback_t ovfl;
101
102 /* Active flags affecting trace collection. */
103 unsigned int flags;
104};
105
106struct pebs_tracer {
107 /* The common DS part: */
108 struct ds_tracer ds;
109
110 /* The trace including the DS configuration: */
111 struct pebs_trace trace;
112
113 /* Buffer overflow notification function: */
114 pebs_ovfl_callback_t ovfl;
115};
116
117/*
118 * Debug Store (DS) save area configuration (see Intel64 and IA32
119 * Architectures Software Developer's Manual, section 18.5)
120 *
121 * The DS configuration consists of the following fields; different
122 * architectures vary in the size of those fields.
123 *
124 * - double-word aligned base linear address of the BTS buffer
125 * - write pointer into the BTS buffer
126 * - end linear address of the BTS buffer (one byte beyond the end of
127 * the buffer)
128 * - interrupt pointer into BTS buffer
129 * (interrupt occurs when write pointer passes interrupt pointer)
130 * - double-word aligned base linear address of the PEBS buffer
131 * - write pointer into the PEBS buffer
132 * - end linear address of the PEBS buffer (one byte beyond the end of
133 * the buffer)
134 * - interrupt pointer into PEBS buffer
135 * (interrupt occurs when write pointer passes interrupt pointer)
136 * - value to which counter is reset following counter overflow
137 *
138 * Later architectures use 64bit pointers throughout, whereas earlier
139 * architectures use 32bit pointers in 32bit mode.
140 *
141 *
142 * We compute the base address for the first 8 fields based on:
143 * - the field size stored in the DS configuration
144 * - the relative field position
145 * - an offset giving the start of the respective region
146 *
147 * This offset is further used to index various arrays holding
148 * information for BTS and PEBS at the respective index.
149 *
150 * On later 32bit processors, we only access the lower 32bit of the
151 * 64bit pointer fields. The upper halves will be zeroed out.
152 */
153
154enum ds_field {
155 ds_buffer_base = 0,
156 ds_index,
157 ds_absolute_maximum,
158 ds_interrupt_threshold,
159};
160
161enum ds_qualifier {
162 ds_bts = 0,
163 ds_pebs
164};
165
166static inline unsigned long
167ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
168{
169 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
170 return *(unsigned long *)base;
171}
172
173static inline void
174ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
175 unsigned long value)
176{
177 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
178 (*(unsigned long *)base) = value;
179}
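/*
 * Worked example, assuming 64bit pointer fields (DTES64): the PEBS
 * write pointer is field ds_index (1) of qualifier ds_pebs (1), so
 *
 *	ds_get(ds, ds_pebs, ds_index)
 *
 * reads the unsigned long at byte offset 8 * (1 + 4 * 1) = 40 of the
 * DS save area.
 */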
180
181
182/*
183 * Locking is done only for allocating BTS or PEBS resources.
184 */
185static DEFINE_SPINLOCK(ds_lock);
186
187/*
188 * We either support (system-wide) per-cpu or per-thread allocation.
189 * We distinguish the two based on the task_struct pointer, where a
190 * NULL pointer indicates per-cpu allocation for the current cpu.
191 *
192 * Allocations are use-counted. As soon as resources are allocated,
193 * further allocations must be of the same type (per-cpu or
194 * per-thread). We model this by counting allocations (i.e. the number
195 * of tracers of a certain type) for one type negatively:
196 * =0 no tracers
197 * >0 number of per-thread tracers
198 * <0 number of per-cpu tracers
199 *
200 * The tracers count essentially gives the number of ds contexts for a
201 * certain type of allocation.
202 */
203static atomic_t tracers = ATOMIC_INIT(0);
204
205static inline int get_tracer(struct task_struct *task)
206{
207 int error;
208
209 spin_lock_irq(&ds_lock);
210
211 if (task) {
212 error = -EPERM;
213 if (atomic_read(&tracers) < 0)
214 goto out;
215 atomic_inc(&tracers);
216 } else {
217 error = -EPERM;
218 if (atomic_read(&tracers) > 0)
219 goto out;
220 atomic_dec(&tracers);
221 }
222
223 error = 0;
224out:
225 spin_unlock_irq(&ds_lock);
226 return error;
227}
228
229static inline void put_tracer(struct task_struct *task)
230{
231 if (task)
232 atomic_dec(&tracers);
233 else
234 atomic_inc(&tracers);
235}
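/*
 * Example: with two per-thread BTS tracers active the count is +2, so
 * a per-cpu request (task == NULL) fails in get_tracer() with -EPERM;
 * once both tracers are released the count drops back to 0 and either
 * allocation type may be requested again.
 */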
236
237/*
238 * The DS context is either attached to a thread or to a cpu:
239 * - in the former case, the thread_struct contains a pointer to the
240 * attached context.
241 * - in the latter case, we use a static array of per-cpu context
242 * pointers.
243 *
244 * Contexts are use-counted. They are allocated on first access and
245 * deallocated when the last user puts the context.
246 */
247struct ds_context {
248 /* The DS configuration; goes into MSR_IA32_DS_AREA: */
249 unsigned char ds[MAX_SIZEOF_DS];
250
251 /* The owner of the BTS and PEBS configuration, respectively: */
252 struct bts_tracer *bts_master;
253 struct pebs_tracer *pebs_master;
254
255 /* Use count: */
256 unsigned long count;
257
258 /* Pointer to the context pointer field: */
259 struct ds_context **this;
260
261 /* The traced task; NULL for cpu tracing: */
262 struct task_struct *task;
263
264 /* The traced cpu; only valid if task is NULL: */
265 int cpu;
266};
267
268static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context);
269
270
271static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
272{
273 struct ds_context **p_context =
274 (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu));
275 struct ds_context *context = NULL;
276 struct ds_context *new_context = NULL;
277
278 /* Chances are small that we already have a context. */
279 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
280 if (!new_context)
281 return NULL;
282
283 spin_lock_irq(&ds_lock);
284
285 context = *p_context;
286 if (likely(!context)) {
287 context = new_context;
288
289 context->this = p_context;
290 context->task = task;
291 context->cpu = cpu;
292 context->count = 0;
293
294 *p_context = context;
295 }
296
297 context->count++;
298
299 spin_unlock_irq(&ds_lock);
300
301 if (context != new_context)
302 kfree(new_context);
303
304 return context;
305}
306
307static void ds_put_context(struct ds_context *context)
308{
309 struct task_struct *task;
310 unsigned long irq;
311
312 if (!context)
313 return;
314
315 spin_lock_irqsave(&ds_lock, irq);
316
317 if (--context->count) {
318 spin_unlock_irqrestore(&ds_lock, irq);
319 return;
320 }
321
322 *(context->this) = NULL;
323
324 task = context->task;
325
326 if (task)
327 clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
328
329 /*
330 * We leave the (now dangling) pointer to the DS configuration in
331 * the DS_AREA msr. This is as good or as bad as replacing it with
332 * NULL - the hardware would crash if we enabled tracing.
333 *
334 * This saves us some problems with having to write an msr on a
335 * different cpu while preventing others from doing the same for the
336 * next context for that same cpu.
337 */
338
339 spin_unlock_irqrestore(&ds_lock, irq);
340
341 /* The context might still be in use for context switching. */
342 if (task && (task != current))
343 wait_task_context_switch(task);
344
345 kfree(context);
346}
347
348static void ds_install_ds_area(struct ds_context *context)
349{
350 unsigned long ds;
351
352 ds = (unsigned long)context->ds;
353
354 /*
355 * There is a race between the bts master and the pebs master.
356 *
357 * The thread/cpu access is synchronized via get/put_cpu() for
358 * task tracing and via wrmsr_on_cpu for cpu tracing.
359 *
360 * If bts and pebs are collected for the same task or same cpu,
361	 * the same configuration is written twice.
362 */
363 if (context->task) {
364 get_cpu();
365 if (context->task == current)
366 wrmsrl(MSR_IA32_DS_AREA, ds);
367 set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
368 put_cpu();
369 } else
370 wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
371 (u32)((u64)ds), (u32)((u64)ds >> 32));
372}
373
374/*
375 * Call the tracer's callback on a buffer overflow.
376 *
377 * context: the ds context
378 * qual: the buffer type
379 */
380static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
381{
382 switch (qual) {
383 case ds_bts:
384 if (context->bts_master &&
385 context->bts_master->ovfl)
386 context->bts_master->ovfl(context->bts_master);
387 break;
388 case ds_pebs:
389 if (context->pebs_master &&
390 context->pebs_master->ovfl)
391 context->pebs_master->ovfl(context->pebs_master);
392 break;
393 }
394}
395
396
397/*
398 * Write raw data into the BTS or PEBS buffer.
399 *
400 * The remainder of any partially written record is zeroed out.
401 *
402 * context: the DS context
403 * qual: the buffer type
404 * record: the data to write
405 * size: the size of the data
406 */
407static int ds_write(struct ds_context *context, enum ds_qualifier qual,
408 const void *record, size_t size)
409{
410 int bytes_written = 0;
411
412 if (!record)
413 return -EINVAL;
414
415 while (size) {
416 unsigned long base, index, end, write_end, int_th;
417 unsigned long write_size, adj_write_size;
418
419 /*
420 * Write as much as possible without producing an
421 * overflow interrupt.
422 *
423 * Interrupt_threshold must either be
424 * - bigger than absolute_maximum or
425 * - point to a record between buffer_base and absolute_maximum
426 *
427 * Index points to a valid record.
428 */
429 base = ds_get(context->ds, qual, ds_buffer_base);
430 index = ds_get(context->ds, qual, ds_index);
431 end = ds_get(context->ds, qual, ds_absolute_maximum);
432 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
433
434 write_end = min(end, int_th);
435
436 /*
437 * If we are already beyond the interrupt threshold,
438 * we fill the entire buffer.
439 */
440 if (write_end <= index)
441 write_end = end;
442
443 if (write_end <= index)
444 break;
445
446 write_size = min((unsigned long) size, write_end - index);
447 memcpy((void *)index, record, write_size);
448
449 record = (const char *)record + write_size;
450 size -= write_size;
451 bytes_written += write_size;
452
453 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
454 adj_write_size *= ds_cfg.sizeof_rec[qual];
455
456 /* Zero out trailing bytes. */
457 memset((char *)index + write_size, 0,
458 adj_write_size - write_size);
459 index += adj_write_size;
460
461 if (index >= end)
462 index = base;
463 ds_set(context->ds, qual, ds_index, index);
464
465 if (index >= int_th)
466 ds_overflow(context, qual);
467 }
468
469 return bytes_written;
470}
471
472
473/*
474 * Branch Trace Store (BTS) uses the following format. Different
475 * architectures vary in the size of those fields.
476 * - source linear address
477 * - destination linear address
478 * - flags
479 *
480 * Later architectures use 64bit pointers throughout, whereas earlier
481 * architectures use 32bit pointers in 32bit mode.
482 *
483 * We compute the base address for the fields based on:
484 * - the field size stored in the DS configuration
485 * - the relative field position
486 *
487 * In order to store additional information in the BTS buffer, we use
488 * a special source address to indicate that the record requires
489 * special interpretation.
490 *
491 * Netburst indicated via a bit in the flags field whether the branch
492 * was predicted; this is ignored.
493 *
494 * We use two levels of abstraction:
495 * - the raw data level defined here
496 * - an arch-independent level defined in ds.h
497 */
498
499enum bts_field {
500 bts_from,
501 bts_to,
502 bts_flags,
503
504 bts_qual = bts_from,
505 bts_clock = bts_to,
506 bts_pid = bts_flags,
507
508 bts_qual_mask = (bts_qual_max - 1),
509 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
510};
511
512static inline unsigned long bts_get(const char *base, unsigned long field)
513{
514 base += (ds_cfg.sizeof_ptr_field * field);
515 return *(unsigned long *)base;
516}
517
518static inline void bts_set(char *base, unsigned long field, unsigned long val)
519{
520 base += (ds_cfg.sizeof_ptr_field * field);
521 (*(unsigned long *)base) = val;
522}
523
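/*
 * Worked example, assuming 64bit pointer fields: a raw BTS record is
 * 3 * 8 = 24 bytes with bts_from at offset 0, bts_to at 8 and
 * bts_flags at 16.  An escape record reuses the same slots: the
 * qualifier goes into the "from" field, the clock into "to" and the
 * pid into "flags".
 */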
524
525/*
526 * The raw BTS data is architecture dependent.
527 *
528 * For higher-level users, we give an arch-independent view.
529 * - ds.h defines struct bts_struct
530 * - bts_read translates one raw bts record into a bts_struct
531 * - bts_write translates one bts_struct into the raw format and
532 * writes it into the top of the parameter tracer's buffer.
533 *
534 * return: bytes read/written on success; -Eerrno, otherwise
535 */
536static int
537bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
538{
539 if (!tracer)
540 return -EINVAL;
541
542 if (at < tracer->trace.ds.begin)
543 return -EINVAL;
544
545 if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
546 return -EINVAL;
547
548 memset(out, 0, sizeof(*out));
549 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
550 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
551 out->variant.event.clock = bts_get(at, bts_clock);
552 out->variant.event.pid = bts_get(at, bts_pid);
553 } else {
554 out->qualifier = bts_branch;
555 out->variant.lbr.from = bts_get(at, bts_from);
556 out->variant.lbr.to = bts_get(at, bts_to);
557
558 if (!out->variant.lbr.from && !out->variant.lbr.to)
559 out->qualifier = bts_invalid;
560 }
561
562 return ds_cfg.sizeof_rec[ds_bts];
563}
564
565static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
566{
567 unsigned char raw[MAX_SIZEOF_BTS];
568
569 if (!tracer)
570 return -EINVAL;
571
572 if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
573 return -EOVERFLOW;
574
575 switch (in->qualifier) {
576 case bts_invalid:
577 bts_set(raw, bts_from, 0);
578 bts_set(raw, bts_to, 0);
579 bts_set(raw, bts_flags, 0);
580 break;
581 case bts_branch:
582 bts_set(raw, bts_from, in->variant.lbr.from);
583 bts_set(raw, bts_to, in->variant.lbr.to);
584 bts_set(raw, bts_flags, 0);
585 break;
586 case bts_task_arrives:
587 case bts_task_departs:
588 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
589 bts_set(raw, bts_clock, in->variant.event.clock);
590 bts_set(raw, bts_pid, in->variant.event.pid);
591 break;
592 default:
593 return -EINVAL;
594 }
595
596 return ds_write(tracer->ds.context, ds_bts, raw,
597 ds_cfg.sizeof_rec[ds_bts]);
598}
599
600
601static void ds_write_config(struct ds_context *context,
602 struct ds_trace *cfg, enum ds_qualifier qual)
603{
604 unsigned char *ds = context->ds;
605
606 ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
607 ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
608 ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
609 ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
610}
611
612static void ds_read_config(struct ds_context *context,
613 struct ds_trace *cfg, enum ds_qualifier qual)
614{
615 unsigned char *ds = context->ds;
616
617 cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
618 cfg->top = (void *)ds_get(ds, qual, ds_index);
619 cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
620 cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
621}
622
623static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
624 void *base, size_t size, size_t ith,
625 unsigned int flags) {
626 unsigned long buffer, adj;
627
628 /*
629 * Adjust the buffer address and size to meet alignment
630 * constraints:
631 * - buffer is double-word aligned
632 * - size is multiple of record size
633 *
634 * We checked the size at the very beginning; we have enough
635 * space to do the adjustment.
636 */
637 buffer = (unsigned long)base;
638
639 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
640 buffer += adj;
641 size -= adj;
642
643 trace->n = size / ds_cfg.sizeof_rec[qual];
644 trace->size = ds_cfg.sizeof_rec[qual];
645
646 size = (trace->n * trace->size);
647
648 trace->begin = (void *)buffer;
649 trace->top = trace->begin;
650 trace->end = (void *)(buffer + size);
651 /*
652 * The value for 'no threshold' is -1, which will set the
653	 * threshold outside of the buffer, which is exactly what we want.
654 */
655 ith *= ds_cfg.sizeof_rec[qual];
656 trace->ith = (void *)(buffer + size - ith);
657
658 trace->flags = flags;
659}
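/*
 * Worked example: for an already aligned 521 byte buffer (the
 * selftest's BUFFER_SIZE) and 24 byte BTS records this gives
 * trace->n = 521 / 24 = 21 records, i.e. 504 usable bytes, so
 * trace->end = begin + 504; with th == (size_t)-1 the interrupt
 * threshold ends up past the end of the buffer, as intended.
 */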
660
661
662static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
663 enum ds_qualifier qual, struct task_struct *task,
664 int cpu, void *base, size_t size, size_t th)
665{
666 struct ds_context *context;
667 int error;
668 size_t req_size;
669
670 error = -EOPNOTSUPP;
671 if (!ds_cfg.sizeof_rec[qual])
672 goto out;
673
674 error = -EINVAL;
675 if (!base)
676 goto out;
677
678 req_size = ds_cfg.sizeof_rec[qual];
679 /* We might need space for alignment adjustments. */
680 if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
681 req_size += DS_ALIGNMENT;
682
683 error = -EINVAL;
684 if (size < req_size)
685 goto out;
686
687 if (th != (size_t)-1) {
688 th *= ds_cfg.sizeof_rec[qual];
689
690 error = -EINVAL;
691 if (size <= th)
692 goto out;
693 }
694
695 tracer->buffer = base;
696 tracer->size = size;
697
698 error = -ENOMEM;
699 context = ds_get_context(task, cpu);
700 if (!context)
701 goto out;
702 tracer->context = context;
703
704 /*
705 * Defer any tracer-specific initialization work for the context until
706 * context ownership has been clarified.
707 */
708
709 error = 0;
710 out:
711 return error;
712}
713
714static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
715 void *base, size_t size,
716 bts_ovfl_callback_t ovfl, size_t th,
717 unsigned int flags)
718{
719 struct bts_tracer *tracer;
720 int error;
721
722 /* Buffer overflow notification is not yet implemented. */
723 error = -EOPNOTSUPP;
724 if (ovfl)
725 goto out;
726
727 error = get_tracer(task);
728 if (error < 0)
729 goto out;
730
731 error = -ENOMEM;
732 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
733 if (!tracer)
734 goto out_put_tracer;
735 tracer->ovfl = ovfl;
736
737 /* Do some more error checking and acquire a tracing context. */
738 error = ds_request(&tracer->ds, &tracer->trace.ds,
739 ds_bts, task, cpu, base, size, th);
740 if (error < 0)
741 goto out_tracer;
742
743 /* Claim the bts part of the tracing context we acquired above. */
744 spin_lock_irq(&ds_lock);
745
746 error = -EPERM;
747 if (tracer->ds.context->bts_master)
748 goto out_unlock;
749 tracer->ds.context->bts_master = tracer;
750
751 spin_unlock_irq(&ds_lock);
752
753 /*
754 * Now that we own the bts part of the context, let's complete the
755 * initialization for that part.
756 */
757 ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
758 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
759 ds_install_ds_area(tracer->ds.context);
760
761 tracer->trace.read = bts_read;
762 tracer->trace.write = bts_write;
763
764 /* Start tracing. */
765 ds_resume_bts(tracer);
766
767 return tracer;
768
769 out_unlock:
770 spin_unlock_irq(&ds_lock);
771 ds_put_context(tracer->ds.context);
772 out_tracer:
773 kfree(tracer);
774 out_put_tracer:
775 put_tracer(task);
776 out:
777 return ERR_PTR(error);
778}
779
780struct bts_tracer *ds_request_bts_task(struct task_struct *task,
781 void *base, size_t size,
782 bts_ovfl_callback_t ovfl,
783 size_t th, unsigned int flags)
784{
785 return ds_request_bts(task, 0, base, size, ovfl, th, flags);
786}
787
788struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
789 bts_ovfl_callback_t ovfl,
790 size_t th, unsigned int flags)
791{
792 return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
793}
794
795static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
796 void *base, size_t size,
797 pebs_ovfl_callback_t ovfl, size_t th,
798 unsigned int flags)
799{
800 struct pebs_tracer *tracer;
801 int error;
802
803 /* Buffer overflow notification is not yet implemented. */
804 error = -EOPNOTSUPP;
805 if (ovfl)
806 goto out;
807
808 error = get_tracer(task);
809 if (error < 0)
810 goto out;
811
812 error = -ENOMEM;
813 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
814 if (!tracer)
815 goto out_put_tracer;
816 tracer->ovfl = ovfl;
817
818 /* Do some more error checking and acquire a tracing context. */
819 error = ds_request(&tracer->ds, &tracer->trace.ds,
820 ds_pebs, task, cpu, base, size, th);
821 if (error < 0)
822 goto out_tracer;
823
824 /* Claim the pebs part of the tracing context we acquired above. */
825 spin_lock_irq(&ds_lock);
826
827 error = -EPERM;
828 if (tracer->ds.context->pebs_master)
829 goto out_unlock;
830 tracer->ds.context->pebs_master = tracer;
831
832 spin_unlock_irq(&ds_lock);
833
834 /*
835 * Now that we own the pebs part of the context, let's complete the
836 * initialization for that part.
837 */
838 ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
839 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
840 ds_install_ds_area(tracer->ds.context);
841
842 /* Start tracing. */
843 ds_resume_pebs(tracer);
844
845 return tracer;
846
847 out_unlock:
848 spin_unlock_irq(&ds_lock);
849 ds_put_context(tracer->ds.context);
850 out_tracer:
851 kfree(tracer);
852 out_put_tracer:
853 put_tracer(task);
854 out:
855 return ERR_PTR(error);
856}
857
858struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
859 void *base, size_t size,
860 pebs_ovfl_callback_t ovfl,
861 size_t th, unsigned int flags)
862{
863 return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
864}
865
866struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
867 pebs_ovfl_callback_t ovfl,
868 size_t th, unsigned int flags)
869{
870 return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
871}
872
873static void ds_free_bts(struct bts_tracer *tracer)
874{
875 struct task_struct *task;
876
877 task = tracer->ds.context->task;
878
879 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
880 tracer->ds.context->bts_master = NULL;
881
882 /* Make sure tracing stopped and the tracer is not in use. */
883 if (task && (task != current))
884 wait_task_context_switch(task);
885
886 ds_put_context(tracer->ds.context);
887 put_tracer(task);
888
889 kfree(tracer);
890}
891
892void ds_release_bts(struct bts_tracer *tracer)
893{
894 might_sleep();
895
896 if (!tracer)
897 return;
898
899 ds_suspend_bts(tracer);
900 ds_free_bts(tracer);
901}
902
903int ds_release_bts_noirq(struct bts_tracer *tracer)
904{
905 struct task_struct *task;
906 unsigned long irq;
907 int error;
908
909 if (!tracer)
910 return 0;
911
912 task = tracer->ds.context->task;
913
914 local_irq_save(irq);
915
916 error = -EPERM;
917 if (!task &&
918 (tracer->ds.context->cpu != smp_processor_id()))
919 goto out;
920
921 error = -EPERM;
922 if (task && (task != current))
923 goto out;
924
925 ds_suspend_bts_noirq(tracer);
926 ds_free_bts(tracer);
927
928 error = 0;
929 out:
930 local_irq_restore(irq);
931 return error;
932}
933
934static void update_task_debugctlmsr(struct task_struct *task,
935 unsigned long debugctlmsr)
936{
937 task->thread.debugctlmsr = debugctlmsr;
938
939 get_cpu();
940 if (task == current)
941 update_debugctlmsr(debugctlmsr);
942 put_cpu();
943}
944
945void ds_suspend_bts(struct bts_tracer *tracer)
946{
947 struct task_struct *task;
948 unsigned long debugctlmsr;
949 int cpu;
950
951 if (!tracer)
952 return;
953
954 tracer->flags = 0;
955
956 task = tracer->ds.context->task;
957 cpu = tracer->ds.context->cpu;
958
959 WARN_ON(!task && irqs_disabled());
960
961 debugctlmsr = (task ?
962 task->thread.debugctlmsr :
963 get_debugctlmsr_on_cpu(cpu));
964 debugctlmsr &= ~BTS_CONTROL;
965
966 if (task)
967 update_task_debugctlmsr(task, debugctlmsr);
968 else
969 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
970}
971
972int ds_suspend_bts_noirq(struct bts_tracer *tracer)
973{
974 struct task_struct *task;
975 unsigned long debugctlmsr, irq;
976 int cpu, error = 0;
977
978 if (!tracer)
979 return 0;
980
981 tracer->flags = 0;
982
983 task = tracer->ds.context->task;
984 cpu = tracer->ds.context->cpu;
985
986 local_irq_save(irq);
987
988 error = -EPERM;
989 if (!task && (cpu != smp_processor_id()))
990 goto out;
991
992 debugctlmsr = (task ?
993 task->thread.debugctlmsr :
994 get_debugctlmsr());
995 debugctlmsr &= ~BTS_CONTROL;
996
997 if (task)
998 update_task_debugctlmsr(task, debugctlmsr);
999 else
1000 update_debugctlmsr(debugctlmsr);
1001
1002 error = 0;
1003 out:
1004 local_irq_restore(irq);
1005 return error;
1006}
1007
1008static unsigned long ds_bts_control(struct bts_tracer *tracer)
1009{
1010 unsigned long control;
1011
1012 control = ds_cfg.ctl[dsf_bts];
1013 if (!(tracer->trace.ds.flags & BTS_KERNEL))
1014 control |= ds_cfg.ctl[dsf_bts_kernel];
1015 if (!(tracer->trace.ds.flags & BTS_USER))
1016 control |= ds_cfg.ctl[dsf_bts_user];
1017
1018 return control;
1019}
1020
1021void ds_resume_bts(struct bts_tracer *tracer)
1022{
1023 struct task_struct *task;
1024 unsigned long debugctlmsr;
1025 int cpu;
1026
1027 if (!tracer)
1028 return;
1029
1030 tracer->flags = tracer->trace.ds.flags;
1031
1032 task = tracer->ds.context->task;
1033 cpu = tracer->ds.context->cpu;
1034
1035 WARN_ON(!task && irqs_disabled());
1036
1037 debugctlmsr = (task ?
1038 task->thread.debugctlmsr :
1039 get_debugctlmsr_on_cpu(cpu));
1040 debugctlmsr |= ds_bts_control(tracer);
1041
1042 if (task)
1043 update_task_debugctlmsr(task, debugctlmsr);
1044 else
1045 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
1046}
1047
1048int ds_resume_bts_noirq(struct bts_tracer *tracer)
1049{
1050 struct task_struct *task;
1051 unsigned long debugctlmsr, irq;
1052 int cpu, error = 0;
1053
1054 if (!tracer)
1055 return 0;
1056
1057 tracer->flags = tracer->trace.ds.flags;
1058
1059 task = tracer->ds.context->task;
1060 cpu = tracer->ds.context->cpu;
1061
1062 local_irq_save(irq);
1063
1064 error = -EPERM;
1065 if (!task && (cpu != smp_processor_id()))
1066 goto out;
1067
1068 debugctlmsr = (task ?
1069 task->thread.debugctlmsr :
1070 get_debugctlmsr());
1071 debugctlmsr |= ds_bts_control(tracer);
1072
1073 if (task)
1074 update_task_debugctlmsr(task, debugctlmsr);
1075 else
1076 update_debugctlmsr(debugctlmsr);
1077
1078 error = 0;
1079 out:
1080 local_irq_restore(irq);
1081 return error;
1082}
1083
1084static void ds_free_pebs(struct pebs_tracer *tracer)
1085{
1086 struct task_struct *task;
1087
1088 task = tracer->ds.context->task;
1089
1090 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
1091 tracer->ds.context->pebs_master = NULL;
1092
1093 ds_put_context(tracer->ds.context);
1094 put_tracer(task);
1095
1096 kfree(tracer);
1097}
1098
1099void ds_release_pebs(struct pebs_tracer *tracer)
1100{
1101 might_sleep();
1102
1103 if (!tracer)
1104 return;
1105
1106 ds_suspend_pebs(tracer);
1107 ds_free_pebs(tracer);
1108}
1109
1110int ds_release_pebs_noirq(struct pebs_tracer *tracer)
1111{
1112 struct task_struct *task;
1113 unsigned long irq;
1114 int error;
1115
1116 if (!tracer)
1117 return 0;
1118
1119 task = tracer->ds.context->task;
1120
1121 local_irq_save(irq);
1122
1123 error = -EPERM;
1124 if (!task &&
1125 (tracer->ds.context->cpu != smp_processor_id()))
1126 goto out;
1127
1128 error = -EPERM;
1129 if (task && (task != current))
1130 goto out;
1131
1132 ds_suspend_pebs_noirq(tracer);
1133 ds_free_pebs(tracer);
1134
1135 error = 0;
1136 out:
1137 local_irq_restore(irq);
1138 return error;
1139}
1140
1141void ds_suspend_pebs(struct pebs_tracer *tracer)
1142{
1143
1144}
1145
1146int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
1147{
1148 return 0;
1149}
1150
1151void ds_resume_pebs(struct pebs_tracer *tracer)
1152{
1153
1154}
1155
1156int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
1157{
1158 return 0;
1159}
1160
1161const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
1162{
1163 if (!tracer)
1164 return NULL;
1165
1166 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
1167 return &tracer->trace;
1168}
1169
1170const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
1171{
1172 if (!tracer)
1173 return NULL;
1174
1175 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
1176
1177 tracer->trace.counters = ds_cfg.nr_counter_reset;
1178 memcpy(tracer->trace.counter_reset,
1179 tracer->ds.context->ds +
1180 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
1181 ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
1182
1183 return &tracer->trace;
1184}
1185
1186int ds_reset_bts(struct bts_tracer *tracer)
1187{
1188 if (!tracer)
1189 return -EINVAL;
1190
1191 tracer->trace.ds.top = tracer->trace.ds.begin;
1192
1193 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
1194 (unsigned long)tracer->trace.ds.top);
1195
1196 return 0;
1197}
1198
1199int ds_reset_pebs(struct pebs_tracer *tracer)
1200{
1201 if (!tracer)
1202 return -EINVAL;
1203
1204 tracer->trace.ds.top = tracer->trace.ds.begin;
1205
1206 ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
1207 (unsigned long)tracer->trace.ds.top);
1208
1209 return 0;
1210}
1211
1212int ds_set_pebs_reset(struct pebs_tracer *tracer,
1213 unsigned int counter, u64 value)
1214{
1215 if (!tracer)
1216 return -EINVAL;
1217
1218	if (ds_cfg.nr_counter_reset <= counter)
1219 return -EINVAL;
1220
1221 *(u64 *)(tracer->ds.context->ds +
1222 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
1223 (counter * PEBS_RESET_FIELD_SIZE)) = value;
1224
1225 return 0;
1226}
1227
1228static const struct ds_configuration ds_cfg_netburst = {
1229 .name = "Netburst",
1230 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
1231 .ctl[dsf_bts_kernel] = (1 << 5),
1232 .ctl[dsf_bts_user] = (1 << 6),
1233 .nr_counter_reset = 1,
1234};
1235static const struct ds_configuration ds_cfg_pentium_m = {
1236 .name = "Pentium M",
1237 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
1238 .nr_counter_reset = 1,
1239};
1240static const struct ds_configuration ds_cfg_core2_atom = {
1241 .name = "Core 2/Atom",
1242 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
1243 .ctl[dsf_bts_kernel] = (1 << 9),
1244 .ctl[dsf_bts_user] = (1 << 10),
1245 .nr_counter_reset = 1,
1246};
1247static const struct ds_configuration ds_cfg_core_i7 = {
1248 .name = "Core i7",
1249 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
1250 .ctl[dsf_bts_kernel] = (1 << 9),
1251 .ctl[dsf_bts_user] = (1 << 10),
1252 .nr_counter_reset = 4,
1253};
1254
1255static void
1256ds_configure(const struct ds_configuration *cfg,
1257 struct cpuinfo_x86 *cpu)
1258{
1259 unsigned long nr_pebs_fields = 0;
1260
1261 printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
1262
1263#ifdef __i386__
1264 nr_pebs_fields = 10;
1265#else
1266 nr_pebs_fields = 18;
1267#endif
1268
1269 /*
1270 * Starting with version 2, architectural performance
1271 * monitoring supports a format specifier.
1272 */
1273 if ((cpuid_eax(0xa) & 0xff) > 1) {
1274 unsigned long perf_capabilities, format;
1275
1276 rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
1277
1278 format = (perf_capabilities >> 8) & 0xf;
1279
1280 switch (format) {
1281 case 0:
1282 nr_pebs_fields = 18;
1283 break;
1284 case 1:
1285 nr_pebs_fields = 22;
1286 break;
1287 default:
1288 printk(KERN_INFO
1289 "[ds] unknown PEBS format: %lu\n", format);
1290 nr_pebs_fields = 0;
1291 break;
1292 }
1293 }
1294
1295 memset(&ds_cfg, 0, sizeof(ds_cfg));
1296 ds_cfg = *cfg;
1297
1298 ds_cfg.sizeof_ptr_field =
1299 (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
1300
1301 ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
1302 ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
1303
1304 if (!cpu_has(cpu, X86_FEATURE_BTS)) {
1305 ds_cfg.sizeof_rec[ds_bts] = 0;
1306 printk(KERN_INFO "[ds] bts not available\n");
1307 }
1308 if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
1309 ds_cfg.sizeof_rec[ds_pebs] = 0;
1310 printk(KERN_INFO "[ds] pebs not available\n");
1311 }
1312
1313 printk(KERN_INFO "[ds] sizes: address: %u bit, ",
1314 8 * ds_cfg.sizeof_ptr_field);
1315 printk("bts/pebs record: %u/%u bytes\n",
1316 ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
1317
1318 WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
1319}
1320
1321void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
1322{
1323 /* Only configure the first cpu. Others are identical. */
1324 if (ds_cfg.name)
1325 return;
1326
1327 switch (c->x86) {
1328 case 0x6:
1329 switch (c->x86_model) {
1330 case 0x9:
1331 case 0xd: /* Pentium M */
1332 ds_configure(&ds_cfg_pentium_m, c);
1333 break;
1334 case 0xf:
1335 case 0x17: /* Core2 */
1336 case 0x1c: /* Atom */
1337 ds_configure(&ds_cfg_core2_atom, c);
1338 break;
1339 case 0x1a: /* Core i7 */
1340 ds_configure(&ds_cfg_core_i7, c);
1341 break;
1342 default:
1343 /* Sorry, don't know about them. */
1344 break;
1345 }
1346 break;
1347 case 0xf:
1348 switch (c->x86_model) {
1349 case 0x0:
1350 case 0x1:
1351 case 0x2: /* Netburst */
1352 ds_configure(&ds_cfg_netburst, c);
1353 break;
1354 default:
1355 /* Sorry, don't know about them. */
1356 break;
1357 }
1358 break;
1359 default:
1360 /* Sorry, don't know about them. */
1361 break;
1362 }
1363}
1364
1365static inline void ds_take_timestamp(struct ds_context *context,
1366 enum bts_qualifier qualifier,
1367 struct task_struct *task)
1368{
1369 struct bts_tracer *tracer = context->bts_master;
1370 struct bts_struct ts;
1371
1372 /* Prevent compilers from reading the tracer pointer twice. */
1373 barrier();
1374
1375 if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
1376 return;
1377
1378 memset(&ts, 0, sizeof(ts));
1379 ts.qualifier = qualifier;
1380 ts.variant.event.clock = trace_clock_global();
1381 ts.variant.event.pid = task->pid;
1382
1383 bts_write(tracer, &ts);
1384}
1385
1386/*
1387 * Change the DS configuration from tracing prev to tracing next.
1388 */
1389void ds_switch_to(struct task_struct *prev, struct task_struct *next)
1390{
1391 struct ds_context *prev_ctx = prev->thread.ds_ctx;
1392 struct ds_context *next_ctx = next->thread.ds_ctx;
1393 unsigned long debugctlmsr = next->thread.debugctlmsr;
1394
1395 /* Make sure all data is read before we start. */
1396 barrier();
1397
1398 if (prev_ctx) {
1399 update_debugctlmsr(0);
1400
1401 ds_take_timestamp(prev_ctx, bts_task_departs, prev);
1402 }
1403
1404 if (next_ctx) {
1405 ds_take_timestamp(next_ctx, bts_task_arrives, next);
1406
1407 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
1408 }
1409
1410 update_debugctlmsr(debugctlmsr);
1411}
1412
1413static __init int ds_selftest(void)
1414{
1415 if (ds_cfg.sizeof_rec[ds_bts]) {
1416 int error;
1417
1418 error = ds_selftest_bts();
1419 if (error) {
1420 WARN(1, "[ds] selftest failed. disabling bts.\n");
1421 ds_cfg.sizeof_rec[ds_bts] = 0;
1422 }
1423 }
1424
1425 if (ds_cfg.sizeof_rec[ds_pebs]) {
1426 int error;
1427
1428 error = ds_selftest_pebs();
1429 if (error) {
1430 WARN(1, "[ds] selftest failed. disabling pebs.\n");
1431 ds_cfg.sizeof_rec[ds_pebs] = 0;
1432 }
1433 }
1434
1435 return 0;
1436}
1437device_initcall(ds_selftest);
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
deleted file mode 100644
index 6bc7c199ab99..000000000000
--- a/arch/x86/kernel/ds_selftest.c
+++ /dev/null
@@ -1,408 +0,0 @@
1/*
2 * Debug Store support - selftest
3 *
4 *
5 * Copyright (C) 2009 Intel Corporation.
6 * Markus Metzger <markus.t.metzger@intel.com>, 2009
7 */
8
9#include "ds_selftest.h"
10
11#include <linux/kernel.h>
12#include <linux/string.h>
13#include <linux/smp.h>
14#include <linux/cpu.h>
15
16#include <asm/ds.h>
17
18
19#define BUFFER_SIZE 521 /* Intentionally chosen odd size. */
20#define SMALL_BUFFER_SIZE 24 /* A single bts entry. */
21
22struct ds_selftest_bts_conf {
23 struct bts_tracer *tracer;
24 int error;
25 int (*suspend)(struct bts_tracer *);
26 int (*resume)(struct bts_tracer *);
27};
28
29static int ds_selftest_bts_consistency(const struct bts_trace *trace)
30{
31 int error = 0;
32
33 if (!trace) {
34 printk(KERN_CONT "failed to access trace...");
35 /* Bail out. Other tests are pointless. */
36 return -1;
37 }
38
39 if (!trace->read) {
40 printk(KERN_CONT "bts read not available...");
41 error = -1;
42 }
43
44 /* Do some sanity checks on the trace configuration. */
45 if (!trace->ds.n) {
46 printk(KERN_CONT "empty bts buffer...");
47 error = -1;
48 }
49 if (!trace->ds.size) {
50 printk(KERN_CONT "bad bts trace setup...");
51 error = -1;
52 }
53 if (trace->ds.end !=
54 (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
55 printk(KERN_CONT "bad bts buffer setup...");
56 error = -1;
57 }
58 /*
59	 * We allow top in [begin; end], since it's not clear when the
60 * overflow adjustment happens: after the increment or before the
61 * write.
62 */
63 if ((trace->ds.top < trace->ds.begin) ||
64 (trace->ds.end < trace->ds.top)) {
65 printk(KERN_CONT "bts top out of bounds...");
66 error = -1;
67 }
68
69 return error;
70}
71
72static int ds_selftest_bts_read(struct bts_tracer *tracer,
73 const struct bts_trace *trace,
74 const void *from, const void *to)
75{
76 const unsigned char *at;
77
78 /*
79 * Check a few things which do not belong to this test.
80 * They should be covered by other tests.
81 */
82 if (!trace)
83 return -1;
84
85 if (!trace->read)
86 return -1;
87
88 if (to < from)
89 return -1;
90
91 if (from < trace->ds.begin)
92 return -1;
93
94 if (trace->ds.end < to)
95 return -1;
96
97 if (!trace->ds.size)
98 return -1;
99
100 /* Now to the test itself. */
101 for (at = from; (void *)at < to; at += trace->ds.size) {
102 struct bts_struct bts;
103 unsigned long index;
104 int error;
105
106 if (((void *)at - trace->ds.begin) % trace->ds.size) {
107 printk(KERN_CONT
108 "read from non-integer index...");
109 return -1;
110 }
111 index = ((void *)at - trace->ds.begin) / trace->ds.size;
112
113 memset(&bts, 0, sizeof(bts));
114 error = trace->read(tracer, at, &bts);
115 if (error < 0) {
116 printk(KERN_CONT
117 "error reading bts trace at [%lu] (0x%p)...",
118 index, at);
119 return error;
120 }
121
122 switch (bts.qualifier) {
123 case BTS_BRANCH:
124 break;
125 default:
126 printk(KERN_CONT
127 "unexpected bts entry %llu at [%lu] (0x%p)...",
128 bts.qualifier, index, at);
129 return -1;
130 }
131 }
132
133 return 0;
134}
135
136static void ds_selftest_bts_cpu(void *arg)
137{
138 struct ds_selftest_bts_conf *conf = arg;
139 const struct bts_trace *trace;
140 void *top;
141
142 if (IS_ERR(conf->tracer)) {
143 conf->error = PTR_ERR(conf->tracer);
144 conf->tracer = NULL;
145
146 printk(KERN_CONT
147 "initialization failed (err: %d)...", conf->error);
148 return;
149 }
150
151	/* By now we should have collected enough trace. */
152 conf->error = conf->suspend(conf->tracer);
153 if (conf->error < 0)
154 return;
155
156 /* Let's see if we can access the trace. */
157 trace = ds_read_bts(conf->tracer);
158
159 conf->error = ds_selftest_bts_consistency(trace);
160 if (conf->error < 0)
161 return;
162
163 /* If everything went well, we should have a few trace entries. */
164 if (trace->ds.top == trace->ds.begin) {
165 /*
166 * It is possible but highly unlikely that we got a
167		 * buffer overflow and ended up at exactly the same
168 * position we started from.
169 * Let's issue a warning, but continue.
170 */
171 printk(KERN_CONT "no trace/overflow...");
172 }
173
174 /* Let's try to read the trace we collected. */
175 conf->error =
176 ds_selftest_bts_read(conf->tracer, trace,
177 trace->ds.begin, trace->ds.top);
178 if (conf->error < 0)
179 return;
180
181 /*
182 * Let's read the trace again.
183 * Since we suspended tracing, we should get the same result.
184 */
185 top = trace->ds.top;
186
187 trace = ds_read_bts(conf->tracer);
188 conf->error = ds_selftest_bts_consistency(trace);
189 if (conf->error < 0)
190 return;
191
192 if (top != trace->ds.top) {
193 printk(KERN_CONT "suspend not working...");
194 conf->error = -1;
195 return;
196 }
197
198 /* Let's collect some more trace - see if resume is working. */
199 conf->error = conf->resume(conf->tracer);
200 if (conf->error < 0)
201 return;
202
203 conf->error = conf->suspend(conf->tracer);
204 if (conf->error < 0)
205 return;
206
207 trace = ds_read_bts(conf->tracer);
208
209 conf->error = ds_selftest_bts_consistency(trace);
210 if (conf->error < 0)
211 return;
212
213 if (trace->ds.top == top) {
214 /*
215 * It is possible but highly unlikely that we got a
216		 * buffer overflow and ended up at exactly the same
217 * position we started from.
218 * Let's issue a warning and check the full trace.
219 */
220 printk(KERN_CONT
221 "no resume progress/overflow...");
222
223 conf->error =
224 ds_selftest_bts_read(conf->tracer, trace,
225 trace->ds.begin, trace->ds.end);
226 } else if (trace->ds.top < top) {
227 /*
228 * We had a buffer overflow - the entire buffer should
229 * contain trace records.
230 */
231 conf->error =
232 ds_selftest_bts_read(conf->tracer, trace,
233 trace->ds.begin, trace->ds.end);
234 } else {
235 /*
236 * It is quite likely that the buffer did not overflow.
237 * Let's just check the delta trace.
238 */
239 conf->error =
240 ds_selftest_bts_read(conf->tracer, trace, top,
241 trace->ds.top);
242 }
243 if (conf->error < 0)
244 return;
245
246 conf->error = 0;
247}
248
249static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
250{
251 ds_suspend_bts(tracer);
252 return 0;
253}
254
255static int ds_resume_bts_wrap(struct bts_tracer *tracer)
256{
257 ds_resume_bts(tracer);
258 return 0;
259}
260
261static void ds_release_bts_noirq_wrap(void *tracer)
262{
263 (void)ds_release_bts_noirq(tracer);
264}
265
266static int ds_selftest_bts_bad_release_noirq(int cpu,
267 struct bts_tracer *tracer)
268{
269 int error = -EPERM;
270
271 /* Try to release the tracer on the wrong cpu. */
272 get_cpu();
273 if (cpu != smp_processor_id()) {
274 error = ds_release_bts_noirq(tracer);
275 if (error != -EPERM)
276 printk(KERN_CONT "release on wrong cpu...");
277 }
278 put_cpu();
279
280 return error ? 0 : -1;
281}
282
283static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
284{
285 struct bts_tracer *tracer;
286 int error;
287
288 /* Try to request cpu tracing while task tracing is active. */
289 tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
290 (size_t)-1, BTS_KERNEL);
291 error = PTR_ERR(tracer);
292 if (!IS_ERR(tracer)) {
293 ds_release_bts(tracer);
294 error = 0;
295 }
296
297 if (error != -EPERM)
298 printk(KERN_CONT "cpu/task tracing overlap...");
299
300 return error ? 0 : -1;
301}
302
303static int ds_selftest_bts_bad_request_task(void *buffer)
304{
305 struct bts_tracer *tracer;
306 int error;
307
308	/* Try to request task tracing while cpu tracing is active. */
309 tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
310 (size_t)-1, BTS_KERNEL);
311 error = PTR_ERR(tracer);
312 if (!IS_ERR(tracer)) {
313 error = 0;
314 ds_release_bts(tracer);
315 }
316
317 if (error != -EPERM)
318 printk(KERN_CONT "task/cpu tracing overlap...");
319
320 return error ? 0 : -1;
321}
322
323int ds_selftest_bts(void)
324{
325 struct ds_selftest_bts_conf conf;
326 unsigned char buffer[BUFFER_SIZE], *small_buffer;
327 unsigned long irq;
328 int cpu;
329
330 printk(KERN_INFO "[ds] bts selftest...");
331 conf.error = 0;
332
333 small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
334
335 get_online_cpus();
336 for_each_online_cpu(cpu) {
337 conf.suspend = ds_suspend_bts_wrap;
338 conf.resume = ds_resume_bts_wrap;
339 conf.tracer =
340 ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
341 NULL, (size_t)-1, BTS_KERNEL);
342 ds_selftest_bts_cpu(&conf);
343 if (conf.error >= 0)
344 conf.error = ds_selftest_bts_bad_request_task(buffer);
345 ds_release_bts(conf.tracer);
346 if (conf.error < 0)
347 goto out;
348
349 conf.suspend = ds_suspend_bts_noirq;
350 conf.resume = ds_resume_bts_noirq;
351 conf.tracer =
352 ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
353 NULL, (size_t)-1, BTS_KERNEL);
354 smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
355 if (conf.error >= 0) {
356 conf.error =
357 ds_selftest_bts_bad_release_noirq(cpu,
358 conf.tracer);
359 /* We must not release the tracer twice. */
360 if (conf.error < 0)
361 conf.tracer = NULL;
362 }
363 if (conf.error >= 0)
364 conf.error = ds_selftest_bts_bad_request_task(buffer);
365 smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
366 conf.tracer, 1);
367 if (conf.error < 0)
368 goto out;
369 }
370
371 conf.suspend = ds_suspend_bts_wrap;
372 conf.resume = ds_resume_bts_wrap;
373 conf.tracer =
374 ds_request_bts_task(current, buffer, BUFFER_SIZE,
375 NULL, (size_t)-1, BTS_KERNEL);
376 ds_selftest_bts_cpu(&conf);
377 if (conf.error >= 0)
378 conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
379 ds_release_bts(conf.tracer);
380 if (conf.error < 0)
381 goto out;
382
383 conf.suspend = ds_suspend_bts_noirq;
384 conf.resume = ds_resume_bts_noirq;
385 conf.tracer =
386 ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
387 NULL, (size_t)-1, BTS_KERNEL);
388 local_irq_save(irq);
389 ds_selftest_bts_cpu(&conf);
390 if (conf.error >= 0)
391 conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
392 ds_release_bts_noirq(conf.tracer);
393 local_irq_restore(irq);
394 if (conf.error < 0)
395 goto out;
396
397 conf.error = 0;
398 out:
399 put_online_cpus();
400 printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
401
402 return conf.error;
403}
404
405int ds_selftest_pebs(void)
406{
407 return 0;
408}
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
deleted file mode 100644
index 2ba8745c6663..000000000000
--- a/arch/x86/kernel/ds_selftest.h
+++ /dev/null
@@ -1,15 +0,0 @@
1/*
2 * Debug Store support - selftest
3 *
4 *
5 * Copyright (C) 2009 Intel Corporation.
6 * Markus Metzger <markus.t.metzger@intel.com>, 2009
7 */
8
9#ifdef CONFIG_X86_DS_SELFTEST
10extern int ds_selftest_bts(void);
11extern int ds_selftest_pebs(void);
12#else
13static inline int ds_selftest_bts(void) { return 0; }
14static inline int ds_selftest_pebs(void) { return 0; }
15#endif
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6d817554780a..c89a386930b7 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -224,11 +224,6 @@ unsigned __kprobes long oops_begin(void)
224 int cpu; 224 int cpu;
225 unsigned long flags; 225 unsigned long flags;
226 226
227 /* notify the hw-branch tracer so it may disable tracing and
228 add the last trace to the trace buffer -
229 the earlier this happens, the more useful the trace. */
230 trace_hw_branch_oops();
231
232 oops_enter(); 227 oops_enter();
233 228
234 /* racy, but better than risking deadlock. */ 229 /* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b43bbaebe2c0..f2f56c0967b6 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -422,14 +422,22 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
422 422
423static void __kprobes clear_btf(void) 423static void __kprobes clear_btf(void)
424{ 424{
425 if (test_thread_flag(TIF_DEBUGCTLMSR)) 425 if (test_thread_flag(TIF_BLOCKSTEP)) {
426 update_debugctlmsr(0); 426 unsigned long debugctl = get_debugctlmsr();
427
428 debugctl &= ~DEBUGCTLMSR_BTF;
429 update_debugctlmsr(debugctl);
430 }
427} 431}
428 432
429static void __kprobes restore_btf(void) 433static void __kprobes restore_btf(void)
430{ 434{
431 if (test_thread_flag(TIF_DEBUGCTLMSR)) 435 if (test_thread_flag(TIF_BLOCKSTEP)) {
432 update_debugctlmsr(current->thread.debugctlmsr); 436 unsigned long debugctl = get_debugctlmsr();
437
438 debugctl |= DEBUGCTLMSR_BTF;
439 update_debugctlmsr(debugctl);
440 }
433} 441}
434 442
435void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 443void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 28ad9f4d8b94..eccdb57094e3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -20,7 +20,6 @@
20#include <asm/idle.h> 20#include <asm/idle.h>
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22#include <asm/i387.h> 22#include <asm/i387.h>
23#include <asm/ds.h>
24#include <asm/debugreg.h> 23#include <asm/debugreg.h>
25 24
26unsigned long idle_halt; 25unsigned long idle_halt;
@@ -50,8 +49,6 @@ void free_thread_xstate(struct task_struct *tsk)
50 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); 49 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
51 tsk->thread.xstate = NULL; 50 tsk->thread.xstate = NULL;
52 } 51 }
53
54 WARN(tsk->thread.ds_ctx, "leaking DS context\n");
55} 52}
56 53
57void free_thread_info(struct thread_info *ti) 54void free_thread_info(struct thread_info *ti)
@@ -198,11 +195,16 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
198 prev = &prev_p->thread; 195 prev = &prev_p->thread;
199 next = &next_p->thread; 196 next = &next_p->thread;
200 197
201 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || 198 if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
202 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) 199 test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
203 ds_switch_to(prev_p, next_p); 200 unsigned long debugctl = get_debugctlmsr();
204 else if (next->debugctlmsr != prev->debugctlmsr) 201
205 update_debugctlmsr(next->debugctlmsr); 202 debugctl &= ~DEBUGCTLMSR_BTF;
203 if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
204 debugctl |= DEBUGCTLMSR_BTF;
205
206 update_debugctlmsr(debugctl);
207 }
206 208
207 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 209 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
208 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 210 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
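The __switch_to_xtra() hunk above uses the same idiom the function already applies to TIF_NOTSC: XOR the thread flags of the outgoing and incoming task so the MSR is only touched when the setting actually changes across the switch. An annotated restatement of that idiom follows; propagate_blockstep() is an illustrative name, not a kernel function.

	/* Illustrative restatement of the TIF_BLOCKSTEP handling above. */
	static void propagate_blockstep(struct task_struct *prev_p,
					struct task_struct *next_p)
	{
		/* XOR is true only when exactly one of the two tasks has
		 * the flag set, i.e. the hardware state must change on
		 * this particular switch. */
		if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
		    test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
			unsigned long debugctl = get_debugctlmsr();

			if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
				debugctl |= DEBUGCTLMSR_BTF;
			else
				debugctl &= ~DEBUGCTLMSR_BTF;

			update_debugctlmsr(debugctl);
		}
	}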
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f6c62667e30c..75090c589b7a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,7 +55,6 @@
55#include <asm/cpu.h> 55#include <asm/cpu.h>
56#include <asm/idle.h> 56#include <asm/idle.h>
57#include <asm/syscalls.h> 57#include <asm/syscalls.h>
58#include <asm/ds.h>
59#include <asm/debugreg.h> 58#include <asm/debugreg.h>
60 59
61asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 60asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -238,13 +237,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
238 kfree(p->thread.io_bitmap_ptr); 237 kfree(p->thread.io_bitmap_ptr);
239 p->thread.io_bitmap_max = 0; 238 p->thread.io_bitmap_max = 0;
240 } 239 }
241
242 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
243 p->thread.ds_ctx = NULL;
244
245 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
246 p->thread.debugctlmsr = 0;
247
248 return err; 240 return err;
249} 241}
250 242
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index dc9690b4c4cc..cc4258f2beb5 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -49,7 +49,6 @@
49#include <asm/ia32.h> 49#include <asm/ia32.h>
50#include <asm/idle.h> 50#include <asm/idle.h>
51#include <asm/syscalls.h> 51#include <asm/syscalls.h>
52#include <asm/ds.h>
53#include <asm/debugreg.h> 52#include <asm/debugreg.h>
54 53
55asmlinkage extern void ret_from_fork(void); 54asmlinkage extern void ret_from_fork(void);
@@ -313,13 +312,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
313 if (err) 312 if (err)
314 goto out; 313 goto out;
315 } 314 }
316
317 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
318 p->thread.ds_ctx = NULL;
319
320 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
321 p->thread.debugctlmsr = 0;
322
323 err = 0; 315 err = 0;
324out: 316out:
325 if (err && p->thread.io_bitmap_ptr) { 317 if (err && p->thread.io_bitmap_ptr) {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 2e9b55027b7e..055be0afd330 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -2,9 +2,6 @@
2/* 2/*
3 * Pentium III FXSR, SSE support 3 * Pentium III FXSR, SSE support
4 * Gareth Hughes <gareth@valinux.com>, May 2000 4 * Gareth Hughes <gareth@valinux.com>, May 2000
5 *
6 * BTS tracing
7 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
8 */ 5 */
9 6
10#include <linux/kernel.h> 7#include <linux/kernel.h>
@@ -22,7 +19,6 @@
22#include <linux/audit.h> 19#include <linux/audit.h>
23#include <linux/seccomp.h> 20#include <linux/seccomp.h>
24#include <linux/signal.h> 21#include <linux/signal.h>
25#include <linux/workqueue.h>
26#include <linux/perf_event.h> 22#include <linux/perf_event.h>
27#include <linux/hw_breakpoint.h> 23#include <linux/hw_breakpoint.h>
28 24
@@ -36,7 +32,6 @@
36#include <asm/desc.h> 32#include <asm/desc.h>
37#include <asm/prctl.h> 33#include <asm/prctl.h>
38#include <asm/proto.h> 34#include <asm/proto.h>
39#include <asm/ds.h>
40#include <asm/hw_breakpoint.h> 35#include <asm/hw_breakpoint.h>
41 36
42#include "tls.h" 37#include "tls.h"
@@ -789,342 +784,6 @@ static int ioperm_get(struct task_struct *target,
789 0, IO_BITMAP_BYTES); 784 0, IO_BITMAP_BYTES);
790} 785}
791 786
792#ifdef CONFIG_X86_PTRACE_BTS
793/*
794 * A branch trace store context.
795 *
796 * Contexts may only be installed by ptrace_bts_config() and only for
797 * ptraced tasks.
798 *
799 * Contexts are destroyed when the tracee is detached from the tracer.
800 * The actual destruction work requires interrupts enabled, so the
801 * work is deferred and will be scheduled during __ptrace_unlink().
802 *
803 * Contexts hold an additional task_struct reference on the traced
804 * task, as well as a reference on the tracer's mm.
805 *
806 * Ptrace already holds a task_struct for the duration of ptrace operations,
807 * but since destruction is deferred, it may be executed after both
808 * tracer and tracee exited.
809 */
810struct bts_context {
811 /* The branch trace handle. */
812 struct bts_tracer *tracer;
813
814 /* The buffer used to store the branch trace and its size. */
815 void *buffer;
816 unsigned int size;
817
818 /* The mm that paid for the above buffer. */
819 struct mm_struct *mm;
820
821 /* The task this context belongs to. */
822 struct task_struct *task;
823
824 /* The signal to send on a bts buffer overflow. */
825 unsigned int bts_ovfl_signal;
826
827 /* The work struct to destroy a context. */
828 struct work_struct work;
829};
830
831static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
832{
833 void *buffer = NULL;
834 int err = -ENOMEM;
835
836 err = account_locked_memory(current->mm, current->signal->rlim, size);
837 if (err < 0)
838 return err;
839
840 buffer = kzalloc(size, GFP_KERNEL);
841 if (!buffer)
842 goto out_refund;
843
844 context->buffer = buffer;
845 context->size = size;
846 context->mm = get_task_mm(current);
847
848 return 0;
849
850 out_refund:
851 refund_locked_memory(current->mm, size);
852 return err;
853}
854
855static inline void free_bts_buffer(struct bts_context *context)
856{
857 if (!context->buffer)
858 return;
859
860 kfree(context->buffer);
861 context->buffer = NULL;
862
863 refund_locked_memory(context->mm, context->size);
864 context->size = 0;
865
866 mmput(context->mm);
867 context->mm = NULL;
868}
869
870static void free_bts_context_work(struct work_struct *w)
871{
872 struct bts_context *context;
873
874 context = container_of(w, struct bts_context, work);
875
876 ds_release_bts(context->tracer);
877 put_task_struct(context->task);
878 free_bts_buffer(context);
879 kfree(context);
880}
881
882static inline void free_bts_context(struct bts_context *context)
883{
884 INIT_WORK(&context->work, free_bts_context_work);
885 schedule_work(&context->work);
886}
887
888static inline struct bts_context *alloc_bts_context(struct task_struct *task)
889{
890 struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
891 if (context) {
892 context->task = task;
893 task->bts = context;
894
895 get_task_struct(task);
896 }
897
898 return context;
899}
900
901static int ptrace_bts_read_record(struct task_struct *child, size_t index,
902 struct bts_struct __user *out)
903{
904 struct bts_context *context;
905 const struct bts_trace *trace;
906 struct bts_struct bts;
907 const unsigned char *at;
908 int error;
909
910 context = child->bts;
911 if (!context)
912 return -ESRCH;
913
914 trace = ds_read_bts(context->tracer);
915 if (!trace)
916 return -ESRCH;
917
918 at = trace->ds.top - ((index + 1) * trace->ds.size);
919 if ((void *)at < trace->ds.begin)
920 at += (trace->ds.n * trace->ds.size);
921
922 if (!trace->read)
923 return -EOPNOTSUPP;
924
925 error = trace->read(context->tracer, at, &bts);
926 if (error < 0)
927 return error;
928
929 if (copy_to_user(out, &bts, sizeof(bts)))
930 return -EFAULT;
931
932 return sizeof(bts);
933}
934
935static int ptrace_bts_drain(struct task_struct *child,
936 long size,
937 struct bts_struct __user *out)
938{
939 struct bts_context *context;
940 const struct bts_trace *trace;
941 const unsigned char *at;
942 int error, drained = 0;
943
944 context = child->bts;
945 if (!context)
946 return -ESRCH;
947
948 trace = ds_read_bts(context->tracer);
949 if (!trace)
950 return -ESRCH;
951
952 if (!trace->read)
953 return -EOPNOTSUPP;
954
955 if (size < (trace->ds.top - trace->ds.begin))
956 return -EIO;
957
958 for (at = trace->ds.begin; (void *)at < trace->ds.top;
959 out++, drained++, at += trace->ds.size) {
960 struct bts_struct bts;
961
962 error = trace->read(context->tracer, at, &bts);
963 if (error < 0)
964 return error;
965
966 if (copy_to_user(out, &bts, sizeof(bts)))
967 return -EFAULT;
968 }
969
970 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
971
972 error = ds_reset_bts(context->tracer);
973 if (error < 0)
974 return error;
975
976 return drained;
977}
978
979static int ptrace_bts_config(struct task_struct *child,
980 long cfg_size,
981 const struct ptrace_bts_config __user *ucfg)
982{
983 struct bts_context *context;
984 struct ptrace_bts_config cfg;
985 unsigned int flags = 0;
986
987 if (cfg_size < sizeof(cfg))
988 return -EIO;
989
990 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
991 return -EFAULT;
992
993 context = child->bts;
994 if (!context)
995 context = alloc_bts_context(child);
996 if (!context)
997 return -ENOMEM;
998
999 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
1000 if (!cfg.signal)
1001 return -EINVAL;
1002
1003 return -EOPNOTSUPP;
1004 context->bts_ovfl_signal = cfg.signal;
1005 }
1006
1007 ds_release_bts(context->tracer);
1008 context->tracer = NULL;
1009
1010 if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
1011 int err;
1012
1013 free_bts_buffer(context);
1014 if (!cfg.size)
1015 return 0;
1016
1017 err = alloc_bts_buffer(context, cfg.size);
1018 if (err < 0)
1019 return err;
1020 }
1021
1022 if (cfg.flags & PTRACE_BTS_O_TRACE)
1023 flags |= BTS_USER;
1024
1025 if (cfg.flags & PTRACE_BTS_O_SCHED)
1026 flags |= BTS_TIMESTAMPS;
1027
1028 context->tracer =
1029 ds_request_bts_task(child, context->buffer, context->size,
1030 NULL, (size_t)-1, flags);
1031 if (unlikely(IS_ERR(context->tracer))) {
1032 int error = PTR_ERR(context->tracer);
1033
1034 free_bts_buffer(context);
1035 context->tracer = NULL;
1036 return error;
1037 }
1038
1039 return sizeof(cfg);
1040}
1041
1042static int ptrace_bts_status(struct task_struct *child,
1043 long cfg_size,
1044 struct ptrace_bts_config __user *ucfg)
1045{
1046 struct bts_context *context;
1047 const struct bts_trace *trace;
1048 struct ptrace_bts_config cfg;
1049
1050 context = child->bts;
1051 if (!context)
1052 return -ESRCH;
1053
1054 if (cfg_size < sizeof(cfg))
1055 return -EIO;
1056
1057 trace = ds_read_bts(context->tracer);
1058 if (!trace)
1059 return -ESRCH;
1060
1061 memset(&cfg, 0, sizeof(cfg));
1062 cfg.size = trace->ds.end - trace->ds.begin;
1063 cfg.signal = context->bts_ovfl_signal;
1064 cfg.bts_size = sizeof(struct bts_struct);
1065
1066 if (cfg.signal)
1067 cfg.flags |= PTRACE_BTS_O_SIGNAL;
1068
1069 if (trace->ds.flags & BTS_USER)
1070 cfg.flags |= PTRACE_BTS_O_TRACE;
1071
1072 if (trace->ds.flags & BTS_TIMESTAMPS)
1073 cfg.flags |= PTRACE_BTS_O_SCHED;
1074
1075 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
1076 return -EFAULT;
1077
1078 return sizeof(cfg);
1079}
1080
1081static int ptrace_bts_clear(struct task_struct *child)
1082{
1083 struct bts_context *context;
1084 const struct bts_trace *trace;
1085
1086 context = child->bts;
1087 if (!context)
1088 return -ESRCH;
1089
1090 trace = ds_read_bts(context->tracer);
1091 if (!trace)
1092 return -ESRCH;
1093
1094 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
1095
1096 return ds_reset_bts(context->tracer);
1097}
1098
1099static int ptrace_bts_size(struct task_struct *child)
1100{
1101 struct bts_context *context;
1102 const struct bts_trace *trace;
1103
1104 context = child->bts;
1105 if (!context)
1106 return -ESRCH;
1107
1108 trace = ds_read_bts(context->tracer);
1109 if (!trace)
1110 return -ESRCH;
1111
1112 return (trace->ds.top - trace->ds.begin) / trace->ds.size;
1113}
1114
1115/*
1116 * Called from __ptrace_unlink() after the child has been moved back
1117 * to its original parent.
1118 */
1119void ptrace_bts_untrace(struct task_struct *child)
1120{
1121 if (unlikely(child->bts)) {
1122 free_bts_context(child->bts);
1123 child->bts = NULL;
1124 }
1125}
1126#endif /* CONFIG_X86_PTRACE_BTS */
1127
1128/* 787/*
1129 * Called by kernel/ptrace.c when detaching.. 788 * Called by kernel/ptrace.c when detaching..
1130 * 789 *
@@ -1252,39 +911,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1252 break; 911 break;
1253#endif 912#endif
1254 913
1255 /*
1256 * These bits need more cooking - not enabled yet:
1257 */
1258#ifdef CONFIG_X86_PTRACE_BTS
1259 case PTRACE_BTS_CONFIG:
1260 ret = ptrace_bts_config
1261 (child, data, (struct ptrace_bts_config __user *)addr);
1262 break;
1263
1264 case PTRACE_BTS_STATUS:
1265 ret = ptrace_bts_status
1266 (child, data, (struct ptrace_bts_config __user *)addr);
1267 break;
1268
1269 case PTRACE_BTS_SIZE:
1270 ret = ptrace_bts_size(child);
1271 break;
1272
1273 case PTRACE_BTS_GET:
1274 ret = ptrace_bts_read_record
1275 (child, data, (struct bts_struct __user *) addr);
1276 break;
1277
1278 case PTRACE_BTS_CLEAR:
1279 ret = ptrace_bts_clear(child);
1280 break;
1281
1282 case PTRACE_BTS_DRAIN:
1283 ret = ptrace_bts_drain
1284 (child, data, (struct bts_struct __user *) addr);
1285 break;
1286#endif /* CONFIG_X86_PTRACE_BTS */
1287
1288 default: 914 default:
1289 ret = ptrace_request(child, request, addr, data); 915 ret = ptrace_request(child, request, addr, data);
1290 break; 916 break;
@@ -1544,14 +1170,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1544 1170
1545 case PTRACE_GET_THREAD_AREA: 1171 case PTRACE_GET_THREAD_AREA:
1546 case PTRACE_SET_THREAD_AREA: 1172 case PTRACE_SET_THREAD_AREA:
1547#ifdef CONFIG_X86_PTRACE_BTS
1548 case PTRACE_BTS_CONFIG:
1549 case PTRACE_BTS_STATUS:
1550 case PTRACE_BTS_SIZE:
1551 case PTRACE_BTS_GET:
1552 case PTRACE_BTS_CLEAR:
1553 case PTRACE_BTS_DRAIN:
1554#endif /* CONFIG_X86_PTRACE_BTS */
1555 return arch_ptrace(child, request, addr, data); 1173 return arch_ptrace(child, request, addr, data);
1556 1174
1557 default: 1175 default:
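The removed bts_context code above is also a compact illustration of a common kernel teardown pattern: as the deleted comment notes, the actual destruction needs interrupts enabled, so free_bts_context() defers it to a workqueue and the handler recovers the object with container_of(). A generic sketch of that pattern, with hypothetical "foo" names standing in for the BTS-specific structures:

	/* Generic deferred-teardown sketch: queue the real cleanup so it
	 * runs in process context with interrupts enabled, possibly after
	 * the caller has gone away.
	 */
	#include <linux/workqueue.h>
	#include <linux/slab.h>

	struct foo {
		void *buffer;
		struct work_struct work;
	};

	static void free_foo_work(struct work_struct *w)
	{
		struct foo *f = container_of(w, struct foo, work);

		kfree(f->buffer);	/* heavy, irq-enabled cleanup goes here */
		kfree(f);
	}

	static void free_foo(struct foo *f)
	{
		INIT_WORK(&f->work, free_foo_work);
		schedule_work(&f->work);	/* deferred; may run after caller exits */
	}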
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 3149032ff107..58de45ee08b6 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -158,22 +158,6 @@ static int enable_single_step(struct task_struct *child)
158} 158}
159 159
160/* 160/*
161 * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running.
162 */
163static void write_debugctlmsr(struct task_struct *child, unsigned long val)
164{
165 if (child->thread.debugctlmsr == val)
166 return;
167
168 child->thread.debugctlmsr = val;
169
170 if (child != current)
171 return;
172
173 update_debugctlmsr(val);
174}
175
176/*
177 * Enable single or block step. 161 * Enable single or block step.
178 */ 162 */
179static void enable_step(struct task_struct *child, bool block) 163static void enable_step(struct task_struct *child, bool block)
@@ -186,15 +170,17 @@ static void enable_step(struct task_struct *child, bool block)
186 * that uses user-mode single stepping itself. 170 * that uses user-mode single stepping itself.
187 */ 171 */
188 if (enable_single_step(child) && block) { 172 if (enable_single_step(child) && block) {
189 set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 173 unsigned long debugctl = get_debugctlmsr();
190 write_debugctlmsr(child, 174
191 child->thread.debugctlmsr | DEBUGCTLMSR_BTF); 175 debugctl |= DEBUGCTLMSR_BTF;
192 } else { 176 update_debugctlmsr(debugctl);
193 write_debugctlmsr(child, 177 set_tsk_thread_flag(child, TIF_BLOCKSTEP);
194 child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); 178 } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
195 179 unsigned long debugctl = get_debugctlmsr();
196 if (!child->thread.debugctlmsr) 180
197 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 181 debugctl &= ~DEBUGCTLMSR_BTF;
182 update_debugctlmsr(debugctl);
183 clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
198 } 184 }
199} 185}
200 186
@@ -213,11 +199,13 @@ void user_disable_single_step(struct task_struct *child)
213 /* 199 /*
214 * Make sure block stepping (BTF) is disabled. 200 * Make sure block stepping (BTF) is disabled.
215 */ 201 */
216 write_debugctlmsr(child, 202 if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
217 child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); 203 unsigned long debugctl = get_debugctlmsr();
218 204
219 if (!child->thread.debugctlmsr) 205 debugctl &= ~DEBUGCTLMSR_BTF;
220 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 206 update_debugctlmsr(debugctl);
207 clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
208 }
221 209
222 /* Always clear TIF_SINGLESTEP... */ 210 /* Always clear TIF_SINGLESTEP... */
223 clear_tsk_thread_flag(child, TIF_SINGLESTEP); 211 clear_tsk_thread_flag(child, TIF_SINGLESTEP);
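For context on the step.c change: block stepping is what a tracer requests with PTRACE_SINGLEBLOCK; the kernel sets EFLAGS.TF plus DEBUGCTL.BTF (now tracked by TIF_BLOCKSTEP) so the child traps at the next branch, call, return or interrupt rather than after every instruction. A hedged user-space sketch of driving it; error handling is omitted, and whether your libc headers expose PTRACE_SINGLEBLOCK varies.

	/* Hedged sketch: resume a stopped tracee until its next branch.
	 * Assumes the tracee is already ptrace-attached and stopped.
	 */
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>

	static void step_one_block(pid_t child)
	{
		/* Kernel side: user_enable_block_step() sets TIF_BLOCKSTEP
		 * and DEBUGCTLMSR_BTF for the child before resuming it. */
		ptrace(PTRACE_SINGLEBLOCK, child, 0, 0);
		waitpid(child, NULL, 0);	/* stops again at the next branch */
	}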
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1168e4454188..36f1bd9f8e76 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -543,11 +543,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
543 543
544 /* DR6 may or may not be cleared by the CPU */ 544 /* DR6 may or may not be cleared by the CPU */
545 set_debugreg(0, 6); 545 set_debugreg(0, 6);
546
546 /* 547 /*
547 * The processor cleared BTF, so don't mark that we need it set. 548 * The processor cleared BTF, so don't mark that we need it set.
548 */ 549 */
549 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); 550 clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
550 tsk->thread.debugctlmsr = 0;
551 551
552 /* Store the virtualized DR6 value */ 552 /* Store the virtualized DR6 value */
553 tsk->thread.debugreg6 = dr6; 553 tsk->thread.debugreg6 = dr6;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6e5e75e0d7d3..0b896ac7e4bb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3652,8 +3652,11 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3652 3652
3653 /* We need to handle NMIs before interrupts are enabled */ 3653 /* We need to handle NMIs before interrupts are enabled */
3654 if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && 3654 if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
3655 (exit_intr_info & INTR_INFO_VALID_MASK)) 3655 (exit_intr_info & INTR_INFO_VALID_MASK)) {
3656 kvm_before_handle_nmi(&vmx->vcpu);
3656 asm("int $2"); 3657 asm("int $2");
3658 kvm_after_handle_nmi(&vmx->vcpu);
3659 }
3657 3660
3658 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 3661 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
3659 3662
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 30efeead4511..58a96e6a234c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -40,6 +40,7 @@
40#include <linux/user-return-notifier.h> 40#include <linux/user-return-notifier.h>
41#include <linux/srcu.h> 41#include <linux/srcu.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/perf_event.h>
43#include <trace/events/kvm.h> 44#include <trace/events/kvm.h>
44 45
45#define CREATE_TRACE_POINTS 46#define CREATE_TRACE_POINTS
@@ -3955,6 +3956,47 @@ static void kvm_timer_init(void)
3955 } 3956 }
3956} 3957}
3957 3958
3959static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
3960
3961static int kvm_is_in_guest(void)
3962{
3963 return percpu_read(current_vcpu) != NULL;
3964}
3965
3966static int kvm_is_user_mode(void)
3967{
3968 int user_mode = 3;
3969 if (percpu_read(current_vcpu))
3970 user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
3971 return user_mode != 0;
3972}
3973
3974static unsigned long kvm_get_guest_ip(void)
3975{
3976 unsigned long ip = 0;
3977 if (percpu_read(current_vcpu))
3978 ip = kvm_rip_read(percpu_read(current_vcpu));
3979 return ip;
3980}
3981
3982static struct perf_guest_info_callbacks kvm_guest_cbs = {
3983 .is_in_guest = kvm_is_in_guest,
3984 .is_user_mode = kvm_is_user_mode,
3985 .get_guest_ip = kvm_get_guest_ip,
3986};
3987
3988void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
3989{
3990 percpu_write(current_vcpu, vcpu);
3991}
3992EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
3993
3994void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
3995{
3996 percpu_write(current_vcpu, NULL);
3997}
3998EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
3999
3958int kvm_arch_init(void *opaque) 4000int kvm_arch_init(void *opaque)
3959{ 4001{
3960 int r; 4002 int r;
@@ -3991,6 +4033,8 @@ int kvm_arch_init(void *opaque)
3991 4033
3992 kvm_timer_init(); 4034 kvm_timer_init();
3993 4035
4036 perf_register_guest_info_callbacks(&kvm_guest_cbs);
4037
3994 return 0; 4038 return 0;
3995 4039
3996out: 4040out:
@@ -3999,6 +4043,8 @@ out:
3999 4043
4000void kvm_arch_exit(void) 4044void kvm_arch_exit(void)
4001{ 4045{
4046 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
4047
4002 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 4048 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4003 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, 4049 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
4004 CPUFREQ_TRANSITION_NOTIFIER); 4050 CPUFREQ_TRANSITION_NOTIFIER);
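The callbacks registered above are consumed on the perf side: when a counter-overflow NMI fires while a vCPU is running, perf can ask KVM whether the sample belongs to a guest and, if so, which instruction pointer and privilege level to report. Roughly, the consumer looks like the sketch below; it assumes the global perf_guest_cbs pointer that perf_register_guest_info_callbacks() installs and is an illustration of the arch perf code, not a verbatim copy.

	/* Illustrative consumer: pick the IP to attribute to a sample. */
	unsigned long perf_instruction_pointer(struct pt_regs *regs)
	{
		if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
			return perf_guest_cbs->get_guest_ip();	/* guest RIP via KVM */

		return instruction_pointer(regs);		/* normal host case  */
	}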
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2d101639bd8d..b7a404722d2b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -65,4 +65,7 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
66} 66}
67 67
68void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
69void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
70
68#endif 71#endif
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 419386c24b82..cbaf8f2b83df 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -20,7 +20,7 @@ lib-y := delay.o
20lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
21lib-y += usercopy_$(BITS).o getuser.o putuser.o 21lib-y += usercopy_$(BITS).o getuser.o putuser.o
22lib-y += memcpy_$(BITS).o 22lib-y += memcpy_$(BITS).o
23lib-$(CONFIG_KPROBES) += insn.o inat.o 23lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
24 24
25obj-y += msr.o msr-reg.o msr-reg-export.o 25obj-y += msr.o msr-reg.o msr-reg-export.o
26 26
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 2bf90fafa7b5..c8abc4d1bf35 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -239,11 +239,11 @@ static void arch_perfmon_setup_counters(void)
239 if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && 239 if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
240 current_cpu_data.x86_model == 15) { 240 current_cpu_data.x86_model == 15) {
241 eax.split.version_id = 2; 241 eax.split.version_id = 2;
242 eax.split.num_events = 2; 242 eax.split.num_counters = 2;
243 eax.split.bit_width = 40; 243 eax.split.bit_width = 40;
244 } 244 }
245 245
246 num_counters = eax.split.num_events; 246 num_counters = eax.split.num_counters;
247 247
248 op_arch_perfmon_spec.num_counters = num_counters; 248 op_arch_perfmon_spec.num_counters = num_counters;
249 op_arch_perfmon_spec.num_controls = num_counters; 249 op_arch_perfmon_spec.num_controls = num_counters;
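The oprofile hunk tracks the perf_event.h rename of the CPUID-leaf-0xA EAX bitfield (num_events becomes num_counters); the values still come straight from CPUID, and the quirk above fakes version 2 with two 40-bit counters on family-6 model-15 parts that report version 0. A small user-space sketch of decoding the same leaf (bit layout per the Intel SDM: EAX bits 7:0 version, 15:8 general-purpose counters, 23:16 counter width; uses GCC's <cpuid.h>):

	/* Hedged sketch: report the architectural perfmon capabilities. */
	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(0xA, &eax, &ebx, &ecx, &edx))
			return 1;	/* leaf 0xA not supported */

		printf("version %u, counters %u, width %u bits\n",
		       eax & 0xff, (eax >> 8) & 0xff, (eax >> 16) & 0xff);
		return 0;
	}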