author     Steven Rostedt <srostedt@redhat.com>    2010-05-17 22:26:53 -0400
committer  Steven Rostedt <rostedt@goodmis.org>    2010-05-18 00:35:23 -0400
commit     f0218b3e9974f06014b61be8987159f4a20e011e (patch)
tree       29a593c4d71ab18cb0c450a34e79bf6bea66877e /arch/x86/kernel/cpu
parent     1eaa4787a774c4896518c81f24e8bccaa2244924 (diff)
parent     9d192e118a094087494997ea1c8a2faf39af38c5 (diff)
Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip into trace/tip/tracing/core-6
Conflicts:
	include/trace/ftrace.h
	kernel/trace/trace_kprobe.c

Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--	arch/x86/kernel/cpu/cpufreq/powernow-k8.c	  3
-rw-r--r--	arch/x86/kernel/cpu/intel.c			  2
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c		815
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd.c		 46
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c		357
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	641
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_lbr.c	218
-rw-r--r--	arch/x86/kernel/cpu/perf_event_p4.c		857
-rw-r--r--	arch/x86/kernel/cpu/perf_event_p6.c		 31
9 files changed, 2348 insertions, 622 deletions
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d360b56e9825..b6215b9798e2 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -929,7 +929,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
 		powernow_table[i].index = index;
 
 		/* Frequency may be rounded for these */
-		if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) {
+		if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+			|| boot_cpu_data.x86 == 0x11) {
 			powernow_table[i].frequency =
 				freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
 		} else
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1366c7cfd483..f5e5390d3459 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -12,7 +12,6 @@
 #include <asm/processor.h>
 #include <asm/pgtable.h>
 #include <asm/msr.h>
-#include <asm/ds.h>
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 
@@ -388,7 +387,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_BTS);
 		if (!(l1 & (1<<12)))
 			set_cpu_cap(c, X86_FEATURE_PEBS);
-		ds_init_intel(c);
 	}
 
 	if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index db5bdc8addf8..fd4db0db3708 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,46 +31,51 @@
 #include <asm/nmi.h>
 #include <asm/compat.h>
 
-static u64 perf_event_mask __read_mostly;
+#if 0
+#undef wrmsrl
+#define wrmsrl(msr, val) 					\
+do {								\
+	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
+			(unsigned long)(val));			\
+	native_write_msr((msr), (u32)((u64)(val)),		\
+			(u32)((u64)(val) >> 32));		\
+} while (0)
+#endif
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS	4
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE		24
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
 
-/* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE	(BTS_RECORD_SIZE * 2048)
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
 
-/* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH	(BTS_RECORD_SIZE * 128)
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
 
+		len  += size;
+		to   += size;
+		addr += size;
 
-/*
- * Bits in the debugctlmsr controlling branch tracing.
- */
-#define X86_DEBUGCTL_TR			(1 << 6)
-#define X86_DEBUGCTL_BTS		(1 << 7)
-#define X86_DEBUGCTL_BTINT		(1 << 8)
-#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
-#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
+	} while (len < n);
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
+	return len;
+}
 
 struct event_constraint {
 	union {
@@ -89,18 +94,41 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+#define MAX_LBR_ENTRIES		16
+
 struct cpu_hw_events {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	unsigned long		interrupts;
 	int			enabled;
-	struct debug_store	*ds;
 
 	int			n_events;
 	int			n_added;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+
+	unsigned int		group_flag;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	struct debug_store	*ds;
+	u64			pebs_enabled;
+
+	/*
+	 * Intel LBR bits
+	 */
+	int				lbr_users;
+	void				*lbr_context;
+	struct perf_branch_stack	lbr_stack;
+	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+
+	/*
+	 * AMD specific bits
+	 */
 	struct amd_nb		*amd_nb;
 };
 
@@ -114,44 +142,75 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
 
+/*
+ * Constraint on the Event code.
+ */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
 
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+
+/*
+ * Constraint on the Event code + UMask
+ */
+#define PEBS_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
 #define for_each_event_constraint(e, c)	\
-	for ((e) = (c); (e)->cmask; (e)++)
+	for ((e) = (c); (e)->weight; (e)++)
+
+union perf_capabilities {
+	struct {
+		u64	lbr_format    : 6;
+		u64	pebs_trap     : 1;
+		u64	pebs_arch_reg : 1;
+		u64	pebs_format   : 4;
+		u64	smm_freeze    : 1;
+	};
+	u64	capabilities;
+};
 
 /*
  * struct x86_pmu - generic x86 pmu
  */
 struct x86_pmu {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	const char	*name;
 	int		version;
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
-	void		(*enable_all)(void);
+	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	int		(*hw_config)(struct perf_event *event);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
-	u64		(*raw_event)(u64);
 	int		max_events;
-	int		num_events;
-	int		num_events_fixed;
-	int		event_bits;
-	u64		event_mask;
+	int		num_counters;
+	int		num_counters_fixed;
+	int		cntval_bits;
+	u64		cntval_mask;
 	int		apic;
 	u64		max_period;
-	u64		intel_ctrl;
-	void		(*enable_bts)(u64 config);
-	void		(*disable_bts)(void);
-
 	struct event_constraint *
 			(*get_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
@@ -159,11 +218,32 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+	void		(*quirks)(void);
 
 	int		(*cpu_prepare)(int cpu);
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+
+	/*
+	 * Intel Arch Perfmon v2+
+	 */
+	u64			intel_ctrl;
+	union perf_capabilities intel_cap;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	int		bts, pebs;
+	int		pebs_record_size;
+	void		(*drain_pebs)(struct pt_regs *regs);
+	struct event_constraint *pebs_constraints;
+
+	/*
+	 * Intel LBR
+	 */
+	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+	int		lbr_nr;			   /* hardware stack size */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -198,7 +278,7 @@ static u64
 x86_perf_event_update(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int shift = 64 - x86_pmu.event_bits;
+	int shift = 64 - x86_pmu.cntval_bits;
 	u64 prev_raw_count, new_raw_count;
 	int idx = hwc->idx;
 	s64 delta;
@@ -241,33 +321,32 @@ again:
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
+#ifdef CONFIG_X86_LOCAL_APIC
+
 static bool reserve_pmc_hardware(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		disable_lapic_nmi_watchdog();
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
 	}
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
-#endif
 
 	return true;
 
-#ifdef CONFIG_X86_LOCAL_APIC
 eventsel_fail:
 	for (i--; i >= 0; i--)
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 
-	i = x86_pmu.num_events;
+	i = x86_pmu.num_counters;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
@@ -277,128 +356,36 @@ perfctr_fail:
 		enable_lapic_nmi_watchdog();
 
 	return false;
-#endif
 }
 
 static void release_pmc_hardware(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		enable_lapic_nmi_watchdog();
-#endif
-}
-
-static inline bool bts_available(void)
-{
-	return x86_pmu.enable_bts != NULL;
 }
 
-static void init_debug_store_on_cpu(int cpu)
-{
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
-		     (u32)((u64)(unsigned long)ds),
-		     (u32)((u64)(unsigned long)ds >> 32));
-}
-
-static void fini_debug_store_on_cpu(int cpu)
-{
-	if (!per_cpu(cpu_hw_events, cpu).ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static void release_bts_hardware(void)
-{
-	int cpu;
-
-	if (!bts_available())
-		return;
-
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		fini_debug_store_on_cpu(cpu);
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-		if (!ds)
-			continue;
-
-		per_cpu(cpu_hw_events, cpu).ds = NULL;
-
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
-		kfree(ds);
-	}
-
-	put_online_cpus();
-}
-
-static int reserve_bts_hardware(void)
-{
-	int cpu, err = 0;
-
-	if (!bts_available())
-		return 0;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-		void *buffer;
-
-		err = -ENOMEM;
-		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-		if (unlikely(!buffer))
-			break;
-
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds)) {
-			kfree(buffer);
-			break;
-		}
-
-		ds->bts_buffer_base = (u64)(unsigned long)buffer;
-		ds->bts_index = ds->bts_buffer_base;
-		ds->bts_absolute_maximum =
-			ds->bts_buffer_base + BTS_BUFFER_SIZE;
-		ds->bts_interrupt_threshold =
-			ds->bts_absolute_maximum - BTS_OVFL_TH;
-
-		per_cpu(cpu_hw_events, cpu).ds = ds;
-		err = 0;
-	}
+#else
 
-	if (err)
-		release_bts_hardware();
-	else {
-		for_each_online_cpu(cpu)
-			init_debug_store_on_cpu(cpu);
-	}
+static bool reserve_pmc_hardware(void) { return true; }
+static void release_pmc_hardware(void) {}
 
-	put_online_cpus();
+#endif
 
-	return err;
-}
+static int reserve_ds_buffers(void);
+static void release_ds_buffers(void);
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
-		release_bts_hardware();
+		release_ds_buffers();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 }
@@ -441,54 +428,11 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	return 0;
 }
 
-/*
- * Setup the hardware configuration for a given attr_type
- */
-static int __hw_perf_event_init(struct perf_event *event)
+static int x86_setup_perfctr(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
 	u64 config;
-	int err;
-
-	if (!x86_pmu_initialized())
-		return -ENODEV;
-
-	err = 0;
-	if (!atomic_inc_not_zero(&active_events)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
-			if (!reserve_pmc_hardware())
-				err = -EBUSY;
-			else
-				err = reserve_bts_hardware();
-		}
-		if (!err)
-			atomic_inc(&active_events);
-		mutex_unlock(&pmc_reserve_mutex);
-	}
-	if (err)
-		return err;
-
-	event->destroy = hw_perf_event_destroy;
-
-	/*
-	 * Generate PMC IRQs:
-	 * (keep 'enabled' bit clear for now)
-	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
-
-	hwc->idx = -1;
-	hwc->last_cpu = -1;
-	hwc->last_tag = ~0ULL;
-
-	/*
-	 * Count user and OS events unless requested not to.
-	 */
-	if (!attr->exclude_user)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!attr->exclude_kernel)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
@@ -505,16 +449,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 		return -EOPNOTSUPP;
 	}
 
-	/*
-	 * Raw hw_event type provide the config in the hw_event structure
-	 */
-	if (attr->type == PERF_TYPE_RAW) {
-		hwc->config |= x86_pmu.raw_event(attr->config);
-		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
-		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+	if (attr->type == PERF_TYPE_RAW)
 		return 0;
-	}
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
@@ -539,11 +475,11 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
 	    (hwc->sample_period == 1)) {
 		/* BTS is not supported by this architecture. */
-		if (!bts_available())
+		if (!x86_pmu.bts)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
-		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
 	}
 
@@ -552,12 +488,87 @@ static int __hw_perf_event_init(struct perf_event *event)
 	return 0;
 }
 
+static int x86_pmu_hw_config(struct perf_event *event)
+{
+	if (event->attr.precise_ip) {
+		int precise = 0;
+
+		/* Support for constant skid */
+		if (x86_pmu.pebs)
+			precise++;
+
+		/* Support for IP fixup */
+		if (x86_pmu.lbr_nr)
+			precise++;
+
+		if (event->attr.precise_ip > precise)
+			return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Generate PMC IRQs:
+	 * (keep 'enabled' bit clear for now)
+	 */
+	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
+
+	/*
+	 * Count user and OS events unless requested not to
+	 */
+	if (!event->attr.exclude_user)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!event->attr.exclude_kernel)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
+
+	if (event->attr.type == PERF_TYPE_RAW)
+		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+
+	return x86_setup_perfctr(event);
+}
+
+/*
+ * Setup the hardware configuration for a given attr_type
+ */
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	int err;
+
+	if (!x86_pmu_initialized())
+		return -ENODEV;
+
+	err = 0;
+	if (!atomic_inc_not_zero(&active_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&active_events) == 0) {
+			if (!reserve_pmc_hardware())
+				err = -EBUSY;
+			else {
+				err = reserve_ds_buffers();
+				if (err)
+					release_pmc_hardware();
+			}
+		}
+		if (!err)
+			atomic_inc(&active_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	if (err)
+		return err;
+
+	event->destroy = hw_perf_event_destroy;
+
+	event->hw.idx = -1;
+	event->hw.last_cpu = -1;
+	event->hw.last_tag = ~0ULL;
+
+	return x86_pmu.hw_config(event);
+}
+
 static void x86_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -587,12 +598,12 @@ void hw_perf_disable(void)
 	x86_pmu.disable_all();
 }
 
-static void x86_pmu_enable_all(void)
+static void x86_pmu_enable_all(int added)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		u64 val;
 
@@ -667,14 +678,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 * assign events to counters starting with most
 	 * constrained events.
 	 */
-	wmax = x86_pmu.num_events;
+	wmax = x86_pmu.num_counters;
 
 	/*
 	 * when fixed event counters are present,
 	 * wmax is incremented by 1 to account
 	 * for one more choice
 	 */
-	if (x86_pmu.num_events_fixed)
+	if (x86_pmu.num_counters_fixed)
 		wmax++;
 
 	for (w = 1, num = n; num && w <= wmax; w++) {
@@ -724,7 +735,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 	struct perf_event *event;
 	int n, max_count;
 
-	max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
 
 	/* current number of events already accepted */
 	n = cpuc->n_events;
@@ -795,7 +806,7 @@ void hw_perf_enable(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	int i;
+	int i, added = cpuc->n_added;
 
 	if (!x86_pmu_initialized())
 		return;
@@ -847,19 +858,20 @@ void hw_perf_enable(void)
 	cpuc->enabled = 1;
 	barrier();
 
-	x86_pmu.enable_all();
+	x86_pmu.enable_all(added);
 }
 
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+					  u64 enable_mask)
 {
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
-			      hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
+	wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
 }
 
 static inline void x86_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+
+	wrmsrl(hwc->config_base + hwc->idx, hwc->config);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -874,7 +886,7 @@ x86_perf_event_set_period(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	s64 left = atomic64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
-	int err, ret = 0, idx = hwc->idx;
+	int ret = 0, idx = hwc->idx;
 
 	if (idx == X86_PMC_IDX_FIXED_BTS)
 		return 0;
@@ -912,8 +924,8 @@ x86_perf_event_set_period(struct perf_event *event)
 	 */
 	atomic64_set(&hwc->prev_count, (u64)-left);
 
-	err = checking_wrmsrl(hwc->event_base + idx,
-			     (u64)(-left) & x86_pmu.event_mask);
+	wrmsrl(hwc->event_base + idx,
+			(u64)(-left) & x86_pmu.cntval_mask);
 
 	perf_event_update_userpage(event);
 
@@ -924,7 +936,8 @@ static void x86_pmu_enable_event(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	if (cpuc->enabled)
-		__x86_pmu_enable_event(&event->hw);
+		__x86_pmu_enable_event(&event->hw,
+				       ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
 /*
@@ -950,7 +963,15 @@ static int x86_pmu_enable(struct perf_event *event)
 	if (n < 0)
 		return n;
 
-	ret = x86_schedule_events(cpuc, n, assign);
+	/*
+	 * If group events scheduling transaction was started,
+	 * skip the schedulability test here, it will be peformed
+	 * at commit time(->commit_txn) as a whole
+	 */
+	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+		goto out;
+
+	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	if (ret)
 		return ret;
 	/*
@@ -959,6 +980,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	 */
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
+out:
 	cpuc->n_events = n;
 	cpuc->n_added += n - n0;
 
@@ -991,11 +1013,12 @@ static void x86_pmu_unthrottle(struct perf_event *event)
 void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+	u64 pebs;
 	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
 
-	if (!x86_pmu.num_events)
+	if (!x86_pmu.num_counters)
 		return;
 
 	local_irq_save(flags);
@@ -1008,16 +1031,18 @@ void perf_event_print_debug(void)
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 
 		pr_info("\n");
 		pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
 		pr_info("CPU#%d: status: %016llx\n", cpu, status);
 		pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
+		pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
 	}
 	pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
 		rdmsrl(x86_pmu.perfctr + idx, pmc_count);
 
@@ -1030,7 +1055,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
 			cpu, idx, prev_left);
 	}
-	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 
 		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1095,7 +1120,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -1103,7 +1128,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		hwc = &event->hw;
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;
 
 		/*
@@ -1146,7 +1171,6 @@ void set_perf_event_pending(void)
 
 void perf_events_lapic_init(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	if (!x86_pmu.apic || !x86_pmu_initialized())
 		return;
 
@@ -1154,7 +1178,6 @@ void perf_events_lapic_init(void)
 	 * Always use NMI for PMU
 	 */
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-#endif
 }
 
 static int __kprobes
@@ -1178,9 +1201,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 
 	regs = args->regs;
 
-#ifdef CONFIG_X86_LOCAL_APIC
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-#endif
 	/*
 	 * Can't rely on the handled return value to say it was our NMI, two
 	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1217,118 +1238,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	return &unconstrained;
 }
 
-static int x86_event_sched_in(struct perf_event *event,
-			  struct perf_cpu_context *cpuctx)
-{
-	int ret = 0;
-
-	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = smp_processor_id();
-	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
-
-	if (!is_x86_event(event))
-		ret = event->pmu->enable(event);
-
-	if (!ret && !is_software_event(event))
-		cpuctx->active_oncpu++;
-
-	if (!ret && event->attr.exclusive)
-		cpuctx->exclusive = 1;
-
-	return ret;
-}
-
-static void x86_event_sched_out(struct perf_event *event,
-			    struct perf_cpu_context *cpuctx)
-{
-	event->state = PERF_EVENT_STATE_INACTIVE;
-	event->oncpu = -1;
-
-	if (!is_x86_event(event))
-		event->pmu->disable(event);
-
-	event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
-
-	if (!is_software_event(event))
-		cpuctx->active_oncpu--;
-
-	if (event->attr.exclusive || !cpuctx->active_oncpu)
-		cpuctx->exclusive = 0;
-}
-
-/*
- * Called to enable a whole group of events.
- * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
- * Assumes the caller has disabled interrupts and has
- * frozen the PMU with hw_perf_save_disable.
- *
- * called with PMU disabled. If successful and return value 1,
- * then guaranteed to call perf_enable() and hw_perf_enable()
- */
-int hw_perf_group_sched_in(struct perf_event *leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct perf_event *sub;
-	int assign[X86_PMC_IDX_MAX];
-	int n0, n1, ret;
-
-	/* n0 = total number of events */
-	n0 = collect_events(cpuc, leader, true);
-	if (n0 < 0)
-		return n0;
-
-	ret = x86_schedule_events(cpuc, n0, assign);
-	if (ret)
-		return ret;
-
-	ret = x86_event_sched_in(leader, cpuctx);
-	if (ret)
-		return ret;
-
-	n1 = 1;
-	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		if (sub->state > PERF_EVENT_STATE_OFF) {
-			ret = x86_event_sched_in(sub, cpuctx);
-			if (ret)
-				goto undo;
-			++n1;
-		}
-	}
-	/*
-	 * copy new assignment, now we know it is possible
-	 * will be used by hw_perf_enable()
-	 */
-	memcpy(cpuc->assign, assign, n0*sizeof(int));
-
-	cpuc->n_events  = n0;
-	cpuc->n_added  += n1;
-	ctx->nr_active += n1;
-
-	/*
-	 * 1 means successful and events are active
-	 * This is not quite true because we defer
-	 * actual activation until hw_perf_enable() but
-	 * this way we* ensure caller won't try to enable
-	 * individual events
-	 */
-	return 1;
-undo:
-	x86_event_sched_out(leader, cpuctx);
-	n0  = 1;
-	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
-			x86_event_sched_out(sub, cpuctx);
-			if (++n0 == n1)
-				break;
-		}
-	}
-	return ret;
-}
-
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_p4.c"
+#include "perf_event_intel_lbr.c"
+#include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
 static int __cpuinit
@@ -1402,48 +1316,50 @@ void __init init_hw_perf_events(void)
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
-	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+	if (x86_pmu.quirks)
+		x86_pmu.quirks();
+
+	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-		     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
-		x86_pmu.num_events = X86_PMC_MAX_GENERIC;
+		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 	}
-	perf_event_mask = (1 << x86_pmu.num_events) - 1;
-	perf_max_events = x86_pmu.num_events;
+	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+	perf_max_events = x86_pmu.num_counters;
 
-	if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-		     x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
-		x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
+		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
 	}
 
-	perf_event_mask |=
-		((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
-	x86_pmu.intel_ctrl = perf_event_mask;
+	x86_pmu.intel_ctrl |=
+		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
 	perf_events_lapic_init();
 	register_die_notifier(&perf_event_nmi_notifier);
 
 	unconstrained = (struct event_constraint)
-		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
-				   0, x86_pmu.num_events);
+		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
+				   0, x86_pmu.num_counters);
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != INTEL_ARCH_FIXED_MASK)
+			if (c->cmask != X86_RAW_EVENT_MASK)
 				continue;
 
-			c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
-			c->weight += x86_pmu.num_events;
+			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+			c->weight += x86_pmu.num_counters;
 		}
 	}
 
 	pr_info("... version: %d\n", x86_pmu.version);
-	pr_info("... bit width: %d\n", x86_pmu.event_bits);
-	pr_info("... generic registers: %d\n", x86_pmu.num_events);
-	pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
+	pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
+	pr_info("... generic registers: %d\n", x86_pmu.num_counters);
+	pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
 	pr_info("... max period: %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
-	pr_info("... event mask: %016Lx\n", perf_event_mask);
+	pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
+	pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
 
 	perf_cpu_notifier(x86_pmu_notifier);
 }
@@ -1453,6 +1369,59 @@ static inline void x86_pmu_read(struct perf_event *event)
 	x86_perf_event_update(event);
 }
 
+/*
+ * Start group events scheduling transaction
+ * Set the flag to make pmu::enable() not perform the
+ * schedulability test, it will be performed at commit time
+ */
+static void x86_pmu_start_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+}
+
+/*
+ * Stop group events scheduling transaction
+ * Clear the flag and pmu::enable() will perform the
+ * schedulability test.
+ */
+static void x86_pmu_cancel_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+}
+
+/*
+ * Commit group events scheduling transaction
+ * Perform the group schedulability test as a whole
+ * Return 0 if success
+ */
+static int x86_pmu_commit_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int assign[X86_PMC_IDX_MAX];
+	int n, ret;
+
+	n = cpuc->n_events;
+
+	if (!x86_pmu_initialized())
+		return -EAGAIN;
+
+	ret = x86_pmu.schedule_events(cpuc, n, assign);
+	if (ret)
+		return ret;
+
+	/*
+	 * copy new assignment, now we know it is possible
+	 * will be used by hw_perf_enable()
+	 */
+	memcpy(cpuc->assign, assign, n*sizeof(int));
+
+	return 0;
+}
+
 static const struct pmu pmu = {
 	.enable		= x86_pmu_enable,
 	.disable	= x86_pmu_disable,
@@ -1460,9 +1429,38 @@ static const struct pmu pmu = {
 	.stop		= x86_pmu_stop,
 	.read		= x86_pmu_read,
 	.unthrottle	= x86_pmu_unthrottle,
+	.start_txn	= x86_pmu_start_txn,
+	.cancel_txn	= x86_pmu_cancel_txn,
+	.commit_txn	= x86_pmu_commit_txn,
 };
 
 /*
+ * validate that we can schedule this event
+ */
+static int validate_event(struct perf_event *event)
+{
+	struct cpu_hw_events *fake_cpuc;
+	struct event_constraint *c;
+	int ret = 0;
+
+	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+	if (!fake_cpuc)
+		return -ENOMEM;
+
+	c = x86_pmu.get_event_constraints(fake_cpuc, event);
+
+	if (!c || !c->weight)
+		ret = -ENOSPC;
+
+	if (x86_pmu.put_event_constraints)
+		x86_pmu.put_event_constraints(fake_cpuc, event);
+
+	kfree(fake_cpuc);
+
+	return ret;
+}
+
+/*
  * validate a single event group
  *
  * validation include:
@@ -1502,7 +1500,7 @@ static int validate_group(struct perf_event *event)
 
 	fake_cpuc->n_events = n;
 
-	ret = x86_schedule_events(fake_cpuc, n, NULL);
+	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 
 out_free:
 	kfree(fake_cpuc);
@@ -1527,6 +1525,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 
 		if (event->group_leader != event)
 			err = validate_group(event);
+		else
+			err = validate_event(event);
 
 		event->pmu = tmp;
 	}
@@ -1574,8 +1574,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 	struct perf_callchain_entry *entry = data;
 
-	if (reliable)
-		callchain_store(entry, addr);
+	callchain_store(entry, addr);
 }
 
 static const struct stacktrace_ops backtrace_ops = {
@@ -1597,41 +1596,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
 
-/*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-	unsigned long offset, addr = (unsigned long)from;
-	int type = in_nmi() ? KM_NMI : KM_IRQ0;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page, type);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map, type);
-		put_page(page);
-
-		len  += size;
-		to   += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
-}
-
 #ifdef CONFIG_COMPAT
 static inline int
 perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1727,6 +1691,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 {
 	struct perf_callchain_entry *entry;
 
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* TODO: We don't support guest os callchain now */
+		return NULL;
+	}
+
 	if (in_nmi())
 		entry = &__get_cpu_var(pmc_nmi_entry);
 	else
@@ -1750,3 +1719,37 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 	regs->cs = __KERNEL_CS;
 	local_save_flags(regs->flags);
 }
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	unsigned long ip;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		ip = perf_guest_cbs->get_guest_ip();
+	else
+		ip = instruction_pointer(regs);
+
+	return ip;
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	int misc = 0;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		if (perf_guest_cbs->is_user_mode())
+			misc |= PERF_RECORD_MISC_GUEST_USER;
+		else
+			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (user_mode(regs))
+			misc |= PERF_RECORD_MISC_USER;
+		else
+			misc |= PERF_RECORD_MISC_KERNEL;
+	}
+
+	if (regs->flags & PERF_EFLAGS_EXACT)
+		misc |= PERF_RECORD_MISC_EXACT_IP;
+
+	return misc;
+}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index db6f7d4056e1..611df11ba15e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -2,7 +2,7 @@
 
 static DEFINE_RAW_SPINLOCK(amd_nb_lock);
 
-static __initconst u64 amd_hw_cache_event_ids
+static __initconst const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -111,22 +111,19 @@ static u64 amd_pmu_event_map(int hw_event)
 	return amd_perfmon_event_map[hw_event];
 }
 
-static u64 amd_pmu_raw_event(u64 hw_event)
+static int amd_pmu_hw_config(struct perf_event *event)
 {
-#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
-#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
-#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
-#define K7_EVNTSEL_INV_MASK	0x000800000ULL
-#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
-
-#define K7_EVNTSEL_MASK			\
-	(K7_EVNTSEL_EVENT_MASK |	\
-	 K7_EVNTSEL_UNIT_MASK  |	\
-	 K7_EVNTSEL_EDGE_MASK  |	\
-	 K7_EVNTSEL_INV_MASK   |	\
-	 K7_EVNTSEL_REG_MASK)
-
-	return hw_event & K7_EVNTSEL_MASK;
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.type != PERF_TYPE_RAW)
+		return 0;
+
+	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+	return 0;
 }
 
 /*
@@ -165,7 +162,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 	 * be removed on one CPU at a time AND PMU is disabled
 	 * when we come here
 	 */
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (nb->owners[i] == event) {
 			cmpxchg(nb->owners+i, event, NULL);
 			break;
@@ -215,7 +212,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	struct amd_nb *nb = cpuc->amd_nb;
 	struct perf_event *old = NULL;
-	int max = x86_pmu.num_events;
+	int max = x86_pmu.num_counters;
 	int i, j, k = -1;
 
 	/*
@@ -293,7 +290,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
 	/*
 	 * initialize all possible NB constraints
 	 */
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		__set_bit(i, nb->event_constraints[i].idxmsk);
 		nb->event_constraints[i].weight = 1;
 	}
@@ -371,21 +368,22 @@ static void amd_pmu_cpu_dead(int cpu)
 	raw_spin_unlock(&amd_nb_lock);
 }
 
-static __initconst struct x86_pmu amd_pmu = {
+static __initconst const struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= x86_pmu_handle_irq,
 	.disable_all		= x86_pmu_disable_all,
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
+	.hw_config		= amd_pmu_hw_config,
+	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= amd_pmu_event_map,
-	.raw_event		= amd_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_events		= 4,
-	.event_bits		= 48,
-	.event_mask		= (1ULL << 48) - 1,
+	.num_counters		= 4,
+	.cntval_bits		= 48,
+	.cntval_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9c794ac87837..fdbc652d3feb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -88,7 +88,7 @@ static u64 intel_pmu_event_map(int hw_event)
 	return intel_perfmon_event_map[hw_event];
 }
 
-static __initconst u64 westmere_hw_cache_event_ids
+static __initconst const u64 westmere_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -179,7 +179,7 @@ static __initconst u64 westmere_hw_cache_event_ids
 	},
 };
 
-static __initconst u64 nehalem_hw_cache_event_ids
+static __initconst const u64 nehalem_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -270,7 +270,7 @@ static __initconst u64 nehalem_hw_cache_event_ids
 	},
 };
 
-static __initconst u64 core2_hw_cache_event_ids
+static __initconst const u64 core2_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -361,7 +361,7 @@ static __initconst u64 core2_hw_cache_event_ids
 	},
 };
 
-static __initconst u64 atom_hw_cache_event_ids
+static __initconst const u64 atom_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -452,60 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids
 	},
 };
 
-static u64 intel_pmu_raw_event(u64 hw_event)
-{
-#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define CORE_EVNTSEL_MASK		\
-	(INTEL_ARCH_EVTSEL_MASK |	\
-	 INTEL_ARCH_UNIT_MASK   |	\
-	 INTEL_ARCH_EDGE_MASK   |	\
-	 INTEL_ARCH_INV_MASK    |	\
-	 INTEL_ARCH_CNT_MASK)
-
-	return hw_event & CORE_EVNTSEL_MASK;
-}
-
-static void intel_pmu_enable_bts(u64 config)
-{
-	unsigned long debugctlmsr;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr |= X86_DEBUGCTL_TR;
-	debugctlmsr |= X86_DEBUGCTL_BTS;
-	debugctlmsr |= X86_DEBUGCTL_BTINT;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
-
-	update_debugctlmsr(debugctlmsr);
-}
-
-static void intel_pmu_disable_bts(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	unsigned long debugctlmsr;
-
-	if (!cpuc->ds)
-		return;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr &=
-		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
-		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
-
-	update_debugctlmsr(debugctlmsr);
-}
-
 static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -514,12 +460,17 @@ static void intel_pmu_disable_all(void)
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
+
+	intel_pmu_pebs_disable_all();
+	intel_pmu_lbr_disable_all();
 }
 
-static void intel_pmu_enable_all(void)
+static void intel_pmu_enable_all(int added)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
+	intel_pmu_pebs_enable_all();
+	intel_pmu_lbr_enable_all();
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -533,6 +484,42 @@ static void intel_pmu_enable_all(void)
 	}
 }
 
+/*
+ * Workaround for:
+ *   Intel Errata AAK100 (model 26)
+ *   Intel Errata AAP53  (model 30)
+ *   Intel Errata BD53   (model 44)
+ *
+ * These chips need to be 'reset' when adding counters by programming
+ * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
+ * either in sequence on the same PMC or on different PMCs.
+ */
+static void intel_pmu_nhm_enable_all(int added)
+{
+	if (added) {
+		struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+		int i;
+
+		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
+		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
+		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+		for (i = 0; i < 3; i++) {
+			struct perf_event *event = cpuc->events[i];
+
+			if (!event)
+				continue;
+
+			__x86_pmu_enable_event(&event->hw,
+					 ARCH_PERFMON_EVENTSEL_ENABLE);
+		}
+	}
+	intel_pmu_enable_all(added);
+}
+
 static inline u64 intel_pmu_get_status(void)
 {
 	u64 status;
@@ -547,8 +534,7 @@ static inline void intel_pmu_ack_status(u64 ack)
547 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 534 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
548} 535}
549 536
550static inline void 537static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
551intel_pmu_disable_fixed(struct hw_perf_event *hwc)
552{ 538{
553 int idx = hwc->idx - X86_PMC_IDX_FIXED; 539 int idx = hwc->idx - X86_PMC_IDX_FIXED;
554 u64 ctrl_val, mask; 540 u64 ctrl_val, mask;
@@ -557,71 +543,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc)
557 543
558 rdmsrl(hwc->config_base, ctrl_val); 544 rdmsrl(hwc->config_base, ctrl_val);
559 ctrl_val &= ~mask; 545 ctrl_val &= ~mask;
560 (void)checking_wrmsrl(hwc->config_base, ctrl_val); 546 wrmsrl(hwc->config_base, ctrl_val);
561}
562
563static void intel_pmu_drain_bts_buffer(void)
564{
565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
566 struct debug_store *ds = cpuc->ds;
567 struct bts_record {
568 u64 from;
569 u64 to;
570 u64 flags;
571 };
572 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
573 struct bts_record *at, *top;
574 struct perf_output_handle handle;
575 struct perf_event_header header;
576 struct perf_sample_data data;
577 struct pt_regs regs;
578
579 if (!event)
580 return;
581
582 if (!ds)
583 return;
584
585 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
586 top = (struct bts_record *)(unsigned long)ds->bts_index;
587
588 if (top <= at)
589 return;
590
591 ds->bts_index = ds->bts_buffer_base;
592
593 perf_sample_data_init(&data, 0);
594
595 data.period = event->hw.last_period;
596 regs.ip = 0;
597
598 /*
599 * Prepare a generic sample, i.e. fill in the invariant fields.
600 * We will overwrite the from and to address before we output
601 * the sample.
602 */
603 perf_prepare_sample(&header, &data, event, &regs);
604
605 if (perf_output_begin(&handle, event,
606 header.size * (top - at), 1, 1))
607 return;
608
609 for (; at < top; at++) {
610 data.ip = at->from;
611 data.addr = at->to;
612
613 perf_output_sample(&handle, &header, &data, event);
614 }
615
616 perf_output_end(&handle);
617
618 /* There's new data available. */
619 event->hw.interrupts++;
620 event->pending_kill = POLL_IN;
621} 547}
622 548
623static inline void 549static void intel_pmu_disable_event(struct perf_event *event)
624intel_pmu_disable_event(struct perf_event *event)
625{ 550{
626 struct hw_perf_event *hwc = &event->hw; 551 struct hw_perf_event *hwc = &event->hw;
627 552
@@ -637,14 +562,15 @@ intel_pmu_disable_event(struct perf_event *event)
637 } 562 }
638 563
639 x86_pmu_disable_event(event); 564 x86_pmu_disable_event(event);
565
566 if (unlikely(event->attr.precise_ip))
567 intel_pmu_pebs_disable(event);
640} 568}
641 569
642static inline void 570static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
643intel_pmu_enable_fixed(struct hw_perf_event *hwc)
644{ 571{
645 int idx = hwc->idx - X86_PMC_IDX_FIXED; 572 int idx = hwc->idx - X86_PMC_IDX_FIXED;
646 u64 ctrl_val, bits, mask; 573 u64 ctrl_val, bits, mask;
647 int err;
648 574
649 /* 575 /*
650 * Enable IRQ generation (0x8), 576 * Enable IRQ generation (0x8),
@@ -669,7 +595,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc)
669 rdmsrl(hwc->config_base, ctrl_val); 595 rdmsrl(hwc->config_base, ctrl_val);
670 ctrl_val &= ~mask; 596 ctrl_val &= ~mask;
671 ctrl_val |= bits; 597 ctrl_val |= bits;
672 err = checking_wrmsrl(hwc->config_base, ctrl_val); 598 wrmsrl(hwc->config_base, ctrl_val);
673} 599}
674 600
675static void intel_pmu_enable_event(struct perf_event *event) 601static void intel_pmu_enable_event(struct perf_event *event)
@@ -689,7 +615,10 @@ static void intel_pmu_enable_event(struct perf_event *event)
689 return; 615 return;
690 } 616 }
691 617
692 __x86_pmu_enable_event(hwc); 618 if (unlikely(event->attr.precise_ip))
619 intel_pmu_pebs_enable(event);
620
621 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
693} 622}
694 623
695/* 624/*
@@ -708,20 +637,20 @@ static void intel_pmu_reset(void)
708 unsigned long flags; 637 unsigned long flags;
709 int idx; 638 int idx;
710 639
711 if (!x86_pmu.num_events) 640 if (!x86_pmu.num_counters)
712 return; 641 return;
713 642
714 local_irq_save(flags); 643 local_irq_save(flags);
715 644
716 printk("clearing PMU state on CPU#%d\n", smp_processor_id()); 645 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
717 646
718 for (idx = 0; idx < x86_pmu.num_events; idx++) { 647 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
719 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); 648 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
720 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); 649 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
721 } 650 }
722 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { 651 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
723 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 652 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
724 } 653
725 if (ds) 654 if (ds)
726 ds->bts_index = ds->bts_buffer_base; 655 ds->bts_index = ds->bts_buffer_base;
727 656
@@ -747,7 +676,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
747 intel_pmu_drain_bts_buffer(); 676 intel_pmu_drain_bts_buffer();
748 status = intel_pmu_get_status(); 677 status = intel_pmu_get_status();
749 if (!status) { 678 if (!status) {
750 intel_pmu_enable_all(); 679 intel_pmu_enable_all(0);
751 return 0; 680 return 0;
752 } 681 }
753 682
@@ -762,6 +691,15 @@ again:
762 691
763 inc_irq_stat(apic_perf_irqs); 692 inc_irq_stat(apic_perf_irqs);
764 ack = status; 693 ack = status;
694
695 intel_pmu_lbr_read();
696
697 /*
698 * PEBS overflow sets bit 62 in the global status register
699 */
700 if (__test_and_clear_bit(62, (unsigned long *)&status))
701 x86_pmu.drain_pebs(regs);
702
765 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 703 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
766 struct perf_event *event = cpuc->events[bit]; 704 struct perf_event *event = cpuc->events[bit];
767 705
@@ -787,26 +725,22 @@ again:
787 goto again; 725 goto again;
788 726
789done: 727done:
790 intel_pmu_enable_all(); 728 intel_pmu_enable_all(0);
791 return 1; 729 return 1;
792} 730}
793 731
794static struct event_constraint bts_constraint =
795 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
796
797static struct event_constraint * 732static struct event_constraint *
798intel_special_constraints(struct perf_event *event) 733intel_bts_constraints(struct perf_event *event)
799{ 734{
800 unsigned int hw_event; 735 struct hw_perf_event *hwc = &event->hw;
801 736 unsigned int hw_event, bts_event;
802 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
803 737
804 if (unlikely((hw_event == 738 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
805 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && 739 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
806 (event->hw.sample_period == 1))) {
807 740
741 if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
808 return &bts_constraint; 742 return &bts_constraint;
809 } 743
810 return NULL; 744 return NULL;
811} 745}
812 746
@@ -815,24 +749,53 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
815{ 749{
816 struct event_constraint *c; 750 struct event_constraint *c;
817 751
818 c = intel_special_constraints(event); 752 c = intel_bts_constraints(event);
753 if (c)
754 return c;
755
756 c = intel_pebs_constraints(event);
819 if (c) 757 if (c)
820 return c; 758 return c;
821 759
822 return x86_get_event_constraints(cpuc, event); 760 return x86_get_event_constraints(cpuc, event);
823} 761}
824 762
825static __initconst struct x86_pmu core_pmu = { 763static int intel_pmu_hw_config(struct perf_event *event)
764{
765 int ret = x86_pmu_hw_config(event);
766
767 if (ret)
768 return ret;
769
770 if (event->attr.type != PERF_TYPE_RAW)
771 return 0;
772
773 if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
774 return 0;
775
776 if (x86_pmu.version < 3)
777 return -EINVAL;
778
779 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
780 return -EACCES;
781
782 event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
783
784 return 0;
785}
786
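The hw_config path just above gates the AnyThread bit (ARCH_PERFMON_EVENTSEL_ANY, bit 21 of the event select MSR) behind a version-3 PMU and, under a paranoid perf_event setting, CAP_SYS_ADMIN. A minimal user-space sketch of how that path is reached, assuming a kernel with this check in place; the raw event value is only an example (0x3c, unhalted core cycles):

/* any_bit.c - hypothetical probe of the AnyThread check in intel_pmu_hw_config().
 * Build: gcc -o any_bit any_bit.c
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_RAW;
        /* event 0x3c (unhalted core cycles) with the AnyThread bit (1 << 21) set */
        attr.config = (1ULL << 21) | 0x3c;

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                printf("rejected: %s (EINVAL on a pre-v3 PMU, EACCES if unprivileged and paranoid)\n",
                       strerror(errno));
        else
                printf("accepted, counting across both SMT threads\n");
        return 0;
}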
787static __initconst const struct x86_pmu core_pmu = {
826 .name = "core", 788 .name = "core",
827 .handle_irq = x86_pmu_handle_irq, 789 .handle_irq = x86_pmu_handle_irq,
828 .disable_all = x86_pmu_disable_all, 790 .disable_all = x86_pmu_disable_all,
829 .enable_all = x86_pmu_enable_all, 791 .enable_all = x86_pmu_enable_all,
830 .enable = x86_pmu_enable_event, 792 .enable = x86_pmu_enable_event,
831 .disable = x86_pmu_disable_event, 793 .disable = x86_pmu_disable_event,
794 .hw_config = x86_pmu_hw_config,
795 .schedule_events = x86_schedule_events,
832 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 796 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
833 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 797 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
834 .event_map = intel_pmu_event_map, 798 .event_map = intel_pmu_event_map,
835 .raw_event = intel_pmu_raw_event,
836 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 799 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
837 .apic = 1, 800 .apic = 1,
838 /* 801 /*
@@ -845,17 +808,32 @@ static __initconst struct x86_pmu core_pmu = {
845 .event_constraints = intel_core_event_constraints, 808 .event_constraints = intel_core_event_constraints,
846}; 809};
847 810
848static __initconst struct x86_pmu intel_pmu = { 811static void intel_pmu_cpu_starting(int cpu)
812{
813 init_debug_store_on_cpu(cpu);
814 /*
815 * Deal with CPUs that don't clear their LBRs on power-up.
816 */
817 intel_pmu_lbr_reset();
818}
819
820static void intel_pmu_cpu_dying(int cpu)
821{
822 fini_debug_store_on_cpu(cpu);
823}
824
825static __initconst const struct x86_pmu intel_pmu = {
849 .name = "Intel", 826 .name = "Intel",
850 .handle_irq = intel_pmu_handle_irq, 827 .handle_irq = intel_pmu_handle_irq,
851 .disable_all = intel_pmu_disable_all, 828 .disable_all = intel_pmu_disable_all,
852 .enable_all = intel_pmu_enable_all, 829 .enable_all = intel_pmu_enable_all,
853 .enable = intel_pmu_enable_event, 830 .enable = intel_pmu_enable_event,
854 .disable = intel_pmu_disable_event, 831 .disable = intel_pmu_disable_event,
832 .hw_config = intel_pmu_hw_config,
833 .schedule_events = x86_schedule_events,
855 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 834 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
856 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 835 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
857 .event_map = intel_pmu_event_map, 836 .event_map = intel_pmu_event_map,
858 .raw_event = intel_pmu_raw_event,
859 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 837 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
860 .apic = 1, 838 .apic = 1,
861 /* 839 /*
@@ -864,14 +842,38 @@ static __initconst struct x86_pmu intel_pmu = {
864 * the generic event period: 842 * the generic event period:
865 */ 843 */
866 .max_period = (1ULL << 31) - 1, 844 .max_period = (1ULL << 31) - 1,
867 .enable_bts = intel_pmu_enable_bts,
868 .disable_bts = intel_pmu_disable_bts,
869 .get_event_constraints = intel_get_event_constraints, 845 .get_event_constraints = intel_get_event_constraints,
870 846
871 .cpu_starting = init_debug_store_on_cpu, 847 .cpu_starting = intel_pmu_cpu_starting,
872 .cpu_dying = fini_debug_store_on_cpu, 848 .cpu_dying = intel_pmu_cpu_dying,
873}; 849};
874 850
851static void intel_clovertown_quirks(void)
852{
853 /*
854 * PEBS is unreliable due to:
855 *
856 * AJ67 - PEBS may experience CPL leaks
857 * AJ68 - PEBS PMI may be delayed by one event
858 * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
859 * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
860 *
861 * AJ67 could be worked around by restricting the OS/USR flags.
862 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
863 *
864 * AJ106 could possibly be worked around by not allowing LBR
865 * usage from PEBS, including the fixup.
866 * AJ68 could possibly be worked around by always programming
867 * a pebs_event_reset[0] value and coping with the lost events.
868 *
869 * But taken together it might just make sense to not enable PEBS on
870 * these chips.
871 */
872 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
873 x86_pmu.pebs = 0;
874 x86_pmu.pebs_constraints = NULL;
875}
876
875static __init int intel_pmu_init(void) 877static __init int intel_pmu_init(void)
876{ 878{
877 union cpuid10_edx edx; 879 union cpuid10_edx edx;
@@ -881,12 +883,13 @@ static __init int intel_pmu_init(void)
881 int version; 883 int version;
882 884
883 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 885 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
884 /* check for P6 processor family */ 886 switch (boot_cpu_data.x86) {
885 if (boot_cpu_data.x86 == 6) { 887 case 0x6:
886 return p6_pmu_init(); 888 return p6_pmu_init();
887 } else { 889 case 0xf:
890 return p4_pmu_init();
891 }
888 return -ENODEV; 892 return -ENODEV;
889 }
890 } 893 }
891 894
892 /* 895 /*
@@ -904,16 +907,28 @@ static __init int intel_pmu_init(void)
904 x86_pmu = intel_pmu; 907 x86_pmu = intel_pmu;
905 908
906 x86_pmu.version = version; 909 x86_pmu.version = version;
907 x86_pmu.num_events = eax.split.num_events; 910 x86_pmu.num_counters = eax.split.num_counters;
908 x86_pmu.event_bits = eax.split.bit_width; 911 x86_pmu.cntval_bits = eax.split.bit_width;
909 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; 912 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
910 913
911 /* 914 /*
912 * Quirk: v2 perfmon does not report fixed-purpose events, so 915 * Quirk: v2 perfmon does not report fixed-purpose events, so
913 * assume at least 3 events: 916 * assume at least 3 events:
914 */ 917 */
915 if (version > 1) 918 if (version > 1)
916 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); 919 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
920
921 /*
922 * v2 and above have a perf capabilities MSR
923 */
924 if (version > 1) {
925 u64 capabilities;
926
927 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
928 x86_pmu.intel_cap.capabilities = capabilities;
929 }
930
931 intel_ds_init();
917 932
918 /* 933 /*
919 * Install the hw-cache-events table: 934 * Install the hw-cache-events table:
@@ -924,12 +939,15 @@ static __init int intel_pmu_init(void)
924 break; 939 break;
925 940
926 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 941 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
942 x86_pmu.quirks = intel_clovertown_quirks;
927 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 943 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
928 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 944 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
929 case 29: /* six-core 45 nm xeon "Dunnington" */ 945 case 29: /* six-core 45 nm xeon "Dunnington" */
930 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, 946 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
931 sizeof(hw_cache_event_ids)); 947 sizeof(hw_cache_event_ids));
932 948
949 intel_pmu_lbr_init_core();
950
933 x86_pmu.event_constraints = intel_core2_event_constraints; 951 x86_pmu.event_constraints = intel_core2_event_constraints;
934 pr_cont("Core2 events, "); 952 pr_cont("Core2 events, ");
935 break; 953 break;
@@ -940,13 +958,19 @@ static __init int intel_pmu_init(void)
940 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 958 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
941 sizeof(hw_cache_event_ids)); 959 sizeof(hw_cache_event_ids));
942 960
961 intel_pmu_lbr_init_nhm();
962
943 x86_pmu.event_constraints = intel_nehalem_event_constraints; 963 x86_pmu.event_constraints = intel_nehalem_event_constraints;
944 pr_cont("Nehalem/Corei7 events, "); 964 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
965 pr_cont("Nehalem events, ");
945 break; 966 break;
967
946 case 28: /* Atom */ 968 case 28: /* Atom */
947 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 969 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
948 sizeof(hw_cache_event_ids)); 970 sizeof(hw_cache_event_ids));
949 971
972 intel_pmu_lbr_init_atom();
973
950 x86_pmu.event_constraints = intel_gen_event_constraints; 974 x86_pmu.event_constraints = intel_gen_event_constraints;
951 pr_cont("Atom events, "); 975 pr_cont("Atom events, ");
952 break; 976 break;
@@ -956,7 +980,10 @@ static __init int intel_pmu_init(void)
956 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 980 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
957 sizeof(hw_cache_event_ids)); 981 sizeof(hw_cache_event_ids));
958 982
983 intel_pmu_lbr_init_nhm();
984
959 x86_pmu.event_constraints = intel_westmere_event_constraints; 985 x86_pmu.event_constraints = intel_westmere_event_constraints;
986 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
960 pr_cont("Westmere events, "); 987 pr_cont("Westmere events, ");
961 break; 988 break;
962 989
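Taken together, the precise_ip checks added in this file mean a PEBS-backed counter is requested from user space simply by setting the new precise_ip field of perf_event_attr; on trap-like PEBS implementations a value above 1 also arms the LBR so the sampled IP can be rewound to the exact instruction. A minimal sketch of such a request, assuming a kernel with this series applied; consuming the samples from the mmap ring buffer is left out:

/* pebs_open.c - request PEBS-based precise samples (sketch, not from the patch).
 * Build: gcc -o pebs_open pebs_open.c
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size           = sizeof(attr);
        attr.type           = PERF_TYPE_HARDWARE;
        attr.config         = PERF_COUNT_HW_INSTRUCTIONS;
        attr.sample_period  = 100000;
        attr.sample_type    = PERF_SAMPLE_IP;
        attr.precise_ip     = 2;        /* 2 = ask for an exact IP (PEBS + LBR fixup) */
        attr.exclude_kernel = 1;

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open (precise_ip unsupported?)");
                return 1;
        }
        /* samples would now be collected via mmap() on fd; not shown here */
        close(fd);
        return 0;
}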
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
new file mode 100644
index 000000000000..18018d1311cd
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -0,0 +1,641 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/* The maximal number of PEBS events: */
4#define MAX_PEBS_EVENTS 4
5
6/* The size of a BTS record in bytes: */
7#define BTS_RECORD_SIZE 24
8
9#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
10#define PEBS_BUFFER_SIZE PAGE_SIZE
11
12/*
13 * pebs_record_32 for p4 and core not supported
14
15struct pebs_record_32 {
16 u32 flags, ip;
17 u32 ax, bc, cx, dx;
18 u32 si, di, bp, sp;
19};
20
21 */
22
23struct pebs_record_core {
24 u64 flags, ip;
25 u64 ax, bx, cx, dx;
26 u64 si, di, bp, sp;
27 u64 r8, r9, r10, r11;
28 u64 r12, r13, r14, r15;
29};
30
31struct pebs_record_nhm {
32 u64 flags, ip;
33 u64 ax, bx, cx, dx;
34 u64 si, di, bp, sp;
35 u64 r8, r9, r10, r11;
36 u64 r12, r13, r14, r15;
37 u64 status, dla, dse, lat;
38};
39
40/*
41 * A debug store configuration.
42 *
43 * We only support architectures that use 64bit fields.
44 */
45struct debug_store {
46 u64 bts_buffer_base;
47 u64 bts_index;
48 u64 bts_absolute_maximum;
49 u64 bts_interrupt_threshold;
50 u64 pebs_buffer_base;
51 u64 pebs_index;
52 u64 pebs_absolute_maximum;
53 u64 pebs_interrupt_threshold;
54 u64 pebs_event_reset[MAX_PEBS_EVENTS];
55};
56
57static void init_debug_store_on_cpu(int cpu)
58{
59 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
60
61 if (!ds)
62 return;
63
64 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
65 (u32)((u64)(unsigned long)ds),
66 (u32)((u64)(unsigned long)ds >> 32));
67}
68
69static void fini_debug_store_on_cpu(int cpu)
70{
71 if (!per_cpu(cpu_hw_events, cpu).ds)
72 return;
73
74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
75}
76
77static void release_ds_buffers(void)
78{
79 int cpu;
80
81 if (!x86_pmu.bts && !x86_pmu.pebs)
82 return;
83
84 get_online_cpus();
85
86 for_each_online_cpu(cpu)
87 fini_debug_store_on_cpu(cpu);
88
89 for_each_possible_cpu(cpu) {
90 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
91
92 if (!ds)
93 continue;
94
95 per_cpu(cpu_hw_events, cpu).ds = NULL;
96
97 kfree((void *)(unsigned long)ds->pebs_buffer_base);
98 kfree((void *)(unsigned long)ds->bts_buffer_base);
99 kfree(ds);
100 }
101
102 put_online_cpus();
103}
104
105static int reserve_ds_buffers(void)
106{
107 int cpu, err = 0;
108
109 if (!x86_pmu.bts && !x86_pmu.pebs)
110 return 0;
111
112 get_online_cpus();
113
114 for_each_possible_cpu(cpu) {
115 struct debug_store *ds;
116 void *buffer;
117 int max, thresh;
118
119 err = -ENOMEM;
120 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
121 if (unlikely(!ds))
122 break;
123 per_cpu(cpu_hw_events, cpu).ds = ds;
124
125 if (x86_pmu.bts) {
126 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
127 if (unlikely(!buffer))
128 break;
129
130 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
131 thresh = max / 16;
132
133 ds->bts_buffer_base = (u64)(unsigned long)buffer;
134 ds->bts_index = ds->bts_buffer_base;
135 ds->bts_absolute_maximum = ds->bts_buffer_base +
136 max * BTS_RECORD_SIZE;
137 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
138 thresh * BTS_RECORD_SIZE;
139 }
140
141 if (x86_pmu.pebs) {
142 buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
143 if (unlikely(!buffer))
144 break;
145
146 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
147
148 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
149 ds->pebs_index = ds->pebs_buffer_base;
150 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
151 max * x86_pmu.pebs_record_size;
152 /*
153 * Always use single record PEBS
154 */
155 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
156 x86_pmu.pebs_record_size;
157 }
158
159 err = 0;
160 }
161
162 if (err)
163 release_ds_buffers();
164 else {
165 for_each_online_cpu(cpu)
166 init_debug_store_on_cpu(cpu);
167 }
168
169 put_online_cpus();
170
171 return err;
172}
173
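For a concrete picture of what reserve_ds_buffers() sets up per CPU with 4 KiB pages: the 64 KiB BTS buffer holds 64 KiB / 24 = 2730 records with the interrupt threshold 170 records before the end, while the single-page PEBS buffer holds 28 core-format or 23 Nehalem-format records with the threshold placed after a single record. A small stand-alone check of that arithmetic (not kernel code; record sizes follow from the structs above):

/* ds_geometry.c - reproduce the DS buffer arithmetic from reserve_ds_buffers().
 * Stand-alone illustration only; mirrors the constants defined above.
 */
#include <stdio.h>

#define PAGE_SIZE        4096UL
#define BTS_RECORD_SIZE  24UL                   /* from, to, flags: 3 * u64 */
#define BTS_BUFFER_SIZE  (PAGE_SIZE << 4)       /* 64 KiB */
#define PEBS_BUFFER_SIZE PAGE_SIZE
#define PEBS_RECORD_CORE (18 * 8UL)             /* sizeof(struct pebs_record_core) */
#define PEBS_RECORD_NHM  (22 * 8UL)             /* sizeof(struct pebs_record_nhm) */

int main(void)
{
        unsigned long bts_max    = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;  /* 2730 records */
        unsigned long bts_thresh = bts_max / 16;                       /* 170 records */

        printf("BTS:  %lu records, interrupt %lu records before the end\n",
               bts_max, bts_thresh);
        printf("PEBS: %lu core records or %lu nhm records per buffer, threshold after 1 record\n",
               PEBS_BUFFER_SIZE / PEBS_RECORD_CORE,    /* 28 */
               PEBS_BUFFER_SIZE / PEBS_RECORD_NHM);    /* 23 */
        return 0;
}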
174/*
175 * BTS
176 */
177
178static struct event_constraint bts_constraint =
179 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
180
181static void intel_pmu_enable_bts(u64 config)
182{
183 unsigned long debugctlmsr;
184
185 debugctlmsr = get_debugctlmsr();
186
187 debugctlmsr |= DEBUGCTLMSR_TR;
188 debugctlmsr |= DEBUGCTLMSR_BTS;
189 debugctlmsr |= DEBUGCTLMSR_BTINT;
190
191 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
192 debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
193
194 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
195 debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
196
197 update_debugctlmsr(debugctlmsr);
198}
199
200static void intel_pmu_disable_bts(void)
201{
202 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
203 unsigned long debugctlmsr;
204
205 if (!cpuc->ds)
206 return;
207
208 debugctlmsr = get_debugctlmsr();
209
210 debugctlmsr &=
211 ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
212 DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
213
214 update_debugctlmsr(debugctlmsr);
215}
216
217static void intel_pmu_drain_bts_buffer(void)
218{
219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
220 struct debug_store *ds = cpuc->ds;
221 struct bts_record {
222 u64 from;
223 u64 to;
224 u64 flags;
225 };
226 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
227 struct bts_record *at, *top;
228 struct perf_output_handle handle;
229 struct perf_event_header header;
230 struct perf_sample_data data;
231 struct pt_regs regs;
232
233 if (!event)
234 return;
235
236 if (!ds)
237 return;
238
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
240 top = (struct bts_record *)(unsigned long)ds->bts_index;
241
242 if (top <= at)
243 return;
244
245 ds->bts_index = ds->bts_buffer_base;
246
247 perf_sample_data_init(&data, 0);
248 data.period = event->hw.last_period;
249 regs.ip = 0;
250
251 /*
252 * Prepare a generic sample, i.e. fill in the invariant fields.
253 * We will overwrite the from and to address before we output
254 * the sample.
255 */
256 perf_prepare_sample(&header, &data, event, &regs);
257
258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
259 return;
260
261 for (; at < top; at++) {
262 data.ip = at->from;
263 data.addr = at->to;
264
265 perf_output_sample(&handle, &header, &data, event);
266 }
267
268 perf_output_end(&handle);
269
270 /* There's new data available. */
271 event->hw.interrupts++;
272 event->pending_kill = POLL_IN;
273}
274
275/*
276 * PEBS
277 */
278
279static struct event_constraint intel_core_pebs_events[] = {
280 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
281 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
282 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
283 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
284 PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
285 PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
286 PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
287 PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
288 PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
289 EVENT_CONSTRAINT_END
290};
291
292static struct event_constraint intel_nehalem_pebs_events[] = {
293 PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
294 PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
295 PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
296 PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */
297 PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
298 PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
299 PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
300 PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
301 PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
302 EVENT_CONSTRAINT_END
303};
304
305static struct event_constraint *
306intel_pebs_constraints(struct perf_event *event)
307{
308 struct event_constraint *c;
309
310 if (!event->attr.precise_ip)
311 return NULL;
312
313 if (x86_pmu.pebs_constraints) {
314 for_each_event_constraint(c, x86_pmu.pebs_constraints) {
315 if ((event->hw.config & c->cmask) == c->code)
316 return c;
317 }
318 }
319
320 return &emptyconstraint;
321}
322
323static void intel_pmu_pebs_enable(struct perf_event *event)
324{
325 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
326 struct hw_perf_event *hwc = &event->hw;
327
328 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
329
330 cpuc->pebs_enabled |= 1ULL << hwc->idx;
331 WARN_ON_ONCE(cpuc->enabled);
332
333 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
334 intel_pmu_lbr_enable(event);
335}
336
337static void intel_pmu_pebs_disable(struct perf_event *event)
338{
339 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
340 struct hw_perf_event *hwc = &event->hw;
341
342 cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
343 if (cpuc->enabled)
344 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
345
346 hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
347
348 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
349 intel_pmu_lbr_disable(event);
350}
351
352static void intel_pmu_pebs_enable_all(void)
353{
354 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
355
356 if (cpuc->pebs_enabled)
357 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
358}
359
360static void intel_pmu_pebs_disable_all(void)
361{
362 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
363
364 if (cpuc->pebs_enabled)
365 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
366}
367
368#include <asm/insn.h>
369
370static inline bool kernel_ip(unsigned long ip)
371{
372#ifdef CONFIG_X86_32
373 return ip > PAGE_OFFSET;
374#else
375 return (long)ip < 0;
376#endif
377}
378
379static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
380{
381 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
382 unsigned long from = cpuc->lbr_entries[0].from;
383 unsigned long old_to, to = cpuc->lbr_entries[0].to;
384 unsigned long ip = regs->ip;
385
386 /*
387 * We don't need to fixup if the PEBS assist is fault like
388 */
389 if (!x86_pmu.intel_cap.pebs_trap)
390 return 1;
391
392 /*
393 * No LBR entry, no basic block, no rewinding
394 */
395 if (!cpuc->lbr_stack.nr || !from || !to)
396 return 0;
397
398 /*
399 * Basic blocks should never cross user/kernel boundaries
400 */
401 if (kernel_ip(ip) != kernel_ip(to))
402 return 0;
403
404 /*
405 * unsigned math, either ip is before the start (impossible) or
406 * the basic block is larger than 1 page (sanity)
407 */
408 if ((ip - to) > PAGE_SIZE)
409 return 0;
410
411 /*
412 * We sampled a branch insn, rewind using the LBR stack
413 */
414 if (ip == to) {
415 regs->ip = from;
416 return 1;
417 }
418
419 do {
420 struct insn insn;
421 u8 buf[MAX_INSN_SIZE];
422 void *kaddr;
423
424 old_to = to;
425 if (!kernel_ip(ip)) {
426 int bytes, size = MAX_INSN_SIZE;
427
428 bytes = copy_from_user_nmi(buf, (void __user *)to, size);
429 if (bytes != size)
430 return 0;
431
432 kaddr = buf;
433 } else
434 kaddr = (void *)to;
435
436 kernel_insn_init(&insn, kaddr);
437 insn_get_length(&insn);
438 to += insn.length;
439 } while (to < ip);
440
441 if (to == ip) {
442 regs->ip = old_to;
443 return 1;
444 }
445
446 /*
447 * Even though we decoded the basic block, the instruction stream
448 * never matched the given IP, either the TO or the IP got corrupted.
449 */
450 return 0;
451}
452
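To make the rewind above concrete: with a trap-like PEBS assist the reported IP points one instruction past the one that triggered the event, so the loop decodes forward from the last LBR branch target, remembering where each instruction starts, and reports old_to once the reported IP is reached. A stand-alone sketch of just that walk, with made-up instruction lengths standing in for the kernel's x86 instruction decoder:

/* pebs_rewind.c - illustrate the basic-block walk in intel_pmu_pebs_fixup_ip().
 * The instruction lengths are invented; the kernel uses the x86 insn decoder.
 */
#include <stdio.h>

int main(void)
{
        unsigned long to = 0x1000;              /* last LBR branch target */
        unsigned long ip = 0x1007;              /* trap-like PEBS IP (one insn too far) */
        unsigned int  len[] = { 3, 4, 2 };      /* pretend decoded instruction lengths */
        unsigned long old_to = to;
        unsigned int  i = 0;

        while (to < ip) {
                old_to = to;
                to += len[i++];                 /* insn_get_length() in the real code */
        }

        if (to == ip)
                printf("exact IP is %#lx (instruction before %#lx)\n", old_to, ip);
        else
                printf("stream never hit the reported IP, give up\n");
        return 0;
}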
453static int intel_pmu_save_and_restart(struct perf_event *event);
454
455static void __intel_pmu_pebs_event(struct perf_event *event,
456 struct pt_regs *iregs, void *__pebs)
457{
458 /*
459 * We cast to pebs_record_core since that is a subset of
460 * both formats and we don't use the other fields in this
461 * routine.
462 */
463 struct pebs_record_core *pebs = __pebs;
464 struct perf_sample_data data;
465 struct pt_regs regs;
466
467 if (!intel_pmu_save_and_restart(event))
468 return;
469
470 perf_sample_data_init(&data, 0);
471 data.period = event->hw.last_period;
472
473 /*
474 * We use the interrupt regs as a base because the PEBS record
475 * does not contain a full regs set, specifically it seems to
476 * lack segment descriptors, which get used by things like
477 * user_mode().
478 *
479 * In the simple case fix up only the IP and BP,SP regs, for
480 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
481 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
482 */
483 regs = *iregs;
484 regs.ip = pebs->ip;
485 regs.bp = pebs->bp;
486 regs.sp = pebs->sp;
487
488 if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
489 regs.flags |= PERF_EFLAGS_EXACT;
490 else
491 regs.flags &= ~PERF_EFLAGS_EXACT;
492
493 if (perf_event_overflow(event, 1, &data, &regs))
494 x86_pmu_stop(event);
495}
496
497static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
498{
499 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
500 struct debug_store *ds = cpuc->ds;
501 struct perf_event *event = cpuc->events[0]; /* PMC0 only */
502 struct pebs_record_core *at, *top;
503 int n;
504
505 if (!ds || !x86_pmu.pebs)
506 return;
507
508 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
509 top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
510
511 /*
512 * Whatever else happens, drain the thing
513 */
514 ds->pebs_index = ds->pebs_buffer_base;
515
516 if (!test_bit(0, cpuc->active_mask))
517 return;
518
519 WARN_ON_ONCE(!event);
520
521 if (!event->attr.precise_ip)
522 return;
523
524 n = top - at;
525 if (n <= 0)
526 return;
527
528 /*
529 * Should not happen, we program the threshold at 1 and do not
530 * set a reset value.
531 */
532 WARN_ON_ONCE(n > 1);
533 at += n - 1;
534
535 __intel_pmu_pebs_event(event, iregs, at);
536}
537
538static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
539{
540 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
541 struct debug_store *ds = cpuc->ds;
542 struct pebs_record_nhm *at, *top;
543 struct perf_event *event = NULL;
544 u64 status = 0;
545 int bit, n;
546
547 if (!ds || !x86_pmu.pebs)
548 return;
549
550 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
551 top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
552
553 ds->pebs_index = ds->pebs_buffer_base;
554
555 n = top - at;
556 if (n <= 0)
557 return;
558
559 /*
560 * Should not happen, we program the threshold at 1 and do not
561 * set a reset value.
562 */
563 WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
564
565 for ( ; at < top; at++) {
566 for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
567 event = cpuc->events[bit];
568 if (!test_bit(bit, cpuc->active_mask))
569 continue;
570
571 WARN_ON_ONCE(!event);
572
573 if (!event->attr.precise_ip)
574 continue;
575
576 if (__test_and_set_bit(bit, (unsigned long *)&status))
577 continue;
578
579 break;
580 }
581
582 if (!event || bit >= MAX_PEBS_EVENTS)
583 continue;
584
585 __intel_pmu_pebs_event(event, iregs, at);
586 }
587}
588
589/*
590 * BTS, PEBS probe and setup
591 */
592
593static void intel_ds_init(void)
594{
595 /*
596 * No support for 32bit formats
597 */
598 if (!boot_cpu_has(X86_FEATURE_DTES64))
599 return;
600
601 x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
602 x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
603 if (x86_pmu.pebs) {
604 char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
605 int format = x86_pmu.intel_cap.pebs_format;
606
607 switch (format) {
608 case 0:
609 printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
610 x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
611 x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
612 x86_pmu.pebs_constraints = intel_core_pebs_events;
613 break;
614
615 case 1:
616 printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
617 x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
618 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
619 x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
620 break;
621
622 default:
623 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
624 x86_pmu.pebs = 0;
625 break;
626 }
627 }
628}
629
630#else /* CONFIG_CPU_SUP_INTEL */
631
632static int reserve_ds_buffers(void)
633{
634 return 0;
635}
636
637static void release_ds_buffers(void)
638{
639}
640
641#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
new file mode 100644
index 000000000000..d202c1bece1a
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -0,0 +1,218 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3enum {
4 LBR_FORMAT_32 = 0x00,
5 LBR_FORMAT_LIP = 0x01,
6 LBR_FORMAT_EIP = 0x02,
7 LBR_FORMAT_EIP_FLAGS = 0x03,
8};
9
10/*
11 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
12 * otherwise it becomes nearly impossible to get a reliable stack.
13 */
14
15static void __intel_pmu_lbr_enable(void)
16{
17 u64 debugctl;
18
19 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
20 debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
21 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
22}
23
24static void __intel_pmu_lbr_disable(void)
25{
26 u64 debugctl;
27
28 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
29 debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
30 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
31}
32
33static void intel_pmu_lbr_reset_32(void)
34{
35 int i;
36
37 for (i = 0; i < x86_pmu.lbr_nr; i++)
38 wrmsrl(x86_pmu.lbr_from + i, 0);
39}
40
41static void intel_pmu_lbr_reset_64(void)
42{
43 int i;
44
45 for (i = 0; i < x86_pmu.lbr_nr; i++) {
46 wrmsrl(x86_pmu.lbr_from + i, 0);
47 wrmsrl(x86_pmu.lbr_to + i, 0);
48 }
49}
50
51static void intel_pmu_lbr_reset(void)
52{
53 if (!x86_pmu.lbr_nr)
54 return;
55
56 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
57 intel_pmu_lbr_reset_32();
58 else
59 intel_pmu_lbr_reset_64();
60}
61
62static void intel_pmu_lbr_enable(struct perf_event *event)
63{
64 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
65
66 if (!x86_pmu.lbr_nr)
67 return;
68
69 WARN_ON_ONCE(cpuc->enabled);
70
71 /*
72 * Reset the LBR stack if we changed task context to
73 * avoid data leaks.
74 */
75
76 if (event->ctx->task && cpuc->lbr_context != event->ctx) {
77 intel_pmu_lbr_reset();
78 cpuc->lbr_context = event->ctx;
79 }
80
81 cpuc->lbr_users++;
82}
83
84static void intel_pmu_lbr_disable(struct perf_event *event)
85{
86 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
87
88 if (!x86_pmu.lbr_nr)
89 return;
90
91 cpuc->lbr_users--;
92 WARN_ON_ONCE(cpuc->lbr_users < 0);
93
94 if (cpuc->enabled && !cpuc->lbr_users)
95 __intel_pmu_lbr_disable();
96}
97
98static void intel_pmu_lbr_enable_all(void)
99{
100 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
101
102 if (cpuc->lbr_users)
103 __intel_pmu_lbr_enable();
104}
105
106static void intel_pmu_lbr_disable_all(void)
107{
108 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
109
110 if (cpuc->lbr_users)
111 __intel_pmu_lbr_disable();
112}
113
114static inline u64 intel_pmu_lbr_tos(void)
115{
116 u64 tos;
117
118 rdmsrl(x86_pmu.lbr_tos, tos);
119
120 return tos;
121}
122
123static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
124{
125 unsigned long mask = x86_pmu.lbr_nr - 1;
126 u64 tos = intel_pmu_lbr_tos();
127 int i;
128
129 for (i = 0; i < x86_pmu.lbr_nr; i++) {
130 unsigned long lbr_idx = (tos - i) & mask;
131 union {
132 struct {
133 u32 from;
134 u32 to;
135 };
136 u64 lbr;
137 } msr_lastbranch;
138
139 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
140
141 cpuc->lbr_entries[i].from = msr_lastbranch.from;
142 cpuc->lbr_entries[i].to = msr_lastbranch.to;
143 cpuc->lbr_entries[i].flags = 0;
144 }
145 cpuc->lbr_stack.nr = i;
146}
147
148#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
149
150/*
151 * Due to lack of segmentation in Linux the effective address (offset)
152 * is the same as the linear address, allowing us to merge the LIP and EIP
153 * LBR formats.
154 */
155static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
156{
157 unsigned long mask = x86_pmu.lbr_nr - 1;
158 int lbr_format = x86_pmu.intel_cap.lbr_format;
159 u64 tos = intel_pmu_lbr_tos();
160 int i;
161
162 for (i = 0; i < x86_pmu.lbr_nr; i++) {
163 unsigned long lbr_idx = (tos - i) & mask;
164 u64 from, to, flags = 0;
165
166 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
167 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
168
169 if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
170 flags = !!(from & LBR_FROM_FLAG_MISPRED);
171 from = (u64)((((s64)from) << 1) >> 1);
172 }
173
174 cpuc->lbr_entries[i].from = from;
175 cpuc->lbr_entries[i].to = to;
176 cpuc->lbr_entries[i].flags = flags;
177 }
178 cpuc->lbr_stack.nr = i;
179}
180
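On the EIP_FLAGS format the mispredict flag lives in bit 63 of the FROM value, so the code above extracts it and then strips it with a shift-left/arithmetic-shift-right pair, which also copies bit 62 back into bit 63 so canonical kernel addresses come back intact. A tiny stand-alone illustration of that trick (the address is made up):

/* lbr_from.c - the bit-63 strip used in intel_pmu_lbr_read_64() (illustration). */
#include <stdio.h>
#include <stdint.h>

#define LBR_FROM_FLAG_MISPRED   (1ULL << 63)

int main(void)
{
        /* a mispredicted branch from a (made up) user-space address */
        uint64_t from = LBR_FROM_FLAG_MISPRED | 0x0000000000400123ULL;
        int mispred   = !!(from & LBR_FROM_FLAG_MISPRED);

        /*
         * Shift bit 63 out and arithmetic-shift it back in: the flag is gone
         * and bit 62 is replicated into bit 63, so canonical kernel addresses
         * (bits 63..47 all set) survive the strip as well.
         */
        from = (uint64_t)((((int64_t)from) << 1) >> 1);

        printf("mispredicted=%d from=%#llx\n",
               mispred, (unsigned long long)from);      /* 1, 0x400123 */
        return 0;
}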
181static void intel_pmu_lbr_read(void)
182{
183 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
184
185 if (!cpuc->lbr_users)
186 return;
187
188 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
189 intel_pmu_lbr_read_32(cpuc);
190 else
191 intel_pmu_lbr_read_64(cpuc);
192}
193
194static void intel_pmu_lbr_init_core(void)
195{
196 x86_pmu.lbr_nr = 4;
197 x86_pmu.lbr_tos = 0x01c9;
198 x86_pmu.lbr_from = 0x40;
199 x86_pmu.lbr_to = 0x60;
200}
201
202static void intel_pmu_lbr_init_nhm(void)
203{
204 x86_pmu.lbr_nr = 16;
205 x86_pmu.lbr_tos = 0x01c9;
206 x86_pmu.lbr_from = 0x680;
207 x86_pmu.lbr_to = 0x6c0;
208}
209
210static void intel_pmu_lbr_init_atom(void)
211{
212 x86_pmu.lbr_nr = 8;
213 x86_pmu.lbr_tos = 0x01c9;
214 x86_pmu.lbr_from = 0x40;
215 x86_pmu.lbr_to = 0x60;
216}
217
218#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
new file mode 100644
index 000000000000..424fc8de68e4
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -0,0 +1,857 @@
1/*
2 * Netburst Performance Events (P4, old Xeon)
3 *
4 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
5 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
6 *
7 * For licensing details see kernel-base/COPYING
8 */
9
10#ifdef CONFIG_CPU_SUP_INTEL
11
12#include <asm/perf_event_p4.h>
13
14#define P4_CNTR_LIMIT 3
15/*
16 * array indices: 0,1 - HT threads, used with HT enabled cpu
17 */
18struct p4_event_bind {
19 unsigned int opcode; /* Event code and ESCR selector */
20 unsigned int escr_msr[2]; /* ESCR MSR for this event */
21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
22};
23
24struct p4_cache_event_bind {
25 unsigned int metric_pebs;
26 unsigned int metric_vert;
27};
28
29#define P4_GEN_CACHE_EVENT_BIND(name) \
30 [P4_CACHE__##name] = { \
31 .metric_pebs = P4_PEBS__##name, \
32 .metric_vert = P4_VERT__##name, \
33 }
34
35static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
36 P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
37 P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
38 P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
39 P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
40};
41
42/*
43 * Note that we don't use CCCR1 here; there is an
44 * exception for P4_BSQ_ALLOCATION but we simply have
45 * no workaround for it
46 *
47 * consider this binding as the resources a particular
48 * event may borrow; it doesn't contain the EventMask,
49 * Tags and friends -- they are left to the caller
50 */
51static struct p4_event_bind p4_event_bind_map[] = {
52 [P4_EVENT_TC_DELIVER_MODE] = {
53 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
54 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
55 .cntr = { {4, 5, -1}, {6, 7, -1} },
56 },
57 [P4_EVENT_BPU_FETCH_REQUEST] = {
58 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
59 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
60 .cntr = { {0, -1, -1}, {2, -1, -1} },
61 },
62 [P4_EVENT_ITLB_REFERENCE] = {
63 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
64 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
65 .cntr = { {0, -1, -1}, {2, -1, -1} },
66 },
67 [P4_EVENT_MEMORY_CANCEL] = {
68 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
69 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
70 .cntr = { {8, 9, -1}, {10, 11, -1} },
71 },
72 [P4_EVENT_MEMORY_COMPLETE] = {
73 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
74 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
75 .cntr = { {8, 9, -1}, {10, 11, -1} },
76 },
77 [P4_EVENT_LOAD_PORT_REPLAY] = {
78 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
79 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
80 .cntr = { {8, 9, -1}, {10, 11, -1} },
81 },
82 [P4_EVENT_STORE_PORT_REPLAY] = {
83 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
84 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
85 .cntr = { {8, 9, -1}, {10, 11, -1} },
86 },
87 [P4_EVENT_MOB_LOAD_REPLAY] = {
88 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
89 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
90 .cntr = { {0, -1, -1}, {2, -1, -1} },
91 },
92 [P4_EVENT_PAGE_WALK_TYPE] = {
93 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
94 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
95 .cntr = { {0, -1, -1}, {2, -1, -1} },
96 },
97 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
98 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
99 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
100 .cntr = { {0, -1, -1}, {2, -1, -1} },
101 },
102 [P4_EVENT_IOQ_ALLOCATION] = {
103 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
104 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
105 .cntr = { {0, -1, -1}, {2, -1, -1} },
106 },
107 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
108 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
109 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
110 .cntr = { {2, -1, -1}, {3, -1, -1} },
111 },
112 [P4_EVENT_FSB_DATA_ACTIVITY] = {
113 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
114 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
115 .cntr = { {0, -1, -1}, {2, -1, -1} },
116 },
117 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
118 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
119 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
120 .cntr = { {0, -1, -1}, {1, -1, -1} },
121 },
122 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
123 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
124 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
125 .cntr = { {2, -1, -1}, {3, -1, -1} },
126 },
127 [P4_EVENT_SSE_INPUT_ASSIST] = {
128 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
129 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
130 .cntr = { {8, 9, -1}, {10, 11, -1} },
131 },
132 [P4_EVENT_PACKED_SP_UOP] = {
133 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
134 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
135 .cntr = { {8, 9, -1}, {10, 11, -1} },
136 },
137 [P4_EVENT_PACKED_DP_UOP] = {
138 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
139 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
140 .cntr = { {8, 9, -1}, {10, 11, -1} },
141 },
142 [P4_EVENT_SCALAR_SP_UOP] = {
143 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
144 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
145 .cntr = { {8, 9, -1}, {10, 11, -1} },
146 },
147 [P4_EVENT_SCALAR_DP_UOP] = {
148 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
149 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
150 .cntr = { {8, 9, -1}, {10, 11, -1} },
151 },
152 [P4_EVENT_64BIT_MMX_UOP] = {
153 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
154 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
155 .cntr = { {8, 9, -1}, {10, 11, -1} },
156 },
157 [P4_EVENT_128BIT_MMX_UOP] = {
158 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
159 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
160 .cntr = { {8, 9, -1}, {10, 11, -1} },
161 },
162 [P4_EVENT_X87_FP_UOP] = {
163 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
164 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
165 .cntr = { {8, 9, -1}, {10, 11, -1} },
166 },
167 [P4_EVENT_TC_MISC] = {
168 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
169 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
170 .cntr = { {4, 5, -1}, {6, 7, -1} },
171 },
172 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
173 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
174 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
175 .cntr = { {0, -1, -1}, {2, -1, -1} },
176 },
177 [P4_EVENT_TC_MS_XFER] = {
178 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
179 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
180 .cntr = { {4, 5, -1}, {6, 7, -1} },
181 },
182 [P4_EVENT_UOP_QUEUE_WRITES] = {
183 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
184 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
185 .cntr = { {4, 5, -1}, {6, 7, -1} },
186 },
187 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
188 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
189 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
190 .cntr = { {4, 5, -1}, {6, 7, -1} },
191 },
192 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
193 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
194 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
195 .cntr = { {4, 5, -1}, {6, 7, -1} },
196 },
197 [P4_EVENT_RESOURCE_STALL] = {
198 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
199 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
200 .cntr = { {12, 13, 16}, {14, 15, 17} },
201 },
202 [P4_EVENT_WC_BUFFER] = {
203 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
204 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
205 .cntr = { {8, 9, -1}, {10, 11, -1} },
206 },
207 [P4_EVENT_B2B_CYCLES] = {
208 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
209 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
210 .cntr = { {0, -1, -1}, {2, -1, -1} },
211 },
212 [P4_EVENT_BNR] = {
213 .opcode = P4_OPCODE(P4_EVENT_BNR),
214 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
215 .cntr = { {0, -1, -1}, {2, -1, -1} },
216 },
217 [P4_EVENT_SNOOP] = {
218 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
219 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
220 .cntr = { {0, -1, -1}, {2, -1, -1} },
221 },
222 [P4_EVENT_RESPONSE] = {
223 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
224 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
225 .cntr = { {0, -1, -1}, {2, -1, -1} },
226 },
227 [P4_EVENT_FRONT_END_EVENT] = {
228 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
229 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
230 .cntr = { {12, 13, 16}, {14, 15, 17} },
231 },
232 [P4_EVENT_EXECUTION_EVENT] = {
233 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
234 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
235 .cntr = { {12, 13, 16}, {14, 15, 17} },
236 },
237 [P4_EVENT_REPLAY_EVENT] = {
238 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
239 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
240 .cntr = { {12, 13, 16}, {14, 15, 17} },
241 },
242 [P4_EVENT_INSTR_RETIRED] = {
243 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
244 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
245 .cntr = { {12, 13, 16}, {14, 15, 17} },
246 },
247 [P4_EVENT_UOPS_RETIRED] = {
248 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
249 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
250 .cntr = { {12, 13, 16}, {14, 15, 17} },
251 },
252 [P4_EVENT_UOP_TYPE] = {
253 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
254 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
255 .cntr = { {12, 13, 16}, {14, 15, 17} },
256 },
257 [P4_EVENT_BRANCH_RETIRED] = {
258 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
259 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
260 .cntr = { {12, 13, 16}, {14, 15, 17} },
261 },
262 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
263 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
264 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
265 .cntr = { {12, 13, 16}, {14, 15, 17} },
266 },
267 [P4_EVENT_X87_ASSIST] = {
268 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
269 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
270 .cntr = { {12, 13, 16}, {14, 15, 17} },
271 },
272 [P4_EVENT_MACHINE_CLEAR] = {
273 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
274 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
275 .cntr = { {12, 13, 16}, {14, 15, 17} },
276 },
277 [P4_EVENT_INSTR_COMPLETED] = {
278 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
279 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
280 .cntr = { {12, 13, 16}, {14, 15, 17} },
281 },
282};
283
284#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \
285 p4_config_pack_escr(P4_ESCR_EVENT(event) | \
286 P4_ESCR_EMASK_BIT(event, bit)) | \
287 p4_config_pack_cccr(cache_event | \
288 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
289
290static __initconst const u64 p4_hw_cache_event_ids
291 [PERF_COUNT_HW_CACHE_MAX]
292 [PERF_COUNT_HW_CACHE_OP_MAX]
293 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
294{
295 [ C(L1D ) ] = {
296 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0,
298 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
299 P4_CACHE__1stl_cache_load_miss_retired),
300 },
301 },
302 [ C(LL ) ] = {
303 [ C(OP_READ) ] = {
304 [ C(RESULT_ACCESS) ] = 0x0,
305 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
306 P4_CACHE__2ndl_cache_load_miss_retired),
307 },
308},
309 [ C(DTLB) ] = {
310 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x0,
312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
313 P4_CACHE__dtlb_load_miss_retired),
314 },
315 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0,
317 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
318 P4_CACHE__dtlb_store_miss_retired),
319 },
320 },
321 [ C(ITLB) ] = {
322 [ C(OP_READ) ] = {
323 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
324 P4_CACHE__itlb_reference_hit),
325 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
326 P4_CACHE__itlb_reference_miss),
327 },
328 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1,
330 [ C(RESULT_MISS) ] = -1,
331 },
332 [ C(OP_PREFETCH) ] = {
333 [ C(RESULT_ACCESS) ] = -1,
334 [ C(RESULT_MISS) ] = -1,
335 },
336 },
337};
338
339static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
340 /* non-halted CPU clocks */
341 [PERF_COUNT_HW_CPU_CYCLES] =
342 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
343 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
344
345 /*
346 * retired instructions
347 * for the sake of simplicity we don't use the FSB tagging
348 */
349 [PERF_COUNT_HW_INSTRUCTIONS] =
350 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
351 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
352 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
353
354 /* cache hits */
355 [PERF_COUNT_HW_CACHE_REFERENCES] =
356 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
359 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
360 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
361 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
362 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
363
364 /* cache misses */
365 [PERF_COUNT_HW_CACHE_MISSES] =
366 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
367 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
368 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
369 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
370
371 /* branch instructions retired */
372 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
373 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
374 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
375 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
376 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
377 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
378
379 /* mispredicted branches retired */
380 [PERF_COUNT_HW_BRANCH_MISSES] =
381 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
382 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
383
384 /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
385 [PERF_COUNT_HW_BUS_CYCLES] =
386 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
387 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
388 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
389 p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
390};
391
392static struct p4_event_bind *p4_config_get_bind(u64 config)
393{
394 unsigned int evnt = p4_config_unpack_event(config);
395 struct p4_event_bind *bind = NULL;
396
397 if (evnt < ARRAY_SIZE(p4_event_bind_map))
398 bind = &p4_event_bind_map[evnt];
399
400 return bind;
401}
402
403static u64 p4_pmu_event_map(int hw_event)
404{
405 struct p4_event_bind *bind;
406 unsigned int esel;
407 u64 config;
408
409 config = p4_general_events[hw_event];
410 bind = p4_config_get_bind(config);
411 esel = P4_OPCODE_ESEL(bind->opcode);
412 config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
413
414 return config;
415}
416
417static int p4_hw_config(struct perf_event *event)
418{
419 int cpu = get_cpu();
420 int rc = 0;
421 unsigned int evnt;
422 u32 escr, cccr;
423
424 /*
425 * the reason we read the cpu this early is that if we get scheduled
426 * for the first time on the same cpu, we will not need to swap the
427 * thread-specific flags in config (and will save some cpu cycles)
428 */
429
430 cccr = p4_default_cccr_conf(cpu);
431 escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
432 event->attr.exclude_user);
433 event->hw.config = p4_config_pack_escr(escr) |
434 p4_config_pack_cccr(cccr);
435
436 if (p4_ht_active() && p4_ht_thread(cpu))
437 event->hw.config = p4_set_ht_bit(event->hw.config);
438
439 if (event->attr.type == PERF_TYPE_RAW) {
440
441 /* user data may have out-of-bound event index */
442 evnt = p4_config_unpack_event(event->attr.config);
443 if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
444 rc = -EINVAL;
445 goto out;
446 }
447
448 /*
449 * We don't control raw events so it's up to the caller
450 * to pass sane values (and we don't count the thread number
451 * on an HT machine but allow HT-compatible specifics to be
452 * passed on)
453 *
454 * XXX: HT wide things should check perf_paranoid_cpu() &&
455 * CAP_SYS_ADMIN
456 */
457 event->hw.config |= event->attr.config &
458 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
459 p4_config_pack_cccr(P4_CCCR_MASK_HT));
460 }
461
462 rc = x86_setup_perfctr(event);
463out:
464 put_cpu();
465 return rc;
466}
467
468static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
469{
470 unsigned long dummy;
471
472 rdmsrl(hwc->config_base + hwc->idx, dummy);
473 if (dummy & P4_CCCR_OVF) {
474 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
475 ((u64)dummy) & ~P4_CCCR_OVF);
476 }
477}
478
479static inline void p4_pmu_disable_event(struct perf_event *event)
480{
481 struct hw_perf_event *hwc = &event->hw;
482
483 /*
484 * If event gets disabled while counter is in overflowed
485 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
486 * asserted again and again
487 */
488 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
489 (u64)(p4_config_unpack_cccr(hwc->config)) &
490 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
491}
492
493static void p4_pmu_disable_all(void)
494{
495 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
496 int idx;
497
498 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
499 struct perf_event *event = cpuc->events[idx];
500 if (!test_bit(idx, cpuc->active_mask))
501 continue;
502 p4_pmu_disable_event(event);
503 }
504}
505
506static void p4_pmu_enable_event(struct perf_event *event)
507{
508 struct hw_perf_event *hwc = &event->hw;
509 int thread = p4_ht_config_thread(hwc->config);
510 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
511 unsigned int idx = p4_config_unpack_event(hwc->config);
512 unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
513 struct p4_event_bind *bind;
514 struct p4_cache_event_bind *bind_cache;
515 u64 escr_addr, cccr;
516
517 bind = &p4_event_bind_map[idx];
518 escr_addr = (u64)bind->escr_msr[thread];
519
520 /*
521 * - we don't support cascaded counters yet
522 * - and counter 1 is broken (erratum)
523 */
524 WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
525 WARN_ON_ONCE(hwc->idx == 1);
526
527 /* we need a real Event value */
528 escr_conf &= ~P4_ESCR_EVENT_MASK;
529 escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
530
531 cccr = p4_config_unpack_cccr(hwc->config);
532
533 /*
534 * it could be a cache event, in which case we need to
535 * program the metrics into additional MSRs
536 */
537 BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
538 if (idx_cache > P4_CACHE__NONE &&
539 idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
540 bind_cache = &p4_cache_event_bind_map[idx_cache];
541 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
542 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
543 }
544
545 (void)checking_wrmsrl(escr_addr, escr_conf);
546 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
547 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
548}
549
550static void p4_pmu_enable_all(int added)
551{
552 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
553 int idx;
554
555 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
556 struct perf_event *event = cpuc->events[idx];
557 if (!test_bit(idx, cpuc->active_mask))
558 continue;
559 p4_pmu_enable_event(event);
560 }
561}
562
563static int p4_pmu_handle_irq(struct pt_regs *regs)
564{
565 struct perf_sample_data data;
566 struct cpu_hw_events *cpuc;
567 struct perf_event *event;
568 struct hw_perf_event *hwc;
569 int idx, handled = 0;
570 u64 val;
571
572 data.addr = 0;
573 data.raw = NULL;
574
575 cpuc = &__get_cpu_var(cpu_hw_events);
576
577 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
578
579 if (!test_bit(idx, cpuc->active_mask))
580 continue;
581
582 event = cpuc->events[idx];
583 hwc = &event->hw;
584
585 WARN_ON_ONCE(hwc->idx != idx);
586
587 /*
588 * FIXME: Redundant call, not strictly needed,
589 * but kept as a sanity check
590 */
591 p4_pmu_clear_cccr_ovf(hwc);
592
593 val = x86_perf_event_update(event);
594 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
595 continue;
596
597 /*
598 * event overflow
599 */
600 handled = 1;
601 data.period = event->hw.last_period;
602
603 if (!x86_perf_event_set_period(event))
604 continue;
605 if (perf_event_overflow(event, 1, &data, regs))
606 p4_pmu_disable_event(event);
607 }
608
609 if (handled) {
610 /* p4 quirk: unmask it again */
611 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
612 inc_irq_stat(apic_perf_irqs);
613 }
614
615 return handled;
616}
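The "val & (1ULL << (x86_pmu.cntval_bits - 1))" test reads as follows: the counter was preloaded with -(sample period), so its top bit stays set until it wraps past zero; once the bit reads as clear the handler treats the event as overflowed. A small stand-alone illustration of that check for a 40-bit counter (the numbers are arbitrary, and the real bookkeeping lives in x86_perf_event_update()/x86_perf_event_set_period()):

#include <stdint.h>
#include <stdio.h>

#define CNTVAL_BITS	40

static int counter_overflowed(uint64_t raw_count)
{
	/* still "negative" (top bit set) -> has not wrapped yet */
	return !(raw_count & (1ULL << (CNTVAL_BITS - 1)));
}

int main(void)
{
	uint64_t mask = (1ULL << CNTVAL_BITS) - 1;
	uint64_t period = 100000;
	uint64_t count = (0 - period) & mask;	/* preloaded value */

	printf("before: overflowed=%d\n", counter_overflowed(count));
	count = (count + period + 1) & mask;	/* period+1 events later */
	printf("after:  overflowed=%d\n", counter_overflowed(count));
	return 0;
}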
617
618/*
619 * swap the thread-specific fields according to the thread
620 * we are going to run on
621 */
622static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
623{
624 u32 escr, cccr;
625
626 /*
627 * either we are lucky and stay on the same cpu, or there is no HT support
628 */
629 if (!p4_should_swap_ts(hwc->config, cpu))
630 return;
631
632 /*
633 * the event has migrated from another logical
634 * cpu, so we need to swap the thread-specific flags
635 */
636
637 escr = p4_config_unpack_escr(hwc->config);
638 cccr = p4_config_unpack_cccr(hwc->config);
639
640 if (p4_ht_thread(cpu)) {
641 cccr &= ~P4_CCCR_OVF_PMI_T0;
642 cccr |= P4_CCCR_OVF_PMI_T1;
643 if (escr & P4_ESCR_T0_OS) {
644 escr &= ~P4_ESCR_T0_OS;
645 escr |= P4_ESCR_T1_OS;
646 }
647 if (escr & P4_ESCR_T0_USR) {
648 escr &= ~P4_ESCR_T0_USR;
649 escr |= P4_ESCR_T1_USR;
650 }
651 hwc->config = p4_config_pack_escr(escr);
652 hwc->config |= p4_config_pack_cccr(cccr);
653 hwc->config |= P4_CONFIG_HT;
654 } else {
655 cccr &= ~P4_CCCR_OVF_PMI_T1;
656 cccr |= P4_CCCR_OVF_PMI_T0;
657 if (escr & P4_ESCR_T1_OS) {
658 escr &= ~P4_ESCR_T1_OS;
659 escr |= P4_ESCR_T0_OS;
660 }
661 if (escr & P4_ESCR_T1_USR) {
662 escr &= ~P4_ESCR_T1_USR;
663 escr |= P4_ESCR_T0_USR;
664 }
665 hwc->config = p4_config_pack_escr(escr);
666 hwc->config |= p4_config_pack_cccr(cccr);
667 hwc->config &= ~P4_CONFIG_HT;
668 }
669}
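The swap above is the same move-a-flag pattern repeated for the OS/USR bits and the PMI routing bit. A tiny generic sketch of that pattern (the bit positions below are placeholders, not the real ESCR layout):

#include <stdint.h>
#include <stdio.h>

#define FLAG_T0_OS	(1u << 3)	/* placeholder positions */
#define FLAG_T1_OS	(1u << 1)

/* if "from" is set, clear it and set "to" instead */
static uint32_t move_flag(uint32_t word, uint32_t from, uint32_t to)
{
	if (word & from) {
		word &= ~from;
		word |= to;
	}
	return word;
}

int main(void)
{
	uint32_t escr = FLAG_T0_OS;

	escr = move_flag(escr, FLAG_T0_OS, FLAG_T1_OS);
	printf("escr=%#x\n", escr);
	return 0;
}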
670
671/*
672 * ESCR address hashing is tricky: the ESCR MSRs are not sequential
673 * in memory, but they all start at MSR_P4_BSU_ESCR0 (0x03a0) and
674 * their addresses lie in the range [0x3a0, 0x3e1]
675 *
676 * so we end up with a ~70% filled table
677 */
678
679#define P4_ESCR_MSR_BASE 0x000003a0
680#define P4_ESCR_MSR_MAX 0x000003e1
681#define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
682#define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE)
683#define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr
684
685static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
686 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
687 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
688 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
689 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
690 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
691 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
692 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
693 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
694 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
695 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
696 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
697 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
698 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
699 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
700 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
701 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
702 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
703 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
704 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
705 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
706 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
707 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
708 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
709 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
710 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
711 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
712 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
713 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
714 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
715 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
716 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
717 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
718 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
719 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
720 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
721 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
722 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
723 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
724 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
725 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
726 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
727 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
728 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
729 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
730 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
731 P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
732};
733
734static int p4_get_escr_idx(unsigned int addr)
735{
736 unsigned int idx = P4_ESCR_MSR_IDX(addr);
737
738 if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE ||
739 !p4_escr_table[idx])) {
740 WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
741 return -1;
742 }
743
744 return idx;
745}
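So the "hashtable" is really a sparse, directly indexed array keyed by (MSR address - base), with zero-filled holes standing in for addresses that are not ESCRs. A self-contained miniature of the same scheme (the entries are placeholder addresses, not the real MSR list):

#include <stdio.h>

#define BASE		0x3a0
#define MAX		0x3e1
#define TABLE_SIZE	(MAX - BASE + 1)
#define IDX(addr)	((addr) - BASE)
#define ENTRY(addr)	[IDX(addr)] = (addr)

static const unsigned int table[TABLE_SIZE] = {
	ENTRY(0x3a0),	/* placeholder "ESCR" addresses */
	ENTRY(0x3a2),
	ENTRY(0x3e1),
};

static int get_idx(unsigned int addr)
{
	unsigned int idx = IDX(addr);

	if (idx >= TABLE_SIZE || !table[idx])
		return -1;	/* hole or out of range */
	return (int)idx;
}

int main(void)
{
	/* valid entry, hole, and out-of-range address */
	printf("%d %d %d\n", get_idx(0x3a2), get_idx(0x3a1), get_idx(0x400));
	return 0;
}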
746
747static int p4_next_cntr(int thread, unsigned long *used_mask,
748 struct p4_event_bind *bind)
749{
750 int i, j;
751
752 for (i = 0; i < P4_CNTR_LIMIT; i++) {
753 j = bind->cntr[thread][i];
754 if (j != -1 && !test_bit(j, used_mask))
755 return j;
756 }
757
758 return -1;
759}
760
761static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
762{
763 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
764 unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
765 int cpu = raw_smp_processor_id();
766 struct hw_perf_event *hwc;
767 struct p4_event_bind *bind;
768 unsigned int i, thread, num;
769 int cntr_idx, escr_idx;
770
771 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
772 bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
773
774 for (i = 0, num = n; i < n; i++, num--) {
775
776 hwc = &cpuc->event_list[i]->hw;
777 thread = p4_ht_thread(cpu);
778 bind = p4_config_get_bind(hwc->config);
779 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
780 if (unlikely(escr_idx == -1))
781 goto done;
782
783 if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
784 cntr_idx = hwc->idx;
785 if (assign)
786 assign[i] = hwc->idx;
787 goto reserve;
788 }
789
790 cntr_idx = p4_next_cntr(thread, used_mask, bind);
791 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
792 goto done;
793
794 p4_pmu_swap_config_ts(hwc, cpu);
795 if (assign)
796 assign[i] = cntr_idx;
797reserve:
798 set_bit(cntr_idx, used_mask);
799 set_bit(escr_idx, escr_mask);
800 }
801
802done:
803 return num ? -ENOSPC : 0;
804}
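The scheduling loop above is a greedy first-fit: each event needs one free counter out of the set its bind allows plus exclusive use of one ESCR, and the first event that cannot be placed fails the whole group. A simplified user-space sketch of that idea using plain 64-bit masks instead of the kernel bitmaps (the sample data is made up):

#include <stdint.h>
#include <stdio.h>

struct fake_event {
	uint64_t cntr_choices;	/* bitmask of counters this event may use */
	int escr;		/* index of the ESCR it needs */
};

static int schedule(const struct fake_event *ev, int n, int *assign)
{
	uint64_t used_cntr = 0, used_escr = 0;
	int i, c;

	for (i = 0; i < n; i++) {
		if (used_escr & (1ULL << ev[i].escr))
			return -1;		/* ESCR already taken */
		for (c = 0; c < 64; c++)
			if ((ev[i].cntr_choices & (1ULL << c)) &&
			    !(used_cntr & (1ULL << c)))
				break;
		if (c == 64)
			return -1;		/* no free counter left */
		used_cntr |= 1ULL << c;
		used_escr |= 1ULL << ev[i].escr;
		assign[i] = c;
	}
	return 0;
}

int main(void)
{
	struct fake_event ev[] = { { 0x3, 0 }, { 0x3, 1 } };
	int assign[2];

	if (!schedule(ev, 2, assign))
		printf("cntr0=%d cntr1=%d\n", assign[0], assign[1]);
	return 0;
}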
805
806static __initconst const struct x86_pmu p4_pmu = {
807 .name = "Netburst P4/Xeon",
808 .handle_irq = p4_pmu_handle_irq,
809 .disable_all = p4_pmu_disable_all,
810 .enable_all = p4_pmu_enable_all,
811 .enable = p4_pmu_enable_event,
812 .disable = p4_pmu_disable_event,
813 .eventsel = MSR_P4_BPU_CCCR0,
814 .perfctr = MSR_P4_BPU_PERFCTR0,
815 .event_map = p4_pmu_event_map,
816 .max_events = ARRAY_SIZE(p4_general_events),
817 .get_event_constraints = x86_get_event_constraints,
818 /*
819 * If HT is disabled we may need to use all
820 * ARCH_P4_MAX_CCCR counters simultaneously,
821 * though leave it restricted for the moment assuming
822 * HT is on
823 */
824 .num_counters = ARCH_P4_MAX_CCCR,
825 .apic = 1,
826 .cntval_bits = 40,
827 .cntval_mask = (1ULL << 40) - 1,
828 .max_period = (1ULL << 39) - 1,
829 .hw_config = p4_hw_config,
830 .schedule_events = p4_pmu_schedule_events,
831};
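Note how max_period is (1ULL << 39) - 1 while the counters are 40 bits wide: keeping the period below half the counter range presumably guarantees that the preloaded value -(period) still has the top bit set, which is exactly what the overflow test in p4_pmu_handle_irq() relies on. A quick arithmetic check:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mask = (1ULL << 40) - 1;
	uint64_t max_period = (1ULL << 39) - 1;
	uint64_t preload = (0 - max_period) & mask;	/* worst-case preload */

	printf("preload=%#llx top-bit=%llu\n",
	       (unsigned long long)preload,
	       (unsigned long long)((preload >> 39) & 1));
	return 0;
}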
832
833static __init int p4_pmu_init(void)
834{
835 unsigned int low, high;
836
837 /* If we get stripped -- indexing fails */
838 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
839
840 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
841 if (!(low & (1 << 7))) {
842 pr_cont("unsupported Netburst CPU model %d ",
843 boot_cpu_data.x86_model);
844 return -ENODEV;
845 }
846
847 memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
848 sizeof(hw_cache_event_ids));
849
850 pr_cont("Netburst events, ");
851
852 x86_pmu = p4_pmu;
853
854 return 0;
855}
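The rdmsr() check above tests bit 7 of MSR_IA32_MISC_ENABLE, which as far as I recall is the read-only "performance monitoring available" flag. For the curious, the same bit can be inspected from user space through the msr module (requires root and /dev/cpu/*/msr; this is only an illustration, not something the driver does):

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	/* MSR_IA32_MISC_ENABLE is register 0x1a0 */
	if (fd < 0 || pread(fd, &val, sizeof(val), 0x1a0) != sizeof(val)) {
		perror("msr");
		return 1;
	}
	printf("MISC_ENABLE=%#llx perfmon-available=%llu\n",
	       (unsigned long long)val,
	       (unsigned long long)((val >> 7) & 1));
	close(fd);
	return 0;
}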
856
857#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index a330485d14da..34ba07be2cda 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event)
27 */ 27 */
28#define P6_NOP_EVENT 0x0000002EULL 28#define P6_NOP_EVENT 0x0000002EULL
29 29
30static u64 p6_pmu_raw_event(u64 hw_event)
31{
32#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
33#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
34#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
35#define P6_EVNTSEL_INV_MASK 0x00800000ULL
36#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
37
38#define P6_EVNTSEL_MASK \
39 (P6_EVNTSEL_EVENT_MASK | \
40 P6_EVNTSEL_UNIT_MASK | \
41 P6_EVNTSEL_EDGE_MASK | \
42 P6_EVNTSEL_INV_MASK | \
43 P6_EVNTSEL_REG_MASK)
44
45 return hw_event & P6_EVNTSEL_MASK;
46}
47
48static struct event_constraint p6_event_constraints[] = 30static struct event_constraint p6_event_constraints[] =
49{ 31{
50 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ 32 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
@@ -66,7 +48,7 @@ static void p6_pmu_disable_all(void)
66 wrmsrl(MSR_P6_EVNTSEL0, val); 48 wrmsrl(MSR_P6_EVNTSEL0, val);
67} 49}
68 50
69static void p6_pmu_enable_all(void) 51static void p6_pmu_enable_all(int added)
70{ 52{
71 unsigned long val; 53 unsigned long val;
72 54
@@ -102,22 +84,23 @@ static void p6_pmu_enable_event(struct perf_event *event)
102 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); 84 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
103} 85}
104 86
105static __initconst struct x86_pmu p6_pmu = { 87static __initconst const struct x86_pmu p6_pmu = {
106 .name = "p6", 88 .name = "p6",
107 .handle_irq = x86_pmu_handle_irq, 89 .handle_irq = x86_pmu_handle_irq,
108 .disable_all = p6_pmu_disable_all, 90 .disable_all = p6_pmu_disable_all,
109 .enable_all = p6_pmu_enable_all, 91 .enable_all = p6_pmu_enable_all,
110 .enable = p6_pmu_enable_event, 92 .enable = p6_pmu_enable_event,
111 .disable = p6_pmu_disable_event, 93 .disable = p6_pmu_disable_event,
94 .hw_config = x86_pmu_hw_config,
95 .schedule_events = x86_schedule_events,
112 .eventsel = MSR_P6_EVNTSEL0, 96 .eventsel = MSR_P6_EVNTSEL0,
113 .perfctr = MSR_P6_PERFCTR0, 97 .perfctr = MSR_P6_PERFCTR0,
114 .event_map = p6_pmu_event_map, 98 .event_map = p6_pmu_event_map,
115 .raw_event = p6_pmu_raw_event,
116 .max_events = ARRAY_SIZE(p6_perfmon_event_map), 99 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
117 .apic = 1, 100 .apic = 1,
118 .max_period = (1ULL << 31) - 1, 101 .max_period = (1ULL << 31) - 1,
119 .version = 0, 102 .version = 0,
120 .num_events = 2, 103 .num_counters = 2,
121 /* 104 /*
122 * Events have 40 bits implemented. However they are designed such 105 * Events have 40 bits implemented. However they are designed such
123 * that bits [32-39] are sign extensions of bit 31. As such the 106 * that bits [32-39] are sign extensions of bit 31. As such the
@@ -125,8 +108,8 @@ static __initconst struct x86_pmu p6_pmu = {
125 * 108 *
126 * See IA-32 Intel Architecture Software developer manual Vol 3B 109 * See IA-32 Intel Architecture Software developer manual Vol 3B
127 */ 110 */
128 .event_bits = 32, 111 .cntval_bits = 32,
129 .event_mask = (1ULL << 32) - 1, 112 .cntval_mask = (1ULL << 32) - 1,
130 .get_event_constraints = x86_get_event_constraints, 113 .get_event_constraints = x86_get_event_constraints,
131 .event_constraints = p6_event_constraints, 114 .event_constraints = p6_event_constraints,
132}; 115};
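On the cntval_bits change above: the comment notes that P6 counters implement 40 bits but bits [32-39] are sign extensions of bit 31, so only 32 bits carry information. A small sketch of what that sign-extension property looks like:

#include <stdint.h>
#include <stdio.h>

/* model a 40-bit value whose bits [32-39] mirror bit 31 */
static uint64_t p6_style_extend(uint32_t low32)
{
	uint64_t val = low32;

	if (low32 & (1u << 31))
		val |= 0xffULL << 32;	/* bits 32-39 copy bit 31 */
	return val & ((1ULL << 40) - 1);
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)p6_style_extend(0x80000000u));
	printf("%#llx\n", (unsigned long long)p6_style_extend(0x7fffffffu));
	return 0;
}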