Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/Makefile                 |   7
-rw-r--r--  arch/x86/kernel/cpu/bugs.c                   |   4
-rw-r--r--  arch/x86/kernel/cpu/common.c                 |   2
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c             |   4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c      |  20
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c    | 152
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c             | 311
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c         |  10
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c       |  10
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c              | 184
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c             | 511
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h             | 505
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c         |  52
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c     | 294
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c       | 508
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c    |  83
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c   |  28
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c          | 129
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c          |   9
19 files changed, 1771 insertions, 1052 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6042981d0309..fe6eb197f848 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -28,10 +28,15 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
28 28
29obj-$(CONFIG_PERF_EVENTS) += perf_event.o 29obj-$(CONFIG_PERF_EVENTS) += perf_event.o
30 30
31ifdef CONFIG_PERF_EVENTS
32obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
33obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
34endif
35
31obj-$(CONFIG_X86_MCE) += mcheck/ 36obj-$(CONFIG_X86_MCE) += mcheck/
32obj-$(CONFIG_MTRR) += mtrr/ 37obj-$(CONFIG_MTRR) += mtrr/
33 38
34obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o 39obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
35 40
36quiet_cmd_mkcapflags = MKCAP $@ 41quiet_cmd_mkcapflags = MKCAP $@
37 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ 42 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 525514cf33c3..46674fbb62ba 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -62,6 +62,8 @@ static void __init check_fpu(void)
62 return; 62 return;
63 } 63 }
64 64
65 kernel_fpu_begin();
66
65 /* 67 /*
66 * trap_init() enabled FXSR and company _before_ testing for FP 68 * trap_init() enabled FXSR and company _before_ testing for FP
67 * problems here. 69 * problems here.
@@ -80,6 +82,8 @@ static void __init check_fpu(void)
80 : "=m" (*&fdiv_bug) 82 : "=m" (*&fdiv_bug)
81 : "m" (*&x), "m" (*&y)); 83 : "m" (*&x), "m" (*&y));
82 84
85 kernel_fpu_end();
86
83 boot_cpu_data.fdiv_bug = fdiv_bug; 87 boot_cpu_data.fdiv_bug = fdiv_bug;
84 if (boot_cpu_data.fdiv_bug) 88 if (boot_cpu_data.fdiv_bug)
85 printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); 89 printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n");
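
The fix above brackets the FDIV probe with kernel_fpu_begin()/kernel_fpu_end(), which is the general rule for any kernel code that touches FPU or SSE state. A minimal sketch of that pattern, with an invented function name and the header location used in this era of the tree:

#include <linux/kernel.h>
#include <asm/i387.h>	/* kernel_fpu_begin()/kernel_fpu_end() */

/* Illustrative only: wrap all in-kernel FPU/SSE use in begin/end. */
static void example_fpu_user(void)
{
	kernel_fpu_begin();	/* saves the current FPU state, disables preemption */

	/* ... issue FPU instructions here, e.g. the FDIV probe above ... */

	kernel_fpu_end();	/* restores state, re-enables preemption */
}
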
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8ed394a8eb6e..ec63df54d138 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,7 +21,7 @@
21#include <linux/topology.h> 21#include <linux/topology.h>
22#include <linux/cpumask.h> 22#include <linux/cpumask.h>
23#include <asm/pgtable.h> 23#include <asm/pgtable.h>
24#include <asm/atomic.h> 24#include <linux/atomic.h>
25#include <asm/proto.h> 25#include <asm/proto.h>
26#include <asm/setup.h> 26#include <asm/setup.h>
27#include <asm/apic.h> 27#include <asm/apic.h>
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 8095f8611f8a..755f64fb0743 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -32,11 +32,11 @@
32 */ 32 */
33static const __initconst struct hypervisor_x86 * const hypervisors[] = 33static const __initconst struct hypervisor_x86 * const hypervisors[] =
34{ 34{
35 &x86_hyper_vmware,
36 &x86_hyper_ms_hyperv,
37#ifdef CONFIG_XEN_PVHVM 35#ifdef CONFIG_XEN_PVHVM
38 &x86_hyper_xen_hvm, 36 &x86_hyper_xen_hvm,
39#endif 37#endif
38 &x86_hyper_vmware,
39 &x86_hyper_ms_hyperv,
40}; 40};
41 41
42const struct hypervisor_x86 *x86_hyper; 42const struct hypervisor_x86 *x86_hyper;
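
The array order matters because detection is first-match: init code walks hypervisors[] and the first ->detect() callback that fires claims the platform, so moving the Xen PVHVM entry to the top gives it the first chance to match. A simplified sketch of that loop (it assumes the hypervisors[] array above and the kernel's ARRAY_SIZE(); not the exact in-tree code):

static const struct hypervisor_x86 * __init detect_hypervisor_vendor(void)
{
	const struct hypervisor_x86 * const *p;

	for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
		if ((*p)->detect())
			return *p;	/* first match wins */
	}

	return NULL;
}
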
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 0ed633c5048b..6199232161cf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
78 78
79static cpumask_var_t mce_inject_cpumask; 79static cpumask_var_t mce_inject_cpumask;
80 80
81static int mce_raise_notify(struct notifier_block *self, 81static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
82 unsigned long val, void *data)
83{ 82{
84 struct die_args *args = (struct die_args *)data;
85 int cpu = smp_processor_id(); 83 int cpu = smp_processor_id();
86 struct mce *m = &__get_cpu_var(injectm); 84 struct mce *m = &__get_cpu_var(injectm);
87 if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) 85 if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
88 return NOTIFY_DONE; 86 return NMI_DONE;
89 cpumask_clear_cpu(cpu, mce_inject_cpumask); 87 cpumask_clear_cpu(cpu, mce_inject_cpumask);
90 if (m->inject_flags & MCJ_EXCEPTION) 88 if (m->inject_flags & MCJ_EXCEPTION)
91 raise_exception(m, args->regs); 89 raise_exception(m, regs);
92 else if (m->status) 90 else if (m->status)
93 raise_poll(m); 91 raise_poll(m);
94 return NOTIFY_STOP; 92 return NMI_HANDLED;
95} 93}
96 94
97static struct notifier_block mce_raise_nb = {
98 .notifier_call = mce_raise_notify,
99 .priority = NMI_LOCAL_NORMAL_PRIOR,
100};
101
102/* Inject mce on current CPU */ 95/* Inject mce on current CPU */
103static int raise_local(void) 96static int raise_local(void)
104{ 97{
@@ -216,7 +209,8 @@ static int inject_init(void)
216 return -ENOMEM; 209 return -ENOMEM;
217 printk(KERN_INFO "Machine check injector initialized\n"); 210 printk(KERN_INFO "Machine check injector initialized\n");
218 mce_chrdev_ops.write = mce_write; 211 mce_chrdev_ops.write = mce_write;
219 register_die_notifier(&mce_raise_nb); 212 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
213 "mce_notify");
220 return 0; 214 return 0;
221} 215}
222 216
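
The injector now hooks the NMI path through register_nmi_handler() instead of a die-notifier, so the handler sees (cmd, regs) directly and answers with NMI_DONE or NMI_HANDLED. A hedged sketch of that handler shape; the names and the per-CPU flag standing in for the real injection state are invented for the example:

#include <linux/init.h>
#include <linux/percpu.h>
#include <asm/nmi.h>

static DEFINE_PER_CPU(bool, example_pending);

static int example_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	if (!__this_cpu_read(example_pending))
		return NMI_DONE;		/* not ours, let other handlers look */

	__this_cpu_write(example_pending, false);
	/* ... act on the event, using regs if needed ... */
	return NMI_HANDLED;			/* NMI consumed */
}

static int __init example_init(void)
{
	return register_nmi_handler(NMI_LOCAL, example_nmi_handler, 0,
				    "example_nmi");
}
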
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 1e8d66c1336a..7395d5f4272d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -43,61 +43,105 @@ static struct severity {
43 unsigned char covered; 43 unsigned char covered;
44 char *msg; 44 char *msg;
45} severities[] = { 45} severities[] = {
46#define KERNEL .context = IN_KERNEL 46#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
47#define USER .context = IN_USER 47#define KERNEL .context = IN_KERNEL
48#define SER .ser = SER_REQUIRED 48#define USER .context = IN_USER
49#define NOSER .ser = NO_SER 49#define SER .ser = SER_REQUIRED
50#define SEV(s) .sev = MCE_ ## s ## _SEVERITY 50#define NOSER .ser = NO_SER
51#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } 51#define BITCLR(x) .mask = x, .result = 0
52#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } 52#define BITSET(x) .mask = x, .result = x
53#define MCGMASK(x, res, s, m, r...) \ 53#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
54 { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } 54#define MASK(x, y) .mask = x, .result = y
55#define MASK(x, y, s, m, r...) \
56 { .mask = x, .result = y, SEV(s), .msg = m, ## r }
57#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) 55#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
58#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) 56#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
59#define MCACOD 0xffff 57#define MCACOD 0xffff
60 58
61 BITCLR(MCI_STATUS_VAL, NO, "Invalid"), 59 MCESEV(
62 BITCLR(MCI_STATUS_EN, NO, "Not enabled"), 60 NO, "Invalid",
63 BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), 61 BITCLR(MCI_STATUS_VAL)
62 ),
63 MCESEV(
64 NO, "Not enabled",
65 BITCLR(MCI_STATUS_EN)
66 ),
67 MCESEV(
68 PANIC, "Processor context corrupt",
69 BITSET(MCI_STATUS_PCC)
70 ),
64 /* When MCIP is not set something is very confused */ 71 /* When MCIP is not set something is very confused */
65 MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), 72 MCESEV(
73 PANIC, "MCIP not set in MCA handler",
74 MCGMASK(MCG_STATUS_MCIP, 0)
75 ),
66 /* Neither return not error IP -- no chance to recover -> PANIC */ 76 /* Neither return not error IP -- no chance to recover -> PANIC */
67 MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, 77 MCESEV(
68 "Neither restart nor error IP"), 78 PANIC, "Neither restart nor error IP",
69 MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", 79 MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
70 KERNEL), 80 ),
71 BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), 81 MCESEV(
72 MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, 82 PANIC, "In kernel and no restart IP",
73 "Spurious not enabled", SER), 83 KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
84 ),
85 MCESEV(
86 KEEP, "Corrected error",
87 NOSER, BITCLR(MCI_STATUS_UC)
88 ),
74 89
75 /* ignore OVER for UCNA */ 90 /* ignore OVER for UCNA */
76 MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, 91 MCESEV(
77 "Uncorrected no action required", SER), 92 KEEP, "Uncorrected no action required",
78 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, 93 SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
79 "Illegal combination (UCNA with AR=1)", SER), 94 ),
80 MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), 95 MCESEV(
96 PANIC, "Illegal combination (UCNA with AR=1)",
97 SER,
98 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
99 ),
100 MCESEV(
101 KEEP, "Non signalled machine check",
102 SER, BITCLR(MCI_STATUS_S)
103 ),
81 104
82 /* AR add known MCACODs here */ 105 /* AR add known MCACODs here */
83 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, 106 MCESEV(
84 "Action required with lost events", SER), 107 PANIC, "Action required with lost events",
85 MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, 108 SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
86 "Action required; unknown MCACOD", SER), 109 ),
110 MCESEV(
111 PANIC, "Action required: unknown MCACOD",
112 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
113 ),
87 114
88 /* known AO MCACODs: */ 115 /* known AO MCACODs: */
89 MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, 116 MCESEV(
90 "Action optional: memory scrubbing error", SER), 117 AO, "Action optional: memory scrubbing error",
91 MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, 118 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0)
92 "Action optional: last level cache writeback error", SER), 119 ),
93 120 MCESEV(
94 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, 121 AO, "Action optional: last level cache writeback error",
95 "Action optional unknown MCACOD", SER), 122 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a)
96 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, 123 ),
97 "Action optional with lost events", SER), 124 MCESEV(
98 BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), 125 SOME, "Action optional: unknown MCACOD",
99 BITSET(MCI_STATUS_UC, UC, "Uncorrected"), 126 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
100 BITSET(0, SOME, "No match") /* always matches. keep at end */ 127 ),
128 MCESEV(
129 SOME, "Action optional with lost events",
130 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
131 ),
132
133 MCESEV(
134 PANIC, "Overflowed uncorrected",
135 BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
136 ),
137 MCESEV(
138 UC, "Uncorrected",
139 BITSET(MCI_STATUS_UC)
140 ),
141 MCESEV(
142 SOME, "No match",
143 BITSET(0)
144 ) /* always matches. keep at end */
101}; 145};
102 146
103/* 147/*
@@ -112,15 +156,15 @@ static int error_context(struct mce *m)
112 return IN_KERNEL; 156 return IN_KERNEL;
113} 157}
114 158
115int mce_severity(struct mce *a, int tolerant, char **msg) 159int mce_severity(struct mce *m, int tolerant, char **msg)
116{ 160{
117 enum context ctx = error_context(a); 161 enum context ctx = error_context(m);
118 struct severity *s; 162 struct severity *s;
119 163
120 for (s = severities;; s++) { 164 for (s = severities;; s++) {
121 if ((a->status & s->mask) != s->result) 165 if ((m->status & s->mask) != s->result)
122 continue; 166 continue;
123 if ((a->mcgstatus & s->mcgmask) != s->mcgres) 167 if ((m->mcgstatus & s->mcgmask) != s->mcgres)
124 continue; 168 continue;
125 if (s->ser == SER_REQUIRED && !mce_ser) 169 if (s->ser == SER_REQUIRED && !mce_ser)
126 continue; 170 continue;
@@ -197,15 +241,15 @@ static const struct file_operations severities_coverage_fops = {
197 241
198static int __init severities_debugfs_init(void) 242static int __init severities_debugfs_init(void)
199{ 243{
200 struct dentry *dmce = NULL, *fseverities_coverage = NULL; 244 struct dentry *dmce, *fsev;
201 245
202 dmce = mce_get_debugfs_dir(); 246 dmce = mce_get_debugfs_dir();
203 if (dmce == NULL) 247 if (!dmce)
204 goto err_out; 248 goto err_out;
205 fseverities_coverage = debugfs_create_file("severities-coverage", 249
206 0444, dmce, NULL, 250 fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
207 &severities_coverage_fops); 251 &severities_coverage_fops);
208 if (fseverities_coverage == NULL) 252 if (!fsev)
209 goto err_out; 253 goto err_out;
210 254
211 return 0; 255 return 0;
@@ -214,4 +258,4 @@ err_out:
214 return -ENOMEM; 258 return -ENOMEM;
215} 259}
216late_initcall(severities_debugfs_init); 260late_initcall(severities_debugfs_init);
217#endif 261#endif /* CONFIG_DEBUG_FS */
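
The restructuring above is a pure C-preprocessor technique: a variadic macro supplies the fixed severity and message fields, and the trailing arguments are designated-initializer fragments that fill in whatever match conditions a rule needs. Reduced to a standalone sketch with invented names (ordinary C using the GNU ", ##" comma-pasting extension, the same one the kernel relies on):

#include <stdio.h>

struct rule {
	int		sev;
	const char	*msg;
	unsigned long	mask, result;
};

#define RULE(s, m, c...)	{ .sev = (s), .msg = (m), ## c }
#define BITSET(x)		.mask = (x), .result = (x)
#define BITCLR(x)		.mask = (x), .result = 0

static const struct rule rules[] = {
	RULE(0, "Invalid",  BITCLR(0x1)),
	RULE(2, "Critical", BITSET(0x4)),
	RULE(1, "No match", BITSET(0)),		/* always matches, keep last */
};

static const struct rule *classify(unsigned long status)
{
	const struct rule *r;

	/* The catch-all last entry guarantees this loop terminates. */
	for (r = rules; ; r++)
		if ((status & r->mask) == r->result)
			return r;
}

int main(void)
{
	printf("%s\n", classify(0x5)->msg);	/* prints "Critical" */
	return 0;
}

The payoff is the same as in mce-severity.c: each rule reads as "severity, message, conditions", and new condition macros can be added without rewriting every table entry.
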
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ff1ae9b6464d..fce51ad1f362 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -10,7 +10,6 @@
10#include <linux/thread_info.h> 10#include <linux/thread_info.h>
11#include <linux/capability.h> 11#include <linux/capability.h>
12#include <linux/miscdevice.h> 12#include <linux/miscdevice.h>
13#include <linux/interrupt.h>
14#include <linux/ratelimit.h> 13#include <linux/ratelimit.h>
15#include <linux/kallsyms.h> 14#include <linux/kallsyms.h>
16#include <linux/rcupdate.h> 15#include <linux/rcupdate.h>
@@ -38,23 +37,20 @@
38#include <linux/mm.h> 37#include <linux/mm.h>
39#include <linux/debugfs.h> 38#include <linux/debugfs.h>
40#include <linux/edac_mce.h> 39#include <linux/edac_mce.h>
40#include <linux/irq_work.h>
41 41
42#include <asm/processor.h> 42#include <asm/processor.h>
43#include <asm/hw_irq.h>
44#include <asm/apic.h>
45#include <asm/idle.h>
46#include <asm/ipi.h>
47#include <asm/mce.h> 43#include <asm/mce.h>
48#include <asm/msr.h> 44#include <asm/msr.h>
49 45
50#include "mce-internal.h" 46#include "mce-internal.h"
51 47
52static DEFINE_MUTEX(mce_read_mutex); 48static DEFINE_MUTEX(mce_chrdev_read_mutex);
53 49
54#define rcu_dereference_check_mce(p) \ 50#define rcu_dereference_check_mce(p) \
55 rcu_dereference_index_check((p), \ 51 rcu_dereference_index_check((p), \
56 rcu_read_lock_sched_held() || \ 52 rcu_read_lock_sched_held() || \
57 lockdep_is_held(&mce_read_mutex)) 53 lockdep_is_held(&mce_chrdev_read_mutex))
58 54
59#define CREATE_TRACE_POINTS 55#define CREATE_TRACE_POINTS
60#include <trace/events/mce.h> 56#include <trace/events/mce.h>
@@ -94,7 +90,8 @@ static unsigned long mce_need_notify;
94static char mce_helper[128]; 90static char mce_helper[128];
95static char *mce_helper_argv[2] = { mce_helper, NULL }; 91static char *mce_helper_argv[2] = { mce_helper, NULL };
96 92
97static DECLARE_WAIT_QUEUE_HEAD(mce_wait); 93static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
94
98static DEFINE_PER_CPU(struct mce, mces_seen); 95static DEFINE_PER_CPU(struct mce, mces_seen);
99static int cpu_missing; 96static int cpu_missing;
100 97
@@ -373,6 +370,31 @@ static void mce_wrmsrl(u32 msr, u64 v)
373} 370}
374 371
375/* 372/*
373 * Collect all global (w.r.t. this processor) status about this machine
374 * check into our "mce" struct so that we can use it later to assess
375 * the severity of the problem as we read per-bank specific details.
376 */
377static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
378{
379 mce_setup(m);
380
381 m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
382 if (regs) {
383 /*
384 * Get the address of the instruction at the time of
385 * the machine check error.
386 */
387 if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
388 m->ip = regs->ip;
389 m->cs = regs->cs;
390 }
391 /* Use accurate RIP reporting if available. */
392 if (rip_msr)
393 m->ip = mce_rdmsrl(rip_msr);
394 }
395}
396
397/*
376 * Simple lockless ring to communicate PFNs from the exception handler with the 398 * Simple lockless ring to communicate PFNs from the exception handler with the
377 * process context work function. This is vastly simplified because there's 399 * process context work function. This is vastly simplified because there's
378 * only a single reader and a single writer. 400 * only a single reader and a single writer.
@@ -443,40 +465,13 @@ static void mce_schedule_work(void)
443 } 465 }
444} 466}
445 467
446/* 468DEFINE_PER_CPU(struct irq_work, mce_irq_work);
447 * Get the address of the instruction at the time of the machine check
448 * error.
449 */
450static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
451{
452
453 if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) {
454 m->ip = regs->ip;
455 m->cs = regs->cs;
456 } else {
457 m->ip = 0;
458 m->cs = 0;
459 }
460 if (rip_msr)
461 m->ip = mce_rdmsrl(rip_msr);
462}
463 469
464#ifdef CONFIG_X86_LOCAL_APIC 470static void mce_irq_work_cb(struct irq_work *entry)
465/*
466 * Called after interrupts have been reenabled again
467 * when a MCE happened during an interrupts off region
468 * in the kernel.
469 */
470asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
471{ 471{
472 ack_APIC_irq();
473 exit_idle();
474 irq_enter();
475 mce_notify_irq(); 472 mce_notify_irq();
476 mce_schedule_work(); 473 mce_schedule_work();
477 irq_exit();
478} 474}
479#endif
480 475
481static void mce_report_event(struct pt_regs *regs) 476static void mce_report_event(struct pt_regs *regs)
482{ 477{
@@ -492,29 +487,7 @@ static void mce_report_event(struct pt_regs *regs)
492 return; 487 return;
493 } 488 }
494 489
495#ifdef CONFIG_X86_LOCAL_APIC 490 irq_work_queue(&__get_cpu_var(mce_irq_work));
496 /*
497 * Without APIC do not notify. The event will be picked
498 * up eventually.
499 */
500 if (!cpu_has_apic)
501 return;
502
503 /*
504 * When interrupts are disabled we cannot use
505 * kernel services safely. Trigger an self interrupt
506 * through the APIC to instead do the notification
507 * after interrupts are reenabled again.
508 */
509 apic->send_IPI_self(MCE_SELF_VECTOR);
510
511 /*
512 * Wait for idle afterwards again so that we don't leave the
513 * APIC in a non idle state because the normal APIC writes
514 * cannot exclude us.
515 */
516 apic_wait_icr_idle();
517#endif
518} 491}
519 492
520DEFINE_PER_CPU(unsigned, mce_poll_count); 493DEFINE_PER_CPU(unsigned, mce_poll_count);
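
The APIC self-interrupt plumbing removed above is replaced by the generic irq_work facility: work queued from MCE/NMI context is run shortly afterwards from a safe interrupt context. A minimal sketch of the pattern with invented names (not the patch itself):

#include <linux/kernel.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(struct irq_work, example_work);

static void example_irq_work_cb(struct irq_work *w)
{
	/* Runs later in IRQ context, where normal kernel services are safe. */
	pr_info("deferred work queued from NMI context\n");
}

/* Call once per CPU during setup. */
static void example_setup(void)
{
	init_irq_work(&__get_cpu_var(example_work), example_irq_work_cb);
}

/* Called from NMI/exception context: only queue, do nothing heavy here. */
static void example_report(void)
{
	irq_work_queue(&__get_cpu_var(example_work));
}
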
@@ -541,9 +514,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
541 514
542 percpu_inc(mce_poll_count); 515 percpu_inc(mce_poll_count);
543 516
544 mce_setup(&m); 517 mce_gather_info(&m, NULL);
545 518
546 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
547 for (i = 0; i < banks; i++) { 519 for (i = 0; i < banks; i++) {
548 if (!mce_banks[i].ctl || !test_bit(i, *b)) 520 if (!mce_banks[i].ctl || !test_bit(i, *b))
549 continue; 521 continue;
@@ -879,9 +851,9 @@ static int mce_usable_address(struct mce *m)
879{ 851{
880 if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) 852 if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
881 return 0; 853 return 0;
882 if ((m->misc & 0x3f) > PAGE_SHIFT) 854 if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
883 return 0; 855 return 0;
884 if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS) 856 if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
885 return 0; 857 return 0;
886 return 1; 858 return 1;
887} 859}
@@ -936,15 +908,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
936 908
937 percpu_inc(mce_exception_count); 909 percpu_inc(mce_exception_count);
938 910
939 if (notify_die(DIE_NMI, "machine check", regs, error_code,
940 18, SIGKILL) == NOTIFY_STOP)
941 goto out;
942 if (!banks) 911 if (!banks)
943 goto out; 912 goto out;
944 913
945 mce_setup(&m); 914 mce_gather_info(&m, regs);
946 915
947 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
948 final = &__get_cpu_var(mces_seen); 916 final = &__get_cpu_var(mces_seen);
949 *final = m; 917 *final = m;
950 918
@@ -1028,7 +996,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1028 if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) 996 if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
1029 mce_ring_add(m.addr >> PAGE_SHIFT); 997 mce_ring_add(m.addr >> PAGE_SHIFT);
1030 998
1031 mce_get_rip(&m, regs);
1032 mce_log(&m); 999 mce_log(&m);
1033 1000
1034 if (severity > worst) { 1001 if (severity > worst) {
@@ -1170,6 +1137,15 @@ static void mce_start_timer(unsigned long data)
1170 add_timer_on(t, smp_processor_id()); 1137 add_timer_on(t, smp_processor_id());
1171} 1138}
1172 1139
1140/* Must not be called in IRQ context where del_timer_sync() can deadlock */
1141static void mce_timer_delete_all(void)
1142{
1143 int cpu;
1144
1145 for_each_online_cpu(cpu)
1146 del_timer_sync(&per_cpu(mce_timer, cpu));
1147}
1148
1173static void mce_do_trigger(struct work_struct *work) 1149static void mce_do_trigger(struct work_struct *work)
1174{ 1150{
1175 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); 1151 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
@@ -1190,7 +1166,8 @@ int mce_notify_irq(void)
1190 clear_thread_flag(TIF_MCE_NOTIFY); 1166 clear_thread_flag(TIF_MCE_NOTIFY);
1191 1167
1192 if (test_and_clear_bit(0, &mce_need_notify)) { 1168 if (test_and_clear_bit(0, &mce_need_notify)) {
1193 wake_up_interruptible(&mce_wait); 1169 /* wake processes polling /dev/mcelog */
1170 wake_up_interruptible(&mce_chrdev_wait);
1194 1171
1195 /* 1172 /*
1196 * There is no risk of missing notifications because 1173 * There is no risk of missing notifications because
@@ -1363,18 +1340,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1363 return 0; 1340 return 0;
1364} 1341}
1365 1342
1366static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) 1343static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
1367{ 1344{
1368 if (c->x86 != 5) 1345 if (c->x86 != 5)
1369 return; 1346 return 0;
1347
1370 switch (c->x86_vendor) { 1348 switch (c->x86_vendor) {
1371 case X86_VENDOR_INTEL: 1349 case X86_VENDOR_INTEL:
1372 intel_p5_mcheck_init(c); 1350 intel_p5_mcheck_init(c);
1351 return 1;
1373 break; 1352 break;
1374 case X86_VENDOR_CENTAUR: 1353 case X86_VENDOR_CENTAUR:
1375 winchip_mcheck_init(c); 1354 winchip_mcheck_init(c);
1355 return 1;
1376 break; 1356 break;
1377 } 1357 }
1358
1359 return 0;
1378} 1360}
1379 1361
1380static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) 1362static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
@@ -1428,7 +1410,8 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
1428 if (mce_disabled) 1410 if (mce_disabled)
1429 return; 1411 return;
1430 1412
1431 __mcheck_cpu_ancient_init(c); 1413 if (__mcheck_cpu_ancient_init(c))
1414 return;
1432 1415
1433 if (!mce_available(c)) 1416 if (!mce_available(c))
1434 return; 1417 return;
@@ -1444,44 +1427,45 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
1444 __mcheck_cpu_init_vendor(c); 1427 __mcheck_cpu_init_vendor(c);
1445 __mcheck_cpu_init_timer(); 1428 __mcheck_cpu_init_timer();
1446 INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); 1429 INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
1447 1430 init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
1448} 1431}
1449 1432
1450/* 1433/*
1451 * Character device to read and clear the MCE log. 1434 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
1452 */ 1435 */
1453 1436
1454static DEFINE_SPINLOCK(mce_state_lock); 1437static DEFINE_SPINLOCK(mce_chrdev_state_lock);
1455static int open_count; /* #times opened */ 1438static int mce_chrdev_open_count; /* #times opened */
1456static int open_exclu; /* already open exclusive? */ 1439static int mce_chrdev_open_exclu; /* already open exclusive? */
1457 1440
1458static int mce_open(struct inode *inode, struct file *file) 1441static int mce_chrdev_open(struct inode *inode, struct file *file)
1459{ 1442{
1460 spin_lock(&mce_state_lock); 1443 spin_lock(&mce_chrdev_state_lock);
1461 1444
1462 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { 1445 if (mce_chrdev_open_exclu ||
1463 spin_unlock(&mce_state_lock); 1446 (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
1447 spin_unlock(&mce_chrdev_state_lock);
1464 1448
1465 return -EBUSY; 1449 return -EBUSY;
1466 } 1450 }
1467 1451
1468 if (file->f_flags & O_EXCL) 1452 if (file->f_flags & O_EXCL)
1469 open_exclu = 1; 1453 mce_chrdev_open_exclu = 1;
1470 open_count++; 1454 mce_chrdev_open_count++;
1471 1455
1472 spin_unlock(&mce_state_lock); 1456 spin_unlock(&mce_chrdev_state_lock);
1473 1457
1474 return nonseekable_open(inode, file); 1458 return nonseekable_open(inode, file);
1475} 1459}
1476 1460
1477static int mce_release(struct inode *inode, struct file *file) 1461static int mce_chrdev_release(struct inode *inode, struct file *file)
1478{ 1462{
1479 spin_lock(&mce_state_lock); 1463 spin_lock(&mce_chrdev_state_lock);
1480 1464
1481 open_count--; 1465 mce_chrdev_open_count--;
1482 open_exclu = 0; 1466 mce_chrdev_open_exclu = 0;
1483 1467
1484 spin_unlock(&mce_state_lock); 1468 spin_unlock(&mce_chrdev_state_lock);
1485 1469
1486 return 0; 1470 return 0;
1487} 1471}
@@ -1530,8 +1514,8 @@ static int __mce_read_apei(char __user **ubuf, size_t usize)
1530 return 0; 1514 return 0;
1531} 1515}
1532 1516
1533static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, 1517static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
1534 loff_t *off) 1518 size_t usize, loff_t *off)
1535{ 1519{
1536 char __user *buf = ubuf; 1520 char __user *buf = ubuf;
1537 unsigned long *cpu_tsc; 1521 unsigned long *cpu_tsc;
@@ -1542,7 +1526,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1542 if (!cpu_tsc) 1526 if (!cpu_tsc)
1543 return -ENOMEM; 1527 return -ENOMEM;
1544 1528
1545 mutex_lock(&mce_read_mutex); 1529 mutex_lock(&mce_chrdev_read_mutex);
1546 1530
1547 if (!mce_apei_read_done) { 1531 if (!mce_apei_read_done) {
1548 err = __mce_read_apei(&buf, usize); 1532 err = __mce_read_apei(&buf, usize);
@@ -1562,19 +1546,18 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1562 do { 1546 do {
1563 for (i = prev; i < next; i++) { 1547 for (i = prev; i < next; i++) {
1564 unsigned long start = jiffies; 1548 unsigned long start = jiffies;
1549 struct mce *m = &mcelog.entry[i];
1565 1550
1566 while (!mcelog.entry[i].finished) { 1551 while (!m->finished) {
1567 if (time_after_eq(jiffies, start + 2)) { 1552 if (time_after_eq(jiffies, start + 2)) {
1568 memset(mcelog.entry + i, 0, 1553 memset(m, 0, sizeof(*m));
1569 sizeof(struct mce));
1570 goto timeout; 1554 goto timeout;
1571 } 1555 }
1572 cpu_relax(); 1556 cpu_relax();
1573 } 1557 }
1574 smp_rmb(); 1558 smp_rmb();
1575 err |= copy_to_user(buf, mcelog.entry + i, 1559 err |= copy_to_user(buf, m, sizeof(*m));
1576 sizeof(struct mce)); 1560 buf += sizeof(*m);
1577 buf += sizeof(struct mce);
1578timeout: 1561timeout:
1579 ; 1562 ;
1580 } 1563 }
@@ -1594,13 +1577,13 @@ timeout:
1594 on_each_cpu(collect_tscs, cpu_tsc, 1); 1577 on_each_cpu(collect_tscs, cpu_tsc, 1);
1595 1578
1596 for (i = next; i < MCE_LOG_LEN; i++) { 1579 for (i = next; i < MCE_LOG_LEN; i++) {
1597 if (mcelog.entry[i].finished && 1580 struct mce *m = &mcelog.entry[i];
1598 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { 1581
1599 err |= copy_to_user(buf, mcelog.entry+i, 1582 if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
1600 sizeof(struct mce)); 1583 err |= copy_to_user(buf, m, sizeof(*m));
1601 smp_rmb(); 1584 smp_rmb();
1602 buf += sizeof(struct mce); 1585 buf += sizeof(*m);
1603 memset(&mcelog.entry[i], 0, sizeof(struct mce)); 1586 memset(m, 0, sizeof(*m));
1604 } 1587 }
1605 } 1588 }
1606 1589
@@ -1608,15 +1591,15 @@ timeout:
1608 err = -EFAULT; 1591 err = -EFAULT;
1609 1592
1610out: 1593out:
1611 mutex_unlock(&mce_read_mutex); 1594 mutex_unlock(&mce_chrdev_read_mutex);
1612 kfree(cpu_tsc); 1595 kfree(cpu_tsc);
1613 1596
1614 return err ? err : buf - ubuf; 1597 return err ? err : buf - ubuf;
1615} 1598}
1616 1599
1617static unsigned int mce_poll(struct file *file, poll_table *wait) 1600static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
1618{ 1601{
1619 poll_wait(file, &mce_wait, wait); 1602 poll_wait(file, &mce_chrdev_wait, wait);
1620 if (rcu_access_index(mcelog.next)) 1603 if (rcu_access_index(mcelog.next))
1621 return POLLIN | POLLRDNORM; 1604 return POLLIN | POLLRDNORM;
1622 if (!mce_apei_read_done && apei_check_mce()) 1605 if (!mce_apei_read_done && apei_check_mce())
@@ -1624,7 +1607,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
1624 return 0; 1607 return 0;
1625} 1608}
1626 1609
1627static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 1610static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
1611 unsigned long arg)
1628{ 1612{
1629 int __user *p = (int __user *)arg; 1613 int __user *p = (int __user *)arg;
1630 1614
@@ -1652,16 +1636,16 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
1652 1636
1653/* Modified in mce-inject.c, so not static or const */ 1637/* Modified in mce-inject.c, so not static or const */
1654struct file_operations mce_chrdev_ops = { 1638struct file_operations mce_chrdev_ops = {
1655 .open = mce_open, 1639 .open = mce_chrdev_open,
1656 .release = mce_release, 1640 .release = mce_chrdev_release,
1657 .read = mce_read, 1641 .read = mce_chrdev_read,
1658 .poll = mce_poll, 1642 .poll = mce_chrdev_poll,
1659 .unlocked_ioctl = mce_ioctl, 1643 .unlocked_ioctl = mce_chrdev_ioctl,
1660 .llseek = no_llseek, 1644 .llseek = no_llseek,
1661}; 1645};
1662EXPORT_SYMBOL_GPL(mce_chrdev_ops); 1646EXPORT_SYMBOL_GPL(mce_chrdev_ops);
1663 1647
1664static struct miscdevice mce_log_device = { 1648static struct miscdevice mce_chrdev_device = {
1665 MISC_MCELOG_MINOR, 1649 MISC_MCELOG_MINOR,
1666 "mcelog", 1650 "mcelog",
1667 &mce_chrdev_ops, 1651 &mce_chrdev_ops,
@@ -1719,7 +1703,7 @@ int __init mcheck_init(void)
1719} 1703}
1720 1704
1721/* 1705/*
1722 * Sysfs support 1706 * mce_syscore: PM support
1723 */ 1707 */
1724 1708
1725/* 1709/*
@@ -1739,12 +1723,12 @@ static int mce_disable_error_reporting(void)
1739 return 0; 1723 return 0;
1740} 1724}
1741 1725
1742static int mce_suspend(void) 1726static int mce_syscore_suspend(void)
1743{ 1727{
1744 return mce_disable_error_reporting(); 1728 return mce_disable_error_reporting();
1745} 1729}
1746 1730
1747static void mce_shutdown(void) 1731static void mce_syscore_shutdown(void)
1748{ 1732{
1749 mce_disable_error_reporting(); 1733 mce_disable_error_reporting();
1750} 1734}
@@ -1754,21 +1738,24 @@ static void mce_shutdown(void)
1754 * Only one CPU is active at this time, the others get re-added later using 1738 * Only one CPU is active at this time, the others get re-added later using
1755 * CPU hotplug: 1739 * CPU hotplug:
1756 */ 1740 */
1757static void mce_resume(void) 1741static void mce_syscore_resume(void)
1758{ 1742{
1759 __mcheck_cpu_init_generic(); 1743 __mcheck_cpu_init_generic();
1760 __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); 1744 __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
1761} 1745}
1762 1746
1763static struct syscore_ops mce_syscore_ops = { 1747static struct syscore_ops mce_syscore_ops = {
1764 .suspend = mce_suspend, 1748 .suspend = mce_syscore_suspend,
1765 .shutdown = mce_shutdown, 1749 .shutdown = mce_syscore_shutdown,
1766 .resume = mce_resume, 1750 .resume = mce_syscore_resume,
1767}; 1751};
1768 1752
1753/*
1754 * mce_sysdev: Sysfs support
1755 */
1756
1769static void mce_cpu_restart(void *data) 1757static void mce_cpu_restart(void *data)
1770{ 1758{
1771 del_timer_sync(&__get_cpu_var(mce_timer));
1772 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1759 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1773 return; 1760 return;
1774 __mcheck_cpu_init_generic(); 1761 __mcheck_cpu_init_generic();
@@ -1778,16 +1765,15 @@ static void mce_cpu_restart(void *data)
1778/* Reinit MCEs after user configuration changes */ 1765/* Reinit MCEs after user configuration changes */
1779static void mce_restart(void) 1766static void mce_restart(void)
1780{ 1767{
1768 mce_timer_delete_all();
1781 on_each_cpu(mce_cpu_restart, NULL, 1); 1769 on_each_cpu(mce_cpu_restart, NULL, 1);
1782} 1770}
1783 1771
1784/* Toggle features for corrected errors */ 1772/* Toggle features for corrected errors */
1785static void mce_disable_ce(void *all) 1773static void mce_disable_cmci(void *data)
1786{ 1774{
1787 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1775 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1788 return; 1776 return;
1789 if (all)
1790 del_timer_sync(&__get_cpu_var(mce_timer));
1791 cmci_clear(); 1777 cmci_clear();
1792} 1778}
1793 1779
@@ -1801,11 +1787,11 @@ static void mce_enable_ce(void *all)
1801 __mcheck_cpu_init_timer(); 1787 __mcheck_cpu_init_timer();
1802} 1788}
1803 1789
1804static struct sysdev_class mce_sysclass = { 1790static struct sysdev_class mce_sysdev_class = {
1805 .name = "machinecheck", 1791 .name = "machinecheck",
1806}; 1792};
1807 1793
1808DEFINE_PER_CPU(struct sys_device, mce_dev); 1794DEFINE_PER_CPU(struct sys_device, mce_sysdev);
1809 1795
1810__cpuinitdata 1796__cpuinitdata
1811void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1797void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -1870,7 +1856,8 @@ static ssize_t set_ignore_ce(struct sys_device *s,
1870 if (mce_ignore_ce ^ !!new) { 1856 if (mce_ignore_ce ^ !!new) {
1871 if (new) { 1857 if (new) {
1872 /* disable ce features */ 1858 /* disable ce features */
1873 on_each_cpu(mce_disable_ce, (void *)1, 1); 1859 mce_timer_delete_all();
1860 on_each_cpu(mce_disable_cmci, NULL, 1);
1874 mce_ignore_ce = 1; 1861 mce_ignore_ce = 1;
1875 } else { 1862 } else {
1876 /* enable ce features */ 1863 /* enable ce features */
@@ -1893,7 +1880,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s,
1893 if (mce_cmci_disabled ^ !!new) { 1880 if (mce_cmci_disabled ^ !!new) {
1894 if (new) { 1881 if (new) {
1895 /* disable cmci */ 1882 /* disable cmci */
1896 on_each_cpu(mce_disable_ce, NULL, 1); 1883 on_each_cpu(mce_disable_cmci, NULL, 1);
1897 mce_cmci_disabled = 1; 1884 mce_cmci_disabled = 1;
1898 } else { 1885 } else {
1899 /* enable cmci */ 1886 /* enable cmci */
@@ -1934,7 +1921,7 @@ static struct sysdev_ext_attribute attr_cmci_disabled = {
1934 &mce_cmci_disabled 1921 &mce_cmci_disabled
1935}; 1922};
1936 1923
1937static struct sysdev_attribute *mce_attrs[] = { 1924static struct sysdev_attribute *mce_sysdev_attrs[] = {
1938 &attr_tolerant.attr, 1925 &attr_tolerant.attr,
1939 &attr_check_interval.attr, 1926 &attr_check_interval.attr,
1940 &attr_trigger, 1927 &attr_trigger,
@@ -1945,66 +1932,67 @@ static struct sysdev_attribute *mce_attrs[] = {
1945 NULL 1932 NULL
1946}; 1933};
1947 1934
1948static cpumask_var_t mce_dev_initialized; 1935static cpumask_var_t mce_sysdev_initialized;
1949 1936
1950/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ 1937/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
1951static __cpuinit int mce_create_device(unsigned int cpu) 1938static __cpuinit int mce_sysdev_create(unsigned int cpu)
1952{ 1939{
1940 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
1953 int err; 1941 int err;
1954 int i, j; 1942 int i, j;
1955 1943
1956 if (!mce_available(&boot_cpu_data)) 1944 if (!mce_available(&boot_cpu_data))
1957 return -EIO; 1945 return -EIO;
1958 1946
1959 memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject)); 1947 memset(&sysdev->kobj, 0, sizeof(struct kobject));
1960 per_cpu(mce_dev, cpu).id = cpu; 1948 sysdev->id = cpu;
1961 per_cpu(mce_dev, cpu).cls = &mce_sysclass; 1949 sysdev->cls = &mce_sysdev_class;
1962 1950
1963 err = sysdev_register(&per_cpu(mce_dev, cpu)); 1951 err = sysdev_register(sysdev);
1964 if (err) 1952 if (err)
1965 return err; 1953 return err;
1966 1954
1967 for (i = 0; mce_attrs[i]; i++) { 1955 for (i = 0; mce_sysdev_attrs[i]; i++) {
1968 err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1956 err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]);
1969 if (err) 1957 if (err)
1970 goto error; 1958 goto error;
1971 } 1959 }
1972 for (j = 0; j < banks; j++) { 1960 for (j = 0; j < banks; j++) {
1973 err = sysdev_create_file(&per_cpu(mce_dev, cpu), 1961 err = sysdev_create_file(sysdev, &mce_banks[j].attr);
1974 &mce_banks[j].attr);
1975 if (err) 1962 if (err)
1976 goto error2; 1963 goto error2;
1977 } 1964 }
1978 cpumask_set_cpu(cpu, mce_dev_initialized); 1965 cpumask_set_cpu(cpu, mce_sysdev_initialized);
1979 1966
1980 return 0; 1967 return 0;
1981error2: 1968error2:
1982 while (--j >= 0) 1969 while (--j >= 0)
1983 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); 1970 sysdev_remove_file(sysdev, &mce_banks[j].attr);
1984error: 1971error:
1985 while (--i >= 0) 1972 while (--i >= 0)
1986 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1973 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);
1987 1974
1988 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1975 sysdev_unregister(sysdev);
1989 1976
1990 return err; 1977 return err;
1991} 1978}
1992 1979
1993static __cpuinit void mce_remove_device(unsigned int cpu) 1980static __cpuinit void mce_sysdev_remove(unsigned int cpu)
1994{ 1981{
1982 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
1995 int i; 1983 int i;
1996 1984
1997 if (!cpumask_test_cpu(cpu, mce_dev_initialized)) 1985 if (!cpumask_test_cpu(cpu, mce_sysdev_initialized))
1998 return; 1986 return;
1999 1987
2000 for (i = 0; mce_attrs[i]; i++) 1988 for (i = 0; mce_sysdev_attrs[i]; i++)
2001 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1989 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);
2002 1990
2003 for (i = 0; i < banks; i++) 1991 for (i = 0; i < banks; i++)
2004 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); 1992 sysdev_remove_file(sysdev, &mce_banks[i].attr);
2005 1993
2006 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1994 sysdev_unregister(sysdev);
2007 cpumask_clear_cpu(cpu, mce_dev_initialized); 1995 cpumask_clear_cpu(cpu, mce_sysdev_initialized);
2008} 1996}
2009 1997
2010/* Make sure there are no machine checks on offlined CPUs. */ 1998/* Make sure there are no machine checks on offlined CPUs. */
@@ -2054,7 +2042,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2054 switch (action) { 2042 switch (action) {
2055 case CPU_ONLINE: 2043 case CPU_ONLINE:
2056 case CPU_ONLINE_FROZEN: 2044 case CPU_ONLINE_FROZEN:
2057 mce_create_device(cpu); 2045 mce_sysdev_create(cpu);
2058 if (threshold_cpu_callback) 2046 if (threshold_cpu_callback)
2059 threshold_cpu_callback(action, cpu); 2047 threshold_cpu_callback(action, cpu);
2060 break; 2048 break;
@@ -2062,7 +2050,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2062 case CPU_DEAD_FROZEN: 2050 case CPU_DEAD_FROZEN:
2063 if (threshold_cpu_callback) 2051 if (threshold_cpu_callback)
2064 threshold_cpu_callback(action, cpu); 2052 threshold_cpu_callback(action, cpu);
2065 mce_remove_device(cpu); 2053 mce_sysdev_remove(cpu);
2066 break; 2054 break;
2067 case CPU_DOWN_PREPARE: 2055 case CPU_DOWN_PREPARE:
2068 case CPU_DOWN_PREPARE_FROZEN: 2056 case CPU_DOWN_PREPARE_FROZEN:
@@ -2116,27 +2104,28 @@ static __init int mcheck_init_device(void)
2116 if (!mce_available(&boot_cpu_data)) 2104 if (!mce_available(&boot_cpu_data))
2117 return -EIO; 2105 return -EIO;
2118 2106
2119 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); 2107 zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL);
2120 2108
2121 mce_init_banks(); 2109 mce_init_banks();
2122 2110
2123 err = sysdev_class_register(&mce_sysclass); 2111 err = sysdev_class_register(&mce_sysdev_class);
2124 if (err) 2112 if (err)
2125 return err; 2113 return err;
2126 2114
2127 for_each_online_cpu(i) { 2115 for_each_online_cpu(i) {
2128 err = mce_create_device(i); 2116 err = mce_sysdev_create(i);
2129 if (err) 2117 if (err)
2130 return err; 2118 return err;
2131 } 2119 }
2132 2120
2133 register_syscore_ops(&mce_syscore_ops); 2121 register_syscore_ops(&mce_syscore_ops);
2134 register_hotcpu_notifier(&mce_cpu_notifier); 2122 register_hotcpu_notifier(&mce_cpu_notifier);
2135 misc_register(&mce_log_device); 2123
2124 /* register character device /dev/mcelog */
2125 misc_register(&mce_chrdev_device);
2136 2126
2137 return err; 2127 return err;
2138} 2128}
2139
2140device_initcall(mcheck_init_device); 2129device_initcall(mcheck_init_device);
2141 2130
2142/* 2131/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index bb0adad35143..f5474218cffe 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -548,7 +548,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
548 if (!b) 548 if (!b)
549 goto out; 549 goto out;
550 550
551 err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj, 551 err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj,
552 b->kobj, name); 552 b->kobj, name);
553 if (err) 553 if (err)
554 goto out; 554 goto out;
@@ -571,7 +571,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
571 goto out; 571 goto out;
572 } 572 }
573 573
574 b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj); 574 b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj);
575 if (!b->kobj) 575 if (!b->kobj)
576 goto out_free; 576 goto out_free;
577 577
@@ -591,7 +591,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
591 if (i == cpu) 591 if (i == cpu)
592 continue; 592 continue;
593 593
594 err = sysfs_create_link(&per_cpu(mce_dev, i).kobj, 594 err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj,
595 b->kobj, name); 595 b->kobj, name);
596 if (err) 596 if (err)
597 goto out; 597 goto out;
@@ -669,7 +669,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
669#ifdef CONFIG_SMP 669#ifdef CONFIG_SMP
670 /* sibling symlink */ 670 /* sibling symlink */
671 if (shared_bank[bank] && b->blocks->cpu != cpu) { 671 if (shared_bank[bank] && b->blocks->cpu != cpu) {
672 sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name); 672 sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name);
673 per_cpu(threshold_banks, cpu)[bank] = NULL; 673 per_cpu(threshold_banks, cpu)[bank] = NULL;
674 674
675 return; 675 return;
@@ -681,7 +681,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
681 if (i == cpu) 681 if (i == cpu)
682 continue; 682 continue;
683 683
684 sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name); 684 sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name);
685 per_cpu(threshold_banks, i)[bank] = NULL; 685 per_cpu(threshold_banks, i)[bank] = NULL;
686 } 686 }
687 687
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 8694ef56459d..38e49bc95ffc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -28,7 +28,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
28 * cmci_discover_lock protects against parallel discovery attempts 28 * cmci_discover_lock protects against parallel discovery attempts
29 * which could race against each other. 29 * which could race against each other.
30 */ 30 */
31static DEFINE_SPINLOCK(cmci_discover_lock); 31static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
32 32
33#define CMCI_THRESHOLD 1 33#define CMCI_THRESHOLD 1
34 34
@@ -85,7 +85,7 @@ static void cmci_discover(int banks, int boot)
85 int hdr = 0; 85 int hdr = 0;
86 int i; 86 int i;
87 87
88 spin_lock_irqsave(&cmci_discover_lock, flags); 88 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
89 for (i = 0; i < banks; i++) { 89 for (i = 0; i < banks; i++) {
90 u64 val; 90 u64 val;
91 91
@@ -116,7 +116,7 @@ static void cmci_discover(int banks, int boot)
116 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); 116 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
117 } 117 }
118 } 118 }
119 spin_unlock_irqrestore(&cmci_discover_lock, flags); 119 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
120 if (hdr) 120 if (hdr)
121 printk(KERN_CONT "\n"); 121 printk(KERN_CONT "\n");
122} 122}
@@ -150,7 +150,7 @@ void cmci_clear(void)
150 150
151 if (!cmci_supported(&banks)) 151 if (!cmci_supported(&banks))
152 return; 152 return;
153 spin_lock_irqsave(&cmci_discover_lock, flags); 153 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
154 for (i = 0; i < banks; i++) { 154 for (i = 0; i < banks; i++) {
155 if (!test_bit(i, __get_cpu_var(mce_banks_owned))) 155 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
156 continue; 156 continue;
@@ -160,7 +160,7 @@ void cmci_clear(void)
160 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 160 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
161 __clear_bit(i, __get_cpu_var(mce_banks_owned)); 161 __clear_bit(i, __get_cpu_var(mce_banks_owned));
162 } 162 }
163 spin_unlock_irqrestore(&cmci_discover_lock, flags); 163 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
164} 164}
165 165
166/* 166/*
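
Switching cmci_discover_lock to a raw_spinlock_t keeps it a true spinning lock even on PREEMPT_RT, where an ordinary spinlock_t turns into a sleeping lock and therefore cannot be taken from code that runs with interrupts disabled. The pattern in isolation, as a hedged sketch rather than the driver code:

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

static void example_touch_shared_hw_state(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_lock, flags);
	/* ... short, non-sleeping critical section (e.g. poking MSR banks) ... */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}
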
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 929739a653d1..6b96110bb0c3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -79,7 +79,6 @@ void set_mtrr_ops(const struct mtrr_ops *ops)
79static int have_wrcomb(void) 79static int have_wrcomb(void)
80{ 80{
81 struct pci_dev *dev; 81 struct pci_dev *dev;
82 u8 rev;
83 82
84 dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); 83 dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
85 if (dev != NULL) { 84 if (dev != NULL) {
@@ -89,13 +88,11 @@ static int have_wrcomb(void)
89 * chipsets to be tagged 88 * chipsets to be tagged
90 */ 89 */
91 if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && 90 if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
92 dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { 91 dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
93 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); 92 dev->revision <= 5) {
94 if (rev <= 5) { 93 pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
95 pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); 94 pci_dev_put(dev);
96 pci_dev_put(dev); 95 return 0;
97 return 0;
98 }
99 } 96 }
100 /* 97 /*
101 * Intel 450NX errata # 23. Non ascending cacheline evictions to 98 * Intel 450NX errata # 23. Non ascending cacheline evictions to
@@ -137,56 +134,42 @@ static void __init init_table(void)
137} 134}
138 135
139struct set_mtrr_data { 136struct set_mtrr_data {
140 atomic_t count;
141 atomic_t gate;
142 unsigned long smp_base; 137 unsigned long smp_base;
143 unsigned long smp_size; 138 unsigned long smp_size;
144 unsigned int smp_reg; 139 unsigned int smp_reg;
145 mtrr_type smp_type; 140 mtrr_type smp_type;
146}; 141};
147 142
148static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);
149
150/** 143/**
151 * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs. 144 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
145 * by all the CPUs.
152 * @info: pointer to mtrr configuration data 146 * @info: pointer to mtrr configuration data
153 * 147 *
154 * Returns nothing. 148 * Returns nothing.
155 */ 149 */
156static int mtrr_work_handler(void *info) 150static int mtrr_rendezvous_handler(void *info)
157{ 151{
158#ifdef CONFIG_SMP
159 struct set_mtrr_data *data = info; 152 struct set_mtrr_data *data = info;
160 unsigned long flags;
161 153
162 atomic_dec(&data->count); 154 /*
163 while (!atomic_read(&data->gate)) 155 * We use this same function to initialize the mtrrs during boot,
164 cpu_relax(); 156 * resume, runtime cpu online and on an explicit request to set a
165 157 * specific MTRR.
166 local_irq_save(flags); 158 *
167 159 * During boot or suspend, the state of the boot cpu's mtrrs has been
168 atomic_dec(&data->count); 160 * saved, and we want to replicate that across all the cpus that come
169 while (atomic_read(&data->gate)) 161 * online (either at the end of boot or resume or during a runtime cpu
170 cpu_relax(); 162 * online). If we're doing that, @reg is set to something special and on
171 163 * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
172 /* The master has cleared me to execute */ 164 * started the boot/resume sequence, this might be a duplicate
165 * set_all()).
166 */
173 if (data->smp_reg != ~0U) { 167 if (data->smp_reg != ~0U) {
174 mtrr_if->set(data->smp_reg, data->smp_base, 168 mtrr_if->set(data->smp_reg, data->smp_base,
175 data->smp_size, data->smp_type); 169 data->smp_size, data->smp_type);
176 } else if (mtrr_aps_delayed_init) { 170 } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
177 /*
178 * Initialize the MTRRs inaddition to the synchronisation.
179 */
180 mtrr_if->set_all(); 171 mtrr_if->set_all();
181 } 172 }
182
183 atomic_dec(&data->count);
184 while (!atomic_read(&data->gate))
185 cpu_relax();
186
187 atomic_dec(&data->count);
188 local_irq_restore(flags);
189#endif
190 return 0; 173 return 0;
191} 174}
192 175
@@ -223,20 +206,11 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
223 * 14. Wait for buddies to catch up 206 * 14. Wait for buddies to catch up
224 * 15. Enable interrupts. 207 * 15. Enable interrupts.
225 * 208 *
226 * What does that mean for us? Well, first we set data.count to the number 209 * What does that mean for us? Well, stop_machine() will ensure that
227 * of CPUs. As each CPU announces that it started the rendezvous handler by 210 * the rendezvous handler is started on each CPU. And in lockstep they
228 * decrementing the count, We reset data.count and set the data.gate flag 211 * do the state transition of disabling interrupts, updating MTRR's
229 * allowing all the cpu's to proceed with the work. As each cpu disables 212 * (the CPU vendors may each do it differently, so we call mtrr_if->set()
230 * interrupts, it'll decrement data.count once. We wait until it hits 0 and 213 * callback and let them take care of it.) and enabling interrupts.
231 * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
232 * are waiting for that flag to be cleared. Once it's cleared, each
233 * CPU goes through the transition of updating MTRRs.
234 * The CPU vendors may each do it differently,
235 * so we call mtrr_if->set() callback and let them take care of it.
236 * When they're done, they again decrement data->count and wait for data.gate
237 * to be set.
238 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
239 * Everyone then enables interrupts and we all continue on.
240 * 214 *
241 * Note that the mechanism is the same for UP systems, too; all the SMP stuff 215 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
242 * becomes nops. 216 * becomes nops.
@@ -244,92 +218,26 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
244static void 218static void
245set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) 219set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
246{ 220{
247 struct set_mtrr_data data; 221 struct set_mtrr_data data = { .smp_reg = reg,
248 unsigned long flags; 222 .smp_base = base,
249 int cpu; 223 .smp_size = size,
224 .smp_type = type
225 };
250 226
251 preempt_disable(); 227 stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
252 228}
253 data.smp_reg = reg;
254 data.smp_base = base;
255 data.smp_size = size;
256 data.smp_type = type;
257 atomic_set(&data.count, num_booting_cpus() - 1);
258
259 /* Make sure data.count is visible before unleashing other CPUs */
260 smp_wmb();
261 atomic_set(&data.gate, 0);
262
263 /* Start the ball rolling on other CPUs */
264 for_each_online_cpu(cpu) {
265 struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);
266
267 if (cpu == smp_processor_id())
268 continue;
269
270 stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
271 }
272
273
274 while (atomic_read(&data.count))
275 cpu_relax();
276
277 /* Ok, reset count and toggle gate */
278 atomic_set(&data.count, num_booting_cpus() - 1);
279 smp_wmb();
280 atomic_set(&data.gate, 1);
281
282 local_irq_save(flags);
283
284 while (atomic_read(&data.count))
285 cpu_relax();
286
287 /* Ok, reset count and toggle gate */
288 atomic_set(&data.count, num_booting_cpus() - 1);
289 smp_wmb();
290 atomic_set(&data.gate, 0);
291
292 /* Do our MTRR business */
293
294 /*
295 * HACK!
296 *
297 * We use this same function to initialize the mtrrs during boot,
298 * resume, runtime cpu online and on an explicit request to set a
299 * specific MTRR.
300 *
301 * During boot or suspend, the state of the boot cpu's mtrrs has been
302 * saved, and we want to replicate that across all the cpus that come
303 * online (either at the end of boot or resume or during a runtime cpu
304 * online). If we're doing that, @reg is set to something special and on
305 * this cpu we still do mtrr_if->set_all(). During boot/resume, this
306 * is unnecessary if at this point we are still on the cpu that started
307 * the boot/resume sequence. But there is no guarantee that we are still
308 * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be
309 * sure that we are in sync with everyone else.
310 */
311 if (reg != ~0U)
312 mtrr_if->set(reg, base, size, type);
313 else
314 mtrr_if->set_all();
315
316 /* Wait for the others */
317 while (atomic_read(&data.count))
318 cpu_relax();
319
320 atomic_set(&data.count, num_booting_cpus() - 1);
321 smp_wmb();
322 atomic_set(&data.gate, 1);
323
324 /*
325 * Wait here for everyone to have seen the gate change
326 * So we're the last ones to touch 'data'
327 */
328 while (atomic_read(&data.count))
329 cpu_relax();
330 229
331 local_irq_restore(flags); 230static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
332 preempt_enable(); 231 unsigned long size, mtrr_type type)
232{
233 struct set_mtrr_data data = { .smp_reg = reg,
234 .smp_base = base,
235 .smp_size = size,
236 .smp_type = type
237 };
238
239 stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
240 cpu_callout_mask);
333} 241}
334 242
335/** 243/**
@@ -783,7 +691,7 @@ void mtrr_ap_init(void)
783 * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug 691 * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
784 * lock to prevent mtrr entry changes 692 * lock to prevent mtrr entry changes
785 */ 693 */
786 set_mtrr(~0U, 0, 0, 0); 694 set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
787} 695}
788 696
789/** 697/**
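
set_mtrr() now delegates the whole CPU rendezvous to stop_machine(): every CPU in the passed mask runs the handler with interrupts disabled and in lockstep, so the hand-rolled count/gate state machine deleted above is no longer needed. A minimal sketch of that usage with invented names:

#include <linux/kernel.h>
#include <linux/stop_machine.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>

struct example_data {
	unsigned long value;
};

static DEFINE_PER_CPU(unsigned long, example_state);

/* Runs on every CPU in the mask, with interrupts disabled. */
static int example_rendezvous(void *info)
{
	struct example_data *d = info;

	__this_cpu_write(example_state, d->value);
	return 0;
}

static void example_update_all_cpus(unsigned long value)
{
	struct example_data d = { .value = value };

	stop_machine(example_rendezvous, &d, cpu_online_mask);
}
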
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3a0338b4b179..640891014b2a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -22,7 +22,6 @@
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/highmem.h>
26#include <linux/cpu.h> 25#include <linux/cpu.h>
27#include <linux/bitops.h> 26#include <linux/bitops.h>
28 27
@@ -33,6 +32,8 @@
33#include <asm/smp.h> 32#include <asm/smp.h>
34#include <asm/alternative.h> 33#include <asm/alternative.h>
35 34
35#include "perf_event.h"
36
36#if 0 37#if 0
37#undef wrmsrl 38#undef wrmsrl
38#define wrmsrl(msr, val) \ 39#define wrmsrl(msr, val) \
@@ -44,273 +45,17 @@ do { \
44} while (0) 45} while (0)
45#endif 46#endif
46 47
47/* 48struct x86_pmu x86_pmu __read_mostly;
48 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
49 */
50static unsigned long
51copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
52{
53 unsigned long offset, addr = (unsigned long)from;
54 unsigned long size, len = 0;
55 struct page *page;
56 void *map;
57 int ret;
58
59 do {
60 ret = __get_user_pages_fast(addr, 1, 0, &page);
61 if (!ret)
62 break;
63
64 offset = addr & (PAGE_SIZE - 1);
65 size = min(PAGE_SIZE - offset, n - len);
66
67 map = kmap_atomic(page);
68 memcpy(to, map+offset, size);
69 kunmap_atomic(map);
70 put_page(page);
71
72 len += size;
73 to += size;
74 addr += size;
75
76 } while (len < n);
77
78 return len;
79}
80
81struct event_constraint {
82 union {
83 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
84 u64 idxmsk64;
85 };
86 u64 code;
87 u64 cmask;
88 int weight;
89};
90
91struct amd_nb {
92 int nb_id; /* NorthBridge id */
93 int refcnt; /* reference count */
94 struct perf_event *owners[X86_PMC_IDX_MAX];
95 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
96};
97
98struct intel_percore;
99
100#define MAX_LBR_ENTRIES 16
101
102struct cpu_hw_events {
103 /*
104 * Generic x86 PMC bits
105 */
106 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
107 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
108 unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
109 int enabled;
110
111 int n_events;
112 int n_added;
113 int n_txn;
114 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
115 u64 tags[X86_PMC_IDX_MAX];
116 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
117
118 unsigned int group_flag;
119
120 /*
121 * Intel DebugStore bits
122 */
123 struct debug_store *ds;
124 u64 pebs_enabled;
125
126 /*
127 * Intel LBR bits
128 */
129 int lbr_users;
130 void *lbr_context;
131 struct perf_branch_stack lbr_stack;
132 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
133
134 /*
135 * Intel percore register state.
136 * Coordinate shared resources between HT threads.
137 */
138 int percore_used; /* Used by this CPU? */
139 struct intel_percore *per_core;
140
141 /*
142 * AMD specific bits
143 */
144 struct amd_nb *amd_nb;
145};
146
147#define __EVENT_CONSTRAINT(c, n, m, w) {\
148 { .idxmsk64 = (n) }, \
149 .code = (c), \
150 .cmask = (m), \
151 .weight = (w), \
152}
153
154#define EVENT_CONSTRAINT(c, n, m) \
155 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
156
157/*
158 * Constraint on the Event code.
159 */
160#define INTEL_EVENT_CONSTRAINT(c, n) \
161 EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
162
163/*
164 * Constraint on the Event code + UMask + fixed-mask
165 *
166 * filter mask to validate fixed counter events.
167 * the following filters disqualify for fixed counters:
168 * - inv
169 * - edge
170 * - cnt-mask
171 * The other filters are supported by fixed counters.
172 * The any-thread option is supported starting with v3.
173 */
174#define FIXED_EVENT_CONSTRAINT(c, n) \
175 EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
176
177/*
178 * Constraint on the Event code + UMask
179 */
180#define INTEL_UEVENT_CONSTRAINT(c, n) \
181 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
182
183#define EVENT_CONSTRAINT_END \
184 EVENT_CONSTRAINT(0, 0, 0)
185
186#define for_each_event_constraint(e, c) \
187 for ((e) = (c); (e)->weight; (e)++)
188
189/*
190 * Extra registers for specific events.
191 * Some events need large masks and require external MSRs.
192 * Define a mapping to these extra registers.
193 */
194struct extra_reg {
195 unsigned int event;
196 unsigned int msr;
197 u64 config_mask;
198 u64 valid_mask;
199};
200
201#define EVENT_EXTRA_REG(e, ms, m, vm) { \
202 .event = (e), \
203 .msr = (ms), \
204 .config_mask = (m), \
205 .valid_mask = (vm), \
206 }
207#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
208 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
209#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
210
211union perf_capabilities {
212 struct {
213 u64 lbr_format : 6;
214 u64 pebs_trap : 1;
215 u64 pebs_arch_reg : 1;
216 u64 pebs_format : 4;
217 u64 smm_freeze : 1;
218 };
219 u64 capabilities;
220};
221
222/*
223 * struct x86_pmu - generic x86 pmu
224 */
225struct x86_pmu {
226 /*
227 * Generic x86 PMC bits
228 */
229 const char *name;
230 int version;
231 int (*handle_irq)(struct pt_regs *);
232 void (*disable_all)(void);
233 void (*enable_all)(int added);
234 void (*enable)(struct perf_event *);
235 void (*disable)(struct perf_event *);
236 int (*hw_config)(struct perf_event *event);
237 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
238 unsigned eventsel;
239 unsigned perfctr;
240 u64 (*event_map)(int);
241 int max_events;
242 int num_counters;
243 int num_counters_fixed;
244 int cntval_bits;
245 u64 cntval_mask;
246 int apic;
247 u64 max_period;
248 struct event_constraint *
249 (*get_event_constraints)(struct cpu_hw_events *cpuc,
250 struct perf_event *event);
251
252 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
253 struct perf_event *event);
254 struct event_constraint *event_constraints;
255 struct event_constraint *percore_constraints;
256 void (*quirks)(void);
257 int perfctr_second_write;
258
259 int (*cpu_prepare)(int cpu);
260 void (*cpu_starting)(int cpu);
261 void (*cpu_dying)(int cpu);
262 void (*cpu_dead)(int cpu);
263
264 /*
265 * Intel Arch Perfmon v2+
266 */
267 u64 intel_ctrl;
268 union perf_capabilities intel_cap;
269
270 /*
271 * Intel DebugStore bits
272 */
273 int bts, pebs;
274 int bts_active, pebs_active;
275 int pebs_record_size;
276 void (*drain_pebs)(struct pt_regs *regs);
277 struct event_constraint *pebs_constraints;
278
279 /*
280 * Intel LBR
281 */
282 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
283 int lbr_nr; /* hardware stack size */
284
285 /*
286 * Extra registers for events
287 */
288 struct extra_reg *extra_regs;
289};
290
291static struct x86_pmu x86_pmu __read_mostly;
292 49
293static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 50DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
294 .enabled = 1, 51 .enabled = 1,
295}; 52};
296 53
297static int x86_perf_event_set_period(struct perf_event *event); 54u64 __read_mostly hw_cache_event_ids
298
299/*
300 * Generalized hw caching related hw_event table, filled
301 * in on a per model basis. A value of 0 means
302 * 'not supported', -1 means 'hw_event makes no sense on
303 * this CPU', any other value means the raw hw_event
304 * ID.
305 */
306
307#define C(x) PERF_COUNT_HW_CACHE_##x
308
309static u64 __read_mostly hw_cache_event_ids
310 [PERF_COUNT_HW_CACHE_MAX] 55 [PERF_COUNT_HW_CACHE_MAX]
311 [PERF_COUNT_HW_CACHE_OP_MAX] 56 [PERF_COUNT_HW_CACHE_OP_MAX]
312 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 57 [PERF_COUNT_HW_CACHE_RESULT_MAX];
313static u64 __read_mostly hw_cache_extra_regs 58u64 __read_mostly hw_cache_extra_regs
314 [PERF_COUNT_HW_CACHE_MAX] 59 [PERF_COUNT_HW_CACHE_MAX]
315 [PERF_COUNT_HW_CACHE_OP_MAX] 60 [PERF_COUNT_HW_CACHE_OP_MAX]
316 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 61 [PERF_COUNT_HW_CACHE_RESULT_MAX];
@@ -320,8 +65,7 @@ static u64 __read_mostly hw_cache_extra_regs
320 * Can only be executed on the CPU where the event is active. 65 * Can only be executed on the CPU where the event is active.
321 * Returns the delta events processed. 66 * Returns the delta events processed.
322 */ 67 */
323static u64 68u64 x86_perf_event_update(struct perf_event *event)
324x86_perf_event_update(struct perf_event *event)
325{ 69{
326 struct hw_perf_event *hwc = &event->hw; 70 struct hw_perf_event *hwc = &event->hw;
327 int shift = 64 - x86_pmu.cntval_bits; 71 int shift = 64 - x86_pmu.cntval_bits;
@@ -364,39 +108,15 @@ again:
364 return new_raw_count; 108 return new_raw_count;
365} 109}
366 110
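The shift handling at the top of x86_perf_event_update() is the interesting part of the hunk above. A reduced sketch of it (the real function additionally retries the read with local64_cmpxchg()): raw counter values are only cntval_bits wide, so both old and new values are shifted up to bit 63 and back down, which sign-extends them and keeps the delta correct across counter wrap.

static u64 sketch_counter_delta(u64 prev_raw, u64 new_raw)
{
	int shift = 64 - x86_pmu.cntval_bits;	/* e.g. 64 - 48 on current AMD/Intel */
	s64 delta;

	/* shift both values up to bit 63 and back down to sign-extend them */
	delta = (new_raw << shift) - (prev_raw << shift);
	delta >>= shift;

	return delta;
}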
367static inline int x86_pmu_addr_offset(int index)
368{
369 int offset;
370
371 /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
372 alternative_io(ASM_NOP2,
373 "shll $1, %%eax",
374 X86_FEATURE_PERFCTR_CORE,
375 "=a" (offset),
376 "a" (index));
377
378 return offset;
379}
380
381static inline unsigned int x86_pmu_config_addr(int index)
382{
383 return x86_pmu.eventsel + x86_pmu_addr_offset(index);
384}
385
386static inline unsigned int x86_pmu_event_addr(int index)
387{
388 return x86_pmu.perfctr + x86_pmu_addr_offset(index);
389}
390
391/* 111/*
392 * Find and validate any extra registers to set up. 112 * Find and validate any extra registers to set up.
393 */ 113 */
394static int x86_pmu_extra_regs(u64 config, struct perf_event *event) 114static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
395{ 115{
116 struct hw_perf_event_extra *reg;
396 struct extra_reg *er; 117 struct extra_reg *er;
397 118
398 event->hw.extra_reg = 0; 119 reg = &event->hw.extra_reg;
399 event->hw.extra_config = 0;
400 120
401 if (!x86_pmu.extra_regs) 121 if (!x86_pmu.extra_regs)
402 return 0; 122 return 0;
@@ -406,8 +126,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
406 continue; 126 continue;
407 if (event->attr.config1 & ~er->valid_mask) 127 if (event->attr.config1 & ~er->valid_mask)
408 return -EINVAL; 128 return -EINVAL;
409 event->hw.extra_reg = er->msr; 129
410 event->hw.extra_config = event->attr.config1; 130 reg->idx = er->idx;
131 reg->config = event->attr.config1;
132 reg->reg = er->msr;
411 break; 133 break;
412 } 134 }
413 return 0; 135 return 0;
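For orientation, a hypothetical consumer of this path: an offcore-response style event passes its extra MSR payload in attr.config1, which the loop above checks against er->valid_mask and stashes in event->hw.extra_reg. The config1 bit value below is illustrative only.

struct perf_event_attr attr = {
	.size	 = sizeof(struct perf_event_attr),
	.type	 = PERF_TYPE_RAW,
	.config	 = 0x01b7,	/* OFFCORE_RESPONSE_0: umask 0x01, event 0xb7 */
	.config1 = 0x0001,	/* request/response mask written to MSR_OFFCORE_RSP_0 */
};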
@@ -521,9 +243,6 @@ msr_fail:
521 return false; 243 return false;
522} 244}
523 245
524static void reserve_ds_buffers(void);
525static void release_ds_buffers(void);
526
527static void hw_perf_event_destroy(struct perf_event *event) 246static void hw_perf_event_destroy(struct perf_event *event)
528{ 247{
529 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { 248 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
@@ -572,7 +291,7 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
572 return x86_pmu_extra_regs(val, event); 291 return x86_pmu_extra_regs(val, event);
573} 292}
574 293
575static int x86_setup_perfctr(struct perf_event *event) 294int x86_setup_perfctr(struct perf_event *event)
576{ 295{
577 struct perf_event_attr *attr = &event->attr; 296 struct perf_event_attr *attr = &event->attr;
578 struct hw_perf_event *hwc = &event->hw; 297 struct hw_perf_event *hwc = &event->hw;
@@ -636,7 +355,7 @@ static int x86_setup_perfctr(struct perf_event *event)
636 return 0; 355 return 0;
637} 356}
638 357
639static int x86_pmu_hw_config(struct perf_event *event) 358int x86_pmu_hw_config(struct perf_event *event)
640{ 359{
641 if (event->attr.precise_ip) { 360 if (event->attr.precise_ip) {
642 int precise = 0; 361 int precise = 0;
@@ -706,10 +425,13 @@ static int __x86_pmu_event_init(struct perf_event *event)
706 event->hw.last_cpu = -1; 425 event->hw.last_cpu = -1;
707 event->hw.last_tag = ~0ULL; 426 event->hw.last_tag = ~0ULL;
708 427
428 /* mark unused */
429 event->hw.extra_reg.idx = EXTRA_REG_NONE;
430
709 return x86_pmu.hw_config(event); 431 return x86_pmu.hw_config(event);
710} 432}
711 433
712static void x86_pmu_disable_all(void) 434void x86_pmu_disable_all(void)
713{ 435{
714 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 436 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
715 int idx; 437 int idx;
@@ -744,15 +466,7 @@ static void x86_pmu_disable(struct pmu *pmu)
744 x86_pmu.disable_all(); 466 x86_pmu.disable_all();
745} 467}
746 468
747static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, 469void x86_pmu_enable_all(int added)
748 u64 enable_mask)
749{
750 if (hwc->extra_reg)
751 wrmsrl(hwc->extra_reg, hwc->extra_config);
752 wrmsrl(hwc->config_base, hwc->config | enable_mask);
753}
754
755static void x86_pmu_enable_all(int added)
756{ 470{
757 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 471 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
758 int idx; 472 int idx;
@@ -774,7 +488,7 @@ static inline int is_x86_event(struct perf_event *event)
774 return event->pmu == &pmu; 488 return event->pmu == &pmu;
775} 489}
776 490
777static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) 491int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
778{ 492{
779 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; 493 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
780 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 494 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -945,7 +659,6 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
945} 659}
946 660
947static void x86_pmu_start(struct perf_event *event, int flags); 661static void x86_pmu_start(struct perf_event *event, int flags);
948static void x86_pmu_stop(struct perf_event *event, int flags);
949 662
950static void x86_pmu_enable(struct pmu *pmu) 663static void x86_pmu_enable(struct pmu *pmu)
951{ 664{
@@ -1017,21 +730,13 @@ static void x86_pmu_enable(struct pmu *pmu)
1017 x86_pmu.enable_all(added); 730 x86_pmu.enable_all(added);
1018} 731}
1019 732
1020static inline void x86_pmu_disable_event(struct perf_event *event)
1021{
1022 struct hw_perf_event *hwc = &event->hw;
1023
1024 wrmsrl(hwc->config_base, hwc->config);
1025}
1026
1027static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 733static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1028 734
1029/* 735/*
1030 * Set the next IRQ period, based on the hwc->period_left value. 736 * Set the next IRQ period, based on the hwc->period_left value.
1031 * To be called with the event disabled in hw: 737 * To be called with the event disabled in hw:
1032 */ 738 */
1033static int 739int x86_perf_event_set_period(struct perf_event *event)
1034x86_perf_event_set_period(struct perf_event *event)
1035{ 740{
1036 struct hw_perf_event *hwc = &event->hw; 741 struct hw_perf_event *hwc = &event->hw;
1037 s64 left = local64_read(&hwc->period_left); 742 s64 left = local64_read(&hwc->period_left);
@@ -1091,7 +796,7 @@ x86_perf_event_set_period(struct perf_event *event)
1091 return ret; 796 return ret;
1092} 797}
1093 798
1094static void x86_pmu_enable_event(struct perf_event *event) 799void x86_pmu_enable_event(struct perf_event *event)
1095{ 800{
1096 if (__this_cpu_read(cpu_hw_events.enabled)) 801 if (__this_cpu_read(cpu_hw_events.enabled))
1097 __x86_pmu_enable_event(&event->hw, 802 __x86_pmu_enable_event(&event->hw,
@@ -1230,7 +935,7 @@ void perf_event_print_debug(void)
1230 local_irq_restore(flags); 935 local_irq_restore(flags);
1231} 936}
1232 937
1233static void x86_pmu_stop(struct perf_event *event, int flags) 938void x86_pmu_stop(struct perf_event *event, int flags)
1234{ 939{
1235 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 940 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1236 struct hw_perf_event *hwc = &event->hw; 941 struct hw_perf_event *hwc = &event->hw;
@@ -1283,7 +988,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
1283 perf_event_update_userpage(event); 988 perf_event_update_userpage(event);
1284} 989}
1285 990
1286static int x86_pmu_handle_irq(struct pt_regs *regs) 991int x86_pmu_handle_irq(struct pt_regs *regs)
1287{ 992{
1288 struct perf_sample_data data; 993 struct perf_sample_data data;
1289 struct cpu_hw_events *cpuc; 994 struct cpu_hw_events *cpuc;
@@ -1332,7 +1037,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1332 if (!x86_perf_event_set_period(event)) 1037 if (!x86_perf_event_set_period(event))
1333 continue; 1038 continue;
1334 1039
1335 if (perf_event_overflow(event, 1, &data, regs)) 1040 if (perf_event_overflow(event, &data, regs))
1336 x86_pmu_stop(event, 0); 1041 x86_pmu_stop(event, 0);
1337 } 1042 }
1338 1043
@@ -1353,109 +1058,28 @@ void perf_events_lapic_init(void)
1353 apic_write(APIC_LVTPC, APIC_DM_NMI); 1058 apic_write(APIC_LVTPC, APIC_DM_NMI);
1354} 1059}
1355 1060
1356struct pmu_nmi_state {
1357 unsigned int marked;
1358 int handled;
1359};
1360
1361static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
1362
1363static int __kprobes 1061static int __kprobes
1364perf_event_nmi_handler(struct notifier_block *self, 1062perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
1365 unsigned long cmd, void *__args)
1366{ 1063{
1367 struct die_args *args = __args;
1368 unsigned int this_nmi;
1369 int handled;
1370
1371 if (!atomic_read(&active_events)) 1064 if (!atomic_read(&active_events))
1372 return NOTIFY_DONE; 1065 return NMI_DONE;
1373
1374 switch (cmd) {
1375 case DIE_NMI:
1376 break;
1377 case DIE_NMIUNKNOWN:
1378 this_nmi = percpu_read(irq_stat.__nmi_count);
1379 if (this_nmi != __this_cpu_read(pmu_nmi.marked))
1380 /* let the kernel handle the unknown nmi */
1381 return NOTIFY_DONE;
1382 /*
1383 * This one is a PMU back-to-back nmi. Two events
1384 * trigger 'simultaneously' raising two back-to-back
1385 * NMIs. If the first NMI handles both, the latter
1386 * will be empty and daze the CPU. So, we drop it to
1387 * avoid false-positive 'unknown nmi' messages.
1388 */
1389 return NOTIFY_STOP;
1390 default:
1391 return NOTIFY_DONE;
1392 }
1393
1394 handled = x86_pmu.handle_irq(args->regs);
1395 if (!handled)
1396 return NOTIFY_DONE;
1397 1066
1398 this_nmi = percpu_read(irq_stat.__nmi_count); 1067 return x86_pmu.handle_irq(regs);
1399 if ((handled > 1) ||
1400 /* the next nmi could be a back-to-back nmi */
1401 ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
1402 (__this_cpu_read(pmu_nmi.handled) > 1))) {
1403 /*
1404 * We could have two subsequent back-to-back nmis: The
1405 * first handles more than one counter, the 2nd
1406 * handles only one counter and the 3rd handles no
1407 * counter.
1408 *
1409 * This is the 2nd nmi because the previous was
1410 * handling more than one counter. We will mark the
1411 * next (3rd) and then drop it if unhandled.
1412 */
1413 __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
1414 __this_cpu_write(pmu_nmi.handled, handled);
1415 }
1416
1417 return NOTIFY_STOP;
1418} 1068}
1419 1069
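The replacement handler above switches from the die-notifier chain to the NMI handler registration API. A hypothetical skeleton of that contract (the my_pmu_* names are made up): the callback receives (type, regs), returns NMI_DONE when the NMI is not ours, and a positive value (e.g. NMI_HANDLED) when it consumed it.

static int my_pmu_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	if (!my_pmu_has_active_events())	/* assumption: some cheap ownership test */
		return NMI_DONE;		/* not ours, let other handlers look */

	return my_pmu_handle_irq(regs);		/* > 0 counts as handled */
}

static int __init my_pmu_nmi_init(void)
{
	return register_nmi_handler(NMI_LOCAL, my_pmu_nmi_handler, 0, "my-pmu");
}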
1420static __read_mostly struct notifier_block perf_event_nmi_notifier = { 1070struct event_constraint emptyconstraint;
1421 .notifier_call = perf_event_nmi_handler, 1071struct event_constraint unconstrained;
1422 .next = NULL,
1423 .priority = NMI_LOCAL_LOW_PRIOR,
1424};
1425
1426static struct event_constraint unconstrained;
1427static struct event_constraint emptyconstraint;
1428
1429static struct event_constraint *
1430x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1431{
1432 struct event_constraint *c;
1433
1434 if (x86_pmu.event_constraints) {
1435 for_each_event_constraint(c, x86_pmu.event_constraints) {
1436 if ((event->hw.config & c->cmask) == c->code)
1437 return c;
1438 }
1439 }
1440
1441 return &unconstrained;
1442}
1443
1444#include "perf_event_amd.c"
1445#include "perf_event_p6.c"
1446#include "perf_event_p4.c"
1447#include "perf_event_intel_lbr.c"
1448#include "perf_event_intel_ds.c"
1449#include "perf_event_intel.c"
1450 1072
1451static int __cpuinit 1073static int __cpuinit
1452x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) 1074x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1453{ 1075{
1454 unsigned int cpu = (long)hcpu; 1076 unsigned int cpu = (long)hcpu;
1077 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1455 int ret = NOTIFY_OK; 1078 int ret = NOTIFY_OK;
1456 1079
1457 switch (action & ~CPU_TASKS_FROZEN) { 1080 switch (action & ~CPU_TASKS_FROZEN) {
1458 case CPU_UP_PREPARE: 1081 case CPU_UP_PREPARE:
1082 cpuc->kfree_on_online = NULL;
1459 if (x86_pmu.cpu_prepare) 1083 if (x86_pmu.cpu_prepare)
1460 ret = x86_pmu.cpu_prepare(cpu); 1084 ret = x86_pmu.cpu_prepare(cpu);
1461 break; 1085 break;
@@ -1465,6 +1089,10 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1465 x86_pmu.cpu_starting(cpu); 1089 x86_pmu.cpu_starting(cpu);
1466 break; 1090 break;
1467 1091
1092 case CPU_ONLINE:
1093 kfree(cpuc->kfree_on_online);
1094 break;
1095
1468 case CPU_DYING: 1096 case CPU_DYING:
1469 if (x86_pmu.cpu_dying) 1097 if (x86_pmu.cpu_dying)
1470 x86_pmu.cpu_dying(cpu); 1098 x86_pmu.cpu_dying(cpu);
@@ -1543,7 +1171,7 @@ static int __init init_hw_perf_events(void)
1543 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; 1171 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1544 1172
1545 perf_events_lapic_init(); 1173 perf_events_lapic_init();
1546 register_die_notifier(&perf_event_nmi_notifier); 1174 register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
1547 1175
1548 unconstrained = (struct event_constraint) 1176 unconstrained = (struct event_constraint)
1549 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1177 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
@@ -1637,6 +1265,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
1637 perf_pmu_enable(pmu); 1265 perf_pmu_enable(pmu);
1638 return 0; 1266 return 0;
1639} 1267}
1268/*
1269 * a fake_cpuc is used to validate event groups. Due to
1270 * the extra reg logic, we need to also allocate a fake
1271 * per_core and per_cpu structure. Otherwise, group events
1272 * using extra reg may conflict without the kernel being
1273 * able to catch this when the last event gets added to
1274 * the group.
1275 */
1276static void free_fake_cpuc(struct cpu_hw_events *cpuc)
1277{
1278 kfree(cpuc->shared_regs);
1279 kfree(cpuc);
1280}
1281
1282static struct cpu_hw_events *allocate_fake_cpuc(void)
1283{
1284 struct cpu_hw_events *cpuc;
1285 int cpu = raw_smp_processor_id();
1286
1287 cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
1288 if (!cpuc)
1289 return ERR_PTR(-ENOMEM);
1290
1291 /* only needed, if we have extra_regs */
1292 if (x86_pmu.extra_regs) {
1293 cpuc->shared_regs = allocate_shared_regs(cpu);
1294 if (!cpuc->shared_regs)
1295 goto error;
1296 }
1297 return cpuc;
1298error:
1299 free_fake_cpuc(cpuc);
1300 return ERR_PTR(-ENOMEM);
1301}
1640 1302
1641/* 1303/*
1642 * validate that we can schedule this event 1304 * validate that we can schedule this event
@@ -1647,9 +1309,9 @@ static int validate_event(struct perf_event *event)
1647 struct event_constraint *c; 1309 struct event_constraint *c;
1648 int ret = 0; 1310 int ret = 0;
1649 1311
1650 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); 1312 fake_cpuc = allocate_fake_cpuc();
1651 if (!fake_cpuc) 1313 if (IS_ERR(fake_cpuc))
1652 return -ENOMEM; 1314 return PTR_ERR(fake_cpuc);
1653 1315
1654 c = x86_pmu.get_event_constraints(fake_cpuc, event); 1316 c = x86_pmu.get_event_constraints(fake_cpuc, event);
1655 1317
@@ -1659,7 +1321,7 @@ static int validate_event(struct perf_event *event)
1659 if (x86_pmu.put_event_constraints) 1321 if (x86_pmu.put_event_constraints)
1660 x86_pmu.put_event_constraints(fake_cpuc, event); 1322 x86_pmu.put_event_constraints(fake_cpuc, event);
1661 1323
1662 kfree(fake_cpuc); 1324 free_fake_cpuc(fake_cpuc);
1663 1325
1664 return ret; 1326 return ret;
1665} 1327}
@@ -1679,36 +1341,32 @@ static int validate_group(struct perf_event *event)
1679{ 1341{
1680 struct perf_event *leader = event->group_leader; 1342 struct perf_event *leader = event->group_leader;
1681 struct cpu_hw_events *fake_cpuc; 1343 struct cpu_hw_events *fake_cpuc;
1682 int ret, n; 1344 int ret = -ENOSPC, n;
1683
1684 ret = -ENOMEM;
1685 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1686 if (!fake_cpuc)
1687 goto out;
1688 1345
1346 fake_cpuc = allocate_fake_cpuc();
1347 if (IS_ERR(fake_cpuc))
1348 return PTR_ERR(fake_cpuc);
1689 /* 1349 /*
1690 * the event is not yet connected with its 1350 * the event is not yet connected with its
1691 * siblings therefore we must first collect 1351 * siblings therefore we must first collect
1692 * existing siblings, then add the new event 1352 * existing siblings, then add the new event
1693 * before we can simulate the scheduling 1353 * before we can simulate the scheduling
1694 */ 1354 */
1695 ret = -ENOSPC;
1696 n = collect_events(fake_cpuc, leader, true); 1355 n = collect_events(fake_cpuc, leader, true);
1697 if (n < 0) 1356 if (n < 0)
1698 goto out_free; 1357 goto out;
1699 1358
1700 fake_cpuc->n_events = n; 1359 fake_cpuc->n_events = n;
1701 n = collect_events(fake_cpuc, event, false); 1360 n = collect_events(fake_cpuc, event, false);
1702 if (n < 0) 1361 if (n < 0)
1703 goto out_free; 1362 goto out;
1704 1363
1705 fake_cpuc->n_events = n; 1364 fake_cpuc->n_events = n;
1706 1365
1707 ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); 1366 ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
1708 1367
1709out_free:
1710 kfree(fake_cpuc);
1711out: 1368out:
1369 free_fake_cpuc(fake_cpuc);
1712 return ret; 1370 return ret;
1713} 1371}
1714 1372
@@ -1856,6 +1514,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1856 1514
1857 perf_callchain_store(entry, regs->ip); 1515 perf_callchain_store(entry, regs->ip);
1858 1516
1517 if (!current->mm)
1518 return;
1519
1859 if (perf_callchain_user32(regs, entry)) 1520 if (perf_callchain_user32(regs, entry))
1860 return; 1521 return;
1861 1522
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
new file mode 100644
index 000000000000..b9698d40ac4b
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -0,0 +1,505 @@
1/*
2 * Performance events x86 architecture header
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10 * Copyright (C) 2009 Google, Inc., Stephane Eranian
11 *
12 * For licencing details see kernel-base/COPYING
13 */
14
15#include <linux/perf_event.h>
16
17/*
18 * | NHM/WSM | SNB |
19 * register -------------------------------
20 * | HT | no HT | HT | no HT |
21 *-----------------------------------------
22 * offcore | core | core | cpu | core |
23 * lbr_sel | core | core | cpu | core |
24 * ld_lat | cpu | core | cpu | core |
25 *-----------------------------------------
26 *
27 * Given that there is a small number of shared regs,
28 * we can pre-allocate their slot in the per-cpu
29 * per-core reg tables.
30 */
31enum extra_reg_type {
32 EXTRA_REG_NONE = -1, /* not used */
33
34 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
35 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
36
37 EXTRA_REG_MAX /* number of entries needed */
38};
39
40struct event_constraint {
41 union {
42 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
43 u64 idxmsk64;
44 };
45 u64 code;
46 u64 cmask;
47 int weight;
48};
49
50struct amd_nb {
51 int nb_id; /* NorthBridge id */
52 int refcnt; /* reference count */
53 struct perf_event *owners[X86_PMC_IDX_MAX];
54 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
55};
56
57/* The maximal number of PEBS events: */
58#define MAX_PEBS_EVENTS 4
59
60/*
61 * A debug store configuration.
62 *
63 * We only support architectures that use 64bit fields.
64 */
65struct debug_store {
66 u64 bts_buffer_base;
67 u64 bts_index;
68 u64 bts_absolute_maximum;
69 u64 bts_interrupt_threshold;
70 u64 pebs_buffer_base;
71 u64 pebs_index;
72 u64 pebs_absolute_maximum;
73 u64 pebs_interrupt_threshold;
74 u64 pebs_event_reset[MAX_PEBS_EVENTS];
75};
76
77/*
78 * Per register state.
79 */
80struct er_account {
81 raw_spinlock_t lock; /* per-core: protect structure */
82 u64 config; /* extra MSR config */
83 u64 reg; /* extra MSR number */
84 atomic_t ref; /* reference count */
85};
86
87/*
88 * Per core/cpu state
89 *
90 * Used to coordinate shared registers between HT threads or
91 * among events on a single PMU.
92 */
93struct intel_shared_regs {
94 struct er_account regs[EXTRA_REG_MAX];
95 int refcnt; /* per-core: #HT threads */
96 unsigned core_id; /* per-core: core id */
97};
98
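A hedged sketch of the claim discipline this structure is meant to support (the real allocator lands in perf_event_intel.c, introduced by the "manage allocation of shared extra msr" comment at the end of this patch; locking is simplified here): a second hardware thread may share an er_account slot only if it programs the identical extra-MSR value.

static bool er_account_try_get(struct er_account *era, u64 msr, u64 config)
{
	bool ok = false;

	raw_spin_lock(&era->lock);
	if (!atomic_read(&era->ref) || era->config == config) {
		era->reg    = msr;
		era->config = config;
		atomic_inc(&era->ref);	/* one more user of this shared MSR */
		ok = true;
	}
	raw_spin_unlock(&era->lock);

	return ok;
}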
99#define MAX_LBR_ENTRIES 16
100
101struct cpu_hw_events {
102 /*
103 * Generic x86 PMC bits
104 */
105 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
106 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
107 unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
108 int enabled;
109
110 int n_events;
111 int n_added;
112 int n_txn;
113 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
114 u64 tags[X86_PMC_IDX_MAX];
115 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
116
117 unsigned int group_flag;
118
119 /*
120 * Intel DebugStore bits
121 */
122 struct debug_store *ds;
123 u64 pebs_enabled;
124
125 /*
126 * Intel LBR bits
127 */
128 int lbr_users;
129 void *lbr_context;
130 struct perf_branch_stack lbr_stack;
131 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
132
133 /*
134 * Intel host/guest exclude bits
135 */
136 u64 intel_ctrl_guest_mask;
137 u64 intel_ctrl_host_mask;
138 struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
139
140 /*
141 * manage shared (per-core, per-cpu) registers
142 * used on Intel NHM/WSM/SNB
143 */
144 struct intel_shared_regs *shared_regs;
145
146 /*
147 * AMD specific bits
148 */
149 struct amd_nb *amd_nb;
150
151 void *kfree_on_online;
152};
153
154#define __EVENT_CONSTRAINT(c, n, m, w) {\
155 { .idxmsk64 = (n) }, \
156 .code = (c), \
157 .cmask = (m), \
158 .weight = (w), \
159}
160
161#define EVENT_CONSTRAINT(c, n, m) \
162 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
163
164/*
165 * Constraint on the Event code.
166 */
167#define INTEL_EVENT_CONSTRAINT(c, n) \
168 EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
169
170/*
171 * Constraint on the Event code + UMask + fixed-mask
172 *
173 * filter mask to validate fixed counter events.
174 * the following filters disqualify for fixed counters:
175 * - inv
176 * - edge
177 * - cnt-mask
178 * The other filters are supported by fixed counters.
179 * The any-thread option is supported starting with v3.
180 */
181#define FIXED_EVENT_CONSTRAINT(c, n) \
182 EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
183
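For example, as the Intel event tables later in this patch do, fixed-counter constraints pin architectural events to their dedicated counters (bits 32 and up of the index mask). The table name below is illustrative only.

static struct event_constraint example_fixed_constraints[] __read_mostly = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY      -> fixed counter 0 */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE -> fixed counter 1 */
	EVENT_CONSTRAINT_END
};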
184/*
185 * Constraint on the Event code + UMask
186 */
187#define INTEL_UEVENT_CONSTRAINT(c, n) \
188 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
189
190#define EVENT_CONSTRAINT_END \
191 EVENT_CONSTRAINT(0, 0, 0)
192
193#define for_each_event_constraint(e, c) \
194 for ((e) = (c); (e)->weight; (e)++)
195
196/*
197 * Extra registers for specific events.
198 *
199 * Some events need large masks and require external MSRs.
200 * Those extra MSRs end up being shared for all events on
201 * a PMU and sometimes between PMU of sibling HT threads.
202 * In either case, the kernel needs to handle conflicting
203 * accesses to those extra, shared, regs. The data structure
204 * to manage those registers is stored in cpu_hw_event.
205 */
206struct extra_reg {
207 unsigned int event;
208 unsigned int msr;
209 u64 config_mask;
210 u64 valid_mask;
211 int idx; /* per_xxx->regs[] reg index */
212};
213
214#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
215 .event = (e), \
216 .msr = (ms), \
217 .config_mask = (m), \
218 .valid_mask = (vm), \
219 .idx = EXTRA_REG_##i \
220 }
221
222#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
223 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
224
225#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
226
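Concretely (mirroring the Westmere table updated later in this patch), the OFFCORE_RESPONSE events keep their detailed request/response mask in a dedicated MSR, selected through attr.config1 and bounded by the valid mask. A table of that shape, with an illustrative name:

static struct extra_reg example_offcore_extra_regs[] __read_mostly = {
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
	EVENT_EXTRA_END
};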
227union perf_capabilities {
228 struct {
229 u64 lbr_format:6;
230 u64 pebs_trap:1;
231 u64 pebs_arch_reg:1;
232 u64 pebs_format:4;
233 u64 smm_freeze:1;
234 };
235 u64 capabilities;
236};
237
238/*
239 * struct x86_pmu - generic x86 pmu
240 */
241struct x86_pmu {
242 /*
243 * Generic x86 PMC bits
244 */
245 const char *name;
246 int version;
247 int (*handle_irq)(struct pt_regs *);
248 void (*disable_all)(void);
249 void (*enable_all)(int added);
250 void (*enable)(struct perf_event *);
251 void (*disable)(struct perf_event *);
252 int (*hw_config)(struct perf_event *event);
253 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
254 unsigned eventsel;
255 unsigned perfctr;
256 u64 (*event_map)(int);
257 int max_events;
258 int num_counters;
259 int num_counters_fixed;
260 int cntval_bits;
261 u64 cntval_mask;
262 int apic;
263 u64 max_period;
264 struct event_constraint *
265 (*get_event_constraints)(struct cpu_hw_events *cpuc,
266 struct perf_event *event);
267
268 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
269 struct perf_event *event);
270 struct event_constraint *event_constraints;
271 void (*quirks)(void);
272 int perfctr_second_write;
273
274 int (*cpu_prepare)(int cpu);
275 void (*cpu_starting)(int cpu);
276 void (*cpu_dying)(int cpu);
277 void (*cpu_dead)(int cpu);
278
279 /*
280 * Intel Arch Perfmon v2+
281 */
282 u64 intel_ctrl;
283 union perf_capabilities intel_cap;
284
285 /*
286 * Intel DebugStore bits
287 */
288 int bts, pebs;
289 int bts_active, pebs_active;
290 int pebs_record_size;
291 void (*drain_pebs)(struct pt_regs *regs);
292 struct event_constraint *pebs_constraints;
293
294 /*
295 * Intel LBR
296 */
297 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
298 int lbr_nr; /* hardware stack size */
299
300 /*
301 * Extra registers for events
302 */
303 struct extra_reg *extra_regs;
304 unsigned int er_flags;
305
306 /*
307 * Intel host/guest support (KVM)
308 */
309 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
310};
311
312#define ERF_NO_HT_SHARING 1
313#define ERF_HAS_RSP_1 2
314
315extern struct x86_pmu x86_pmu __read_mostly;
316
317DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
318
319int x86_perf_event_set_period(struct perf_event *event);
320
321/*
322 * Generalized hw caching related hw_event table, filled
323 * in on a per model basis. A value of 0 means
324 * 'not supported', -1 means 'hw_event makes no sense on
325 * this CPU', any other value means the raw hw_event
326 * ID.
327 */
328
329#define C(x) PERF_COUNT_HW_CACHE_##x
330
331extern u64 __read_mostly hw_cache_event_ids
332 [PERF_COUNT_HW_CACHE_MAX]
333 [PERF_COUNT_HW_CACHE_OP_MAX]
334 [PERF_COUNT_HW_CACHE_RESULT_MAX];
335extern u64 __read_mostly hw_cache_extra_regs
336 [PERF_COUNT_HW_CACHE_MAX]
337 [PERF_COUNT_HW_CACHE_OP_MAX]
338 [PERF_COUNT_HW_CACHE_RESULT_MAX];
339
340u64 x86_perf_event_update(struct perf_event *event);
341
342static inline int x86_pmu_addr_offset(int index)
343{
344 int offset;
345
346 /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
347 alternative_io(ASM_NOP2,
348 "shll $1, %%eax",
349 X86_FEATURE_PERFCTR_CORE,
350 "=a" (offset),
351 "a" (index));
352
353 return offset;
354}
355
356static inline unsigned int x86_pmu_config_addr(int index)
357{
358 return x86_pmu.eventsel + x86_pmu_addr_offset(index);
359}
360
361static inline unsigned int x86_pmu_event_addr(int index)
362{
363 return x86_pmu.perfctr + x86_pmu_addr_offset(index);
364}
365
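The alternative_io() above is a patched-at-boot version of the following plain-C logic: CPUs with X86_FEATURE_PERFCTR_CORE (the family 15h PMU set up elsewhere in this patch with MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR) interleave control and counter MSRs, so the per-index stride is 2 instead of 1. A sketch, for illustration only:

static inline int x86_pmu_addr_offset_plain(int index)
{
	if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		return index << 1;	/* ctl/ctr pairs are interleaved */

	return index;			/* legacy layout: contiguous MSR blocks */
}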
366int x86_setup_perfctr(struct perf_event *event);
367
368int x86_pmu_hw_config(struct perf_event *event);
369
370void x86_pmu_disable_all(void);
371
372static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
373 u64 enable_mask)
374{
375 if (hwc->extra_reg.reg)
376 wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
377 wrmsrl(hwc->config_base, hwc->config | enable_mask);
378}
379
380void x86_pmu_enable_all(int added);
381
382int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
383
384void x86_pmu_stop(struct perf_event *event, int flags);
385
386static inline void x86_pmu_disable_event(struct perf_event *event)
387{
388 struct hw_perf_event *hwc = &event->hw;
389
390 wrmsrl(hwc->config_base, hwc->config);
391}
392
393void x86_pmu_enable_event(struct perf_event *event);
394
395int x86_pmu_handle_irq(struct pt_regs *regs);
396
397extern struct event_constraint emptyconstraint;
398
399extern struct event_constraint unconstrained;
400
401#ifdef CONFIG_CPU_SUP_AMD
402
403int amd_pmu_init(void);
404
405#else /* CONFIG_CPU_SUP_AMD */
406
407static inline int amd_pmu_init(void)
408{
409 return 0;
410}
411
412#endif /* CONFIG_CPU_SUP_AMD */
413
414#ifdef CONFIG_CPU_SUP_INTEL
415
416int intel_pmu_save_and_restart(struct perf_event *event);
417
418struct event_constraint *
419x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event);
420
421struct intel_shared_regs *allocate_shared_regs(int cpu);
422
423int intel_pmu_init(void);
424
425void init_debug_store_on_cpu(int cpu);
426
427void fini_debug_store_on_cpu(int cpu);
428
429void release_ds_buffers(void);
430
431void reserve_ds_buffers(void);
432
433extern struct event_constraint bts_constraint;
434
435void intel_pmu_enable_bts(u64 config);
436
437void intel_pmu_disable_bts(void);
438
439int intel_pmu_drain_bts_buffer(void);
440
441extern struct event_constraint intel_core2_pebs_event_constraints[];
442
443extern struct event_constraint intel_atom_pebs_event_constraints[];
444
445extern struct event_constraint intel_nehalem_pebs_event_constraints[];
446
447extern struct event_constraint intel_westmere_pebs_event_constraints[];
448
449extern struct event_constraint intel_snb_pebs_event_constraints[];
450
451struct event_constraint *intel_pebs_constraints(struct perf_event *event);
452
453void intel_pmu_pebs_enable(struct perf_event *event);
454
455void intel_pmu_pebs_disable(struct perf_event *event);
456
457void intel_pmu_pebs_enable_all(void);
458
459void intel_pmu_pebs_disable_all(void);
460
461void intel_ds_init(void);
462
463void intel_pmu_lbr_reset(void);
464
465void intel_pmu_lbr_enable(struct perf_event *event);
466
467void intel_pmu_lbr_disable(struct perf_event *event);
468
469void intel_pmu_lbr_enable_all(void);
470
471void intel_pmu_lbr_disable_all(void);
472
473void intel_pmu_lbr_read(void);
474
475void intel_pmu_lbr_init_core(void);
476
477void intel_pmu_lbr_init_nhm(void);
478
479void intel_pmu_lbr_init_atom(void);
480
481int p4_pmu_init(void);
482
483int p6_pmu_init(void);
484
485#else /* CONFIG_CPU_SUP_INTEL */
486
487static inline void reserve_ds_buffers(void)
488{
489}
490
491static inline void release_ds_buffers(void)
492{
493}
494
495static inline int intel_pmu_init(void)
496{
497 return 0;
498}
499
500static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
501{
502 return NULL;
503}
504
505#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index fe29c1d2219e..aeefd45697a2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,4 +1,10 @@
1#ifdef CONFIG_CPU_SUP_AMD 1#include <linux/perf_event.h>
2#include <linux/types.h>
3#include <linux/init.h>
4#include <linux/slab.h>
5#include <asm/apicdef.h>
6
7#include "perf_event.h"
2 8
3static __initconst const u64 amd_hw_cache_event_ids 9static __initconst const u64 amd_hw_cache_event_ids
4 [PERF_COUNT_HW_CACHE_MAX] 10 [PERF_COUNT_HW_CACHE_MAX]
@@ -89,6 +95,20 @@ static __initconst const u64 amd_hw_cache_event_ids
89 [ C(RESULT_MISS) ] = -1, 95 [ C(RESULT_MISS) ] = -1,
90 }, 96 },
91 }, 97 },
98 [ C(NODE) ] = {
99 [ C(OP_READ) ] = {
100 [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
101 [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
102 },
103 [ C(OP_WRITE) ] = {
104 [ C(RESULT_ACCESS) ] = -1,
105 [ C(RESULT_MISS) ] = -1,
106 },
107 [ C(OP_PREFETCH) ] = {
108 [ C(RESULT_ACCESS) ] = -1,
109 [ C(RESULT_MISS) ] = -1,
110 },
111 },
92}; 112};
93 113
94/* 114/*
@@ -118,6 +138,19 @@ static int amd_pmu_hw_config(struct perf_event *event)
118 if (ret) 138 if (ret)
119 return ret; 139 return ret;
120 140
141 if (event->attr.exclude_host && event->attr.exclude_guest)
142 /*
143 * When HO == GO == 1 the hardware treats that as GO == HO == 0
144 * and will count in both modes. We don't want to count in that
145 * case so we emulate no-counting by setting US = OS = 0.
146 */
147 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
148 ARCH_PERFMON_EVENTSEL_OS);
149 else if (event->attr.exclude_host)
150 event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
151 else if (event->attr.exclude_guest)
152 event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
153
121 if (event->attr.type != PERF_TYPE_RAW) 154 if (event->attr.type != PERF_TYPE_RAW)
122 return 0; 155 return 0;
 121 if (event->attr.type != PERF_TYPE_RAW) 154
 122 return 0; 155
 123 156
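A hypothetical caller-side view of the new exclude bits handled above (in-kernel attribute initializer shown; the branches map these bits onto the GUESTONLY/HOSTONLY bits of the event select):

struct perf_event_attr attr = {
	.size		= sizeof(struct perf_event_attr),
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.exclude_host	= 1,	/* count only while a guest runs -> AMD_PERFMON_EVENTSEL_GUESTONLY */
};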
@@ -336,7 +369,7 @@ static void amd_pmu_cpu_starting(int cpu)
336 continue; 369 continue;
337 370
338 if (nb->nb_id == nb_id) { 371 if (nb->nb_id == nb_id) {
339 kfree(cpuc->amd_nb); 372 cpuc->kfree_on_online = cpuc->amd_nb;
340 cpuc->amd_nb = nb; 373 cpuc->amd_nb = nb;
341 break; 374 break;
342 } 375 }
@@ -378,7 +411,7 @@ static __initconst const struct x86_pmu amd_pmu = {
378 .perfctr = MSR_K7_PERFCTR0, 411 .perfctr = MSR_K7_PERFCTR0,
379 .event_map = amd_pmu_event_map, 412 .event_map = amd_pmu_event_map,
380 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 413 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
381 .num_counters = 4, 414 .num_counters = AMD64_NUM_COUNTERS,
382 .cntval_bits = 48, 415 .cntval_bits = 48,
383 .cntval_mask = (1ULL << 48) - 1, 416 .cntval_mask = (1ULL << 48) - 1,
384 .apic = 1, 417 .apic = 1,
@@ -542,7 +575,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
542 .perfctr = MSR_F15H_PERF_CTR, 575 .perfctr = MSR_F15H_PERF_CTR,
543 .event_map = amd_pmu_event_map, 576 .event_map = amd_pmu_event_map,
544 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 577 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
545 .num_counters = 6, 578 .num_counters = AMD64_NUM_COUNTERS_F15H,
546 .cntval_bits = 48, 579 .cntval_bits = 48,
547 .cntval_mask = (1ULL << 48) - 1, 580 .cntval_mask = (1ULL << 48) - 1,
548 .apic = 1, 581 .apic = 1,
@@ -559,7 +592,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
559#endif 592#endif
560}; 593};
561 594
562static __init int amd_pmu_init(void) 595__init int amd_pmu_init(void)
563{ 596{
564 /* Performance-monitoring supported from K7 and later: */ 597 /* Performance-monitoring supported from K7 and later: */
565 if (boot_cpu_data.x86 < 6) 598 if (boot_cpu_data.x86 < 6)
@@ -588,12 +621,3 @@ static __init int amd_pmu_init(void)
588 621
589 return 0; 622 return 0;
590} 623}
591
592#else /* CONFIG_CPU_SUP_AMD */
593
594static int amd_pmu_init(void)
595{
596 return 0;
597}
598
599#endif
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
new file mode 100644
index 000000000000..ab6343d21825
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -0,0 +1,294 @@
1/*
2 * Performance events - AMD IBS
3 *
4 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
5 *
6 * For licencing details see kernel-base/COPYING
7 */
8
9#include <linux/perf_event.h>
10#include <linux/module.h>
11#include <linux/pci.h>
12
13#include <asm/apic.h>
14
15static u32 ibs_caps;
16
17#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
18
19static struct pmu perf_ibs;
20
21static int perf_ibs_init(struct perf_event *event)
22{
23 if (perf_ibs.type != event->attr.type)
24 return -ENOENT;
25 return 0;
26}
27
28static int perf_ibs_add(struct perf_event *event, int flags)
29{
30 return 0;
31}
32
33static void perf_ibs_del(struct perf_event *event, int flags)
34{
35}
36
37static struct pmu perf_ibs = {
 38	.event_init	= perf_ibs_init,
 39	.add		= perf_ibs_add,
 40	.del		= perf_ibs_del,
41};
42
43static __init int perf_event_ibs_init(void)
44{
45 if (!ibs_caps)
46 return -ENODEV; /* ibs not supported by the cpu */
47
48 perf_pmu_register(&perf_ibs, "ibs", -1);
49 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
50
51 return 0;
52}
53
54#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
55
56static __init int perf_event_ibs_init(void) { return 0; }
57
58#endif
59
60/* IBS - apic initialization, for perf and oprofile */
61
62static __init u32 __get_ibs_caps(void)
63{
64 u32 caps;
65 unsigned int max_level;
66
67 if (!boot_cpu_has(X86_FEATURE_IBS))
68 return 0;
69
70 /* check IBS cpuid feature flags */
71 max_level = cpuid_eax(0x80000000);
72 if (max_level < IBS_CPUID_FEATURES)
73 return IBS_CAPS_DEFAULT;
74
75 caps = cpuid_eax(IBS_CPUID_FEATURES);
76 if (!(caps & IBS_CAPS_AVAIL))
77 /* cpuid flags not valid */
78 return IBS_CAPS_DEFAULT;
79
80 return caps;
81}
82
83u32 get_ibs_caps(void)
84{
85 return ibs_caps;
86}
87
88EXPORT_SYMBOL(get_ibs_caps);
89
90static inline int get_eilvt(int offset)
91{
92 return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
93}
94
95static inline int put_eilvt(int offset)
96{
97 return !setup_APIC_eilvt(offset, 0, 0, 1);
98}
99
100/*
101 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
102 */
103static inline int ibs_eilvt_valid(void)
104{
105 int offset;
106 u64 val;
107 int valid = 0;
108
109 preempt_disable();
110
111 rdmsrl(MSR_AMD64_IBSCTL, val);
112 offset = val & IBSCTL_LVT_OFFSET_MASK;
113
114 if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
115 pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
116 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
117 goto out;
118 }
119
120 if (!get_eilvt(offset)) {
121 pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
122 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
123 goto out;
124 }
125
126 valid = 1;
127out:
128 preempt_enable();
129
130 return valid;
131}
132
133static int setup_ibs_ctl(int ibs_eilvt_off)
134{
135 struct pci_dev *cpu_cfg;
136 int nodes;
137 u32 value = 0;
138
139 nodes = 0;
140 cpu_cfg = NULL;
141 do {
142 cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
143 PCI_DEVICE_ID_AMD_10H_NB_MISC,
144 cpu_cfg);
145 if (!cpu_cfg)
146 break;
147 ++nodes;
148 pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
149 | IBSCTL_LVT_OFFSET_VALID);
150 pci_read_config_dword(cpu_cfg, IBSCTL, &value);
151 if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
152 pci_dev_put(cpu_cfg);
153 printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
154 "IBSCTL = 0x%08x\n", value);
155 return -EINVAL;
156 }
157 } while (1);
158
159 if (!nodes) {
160 printk(KERN_DEBUG "No CPU node configured for IBS\n");
161 return -ENODEV;
162 }
163
164 return 0;
165}
166
167/*
168 * This runs only on the current cpu. We try to find an LVT offset and
 169 * set up the local APIC. For this we must disable preemption. On
 170 * success we initialize all nodes with this offset. This then updates
 171 * the offset in the IBS_CTL per-node MSR. The per-core APIC setup of
 172 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
 173 * uses the new offset.
174 */
175static int force_ibs_eilvt_setup(void)
176{
177 int offset;
178 int ret;
179
180 preempt_disable();
181 /* find the next free available EILVT entry, skip offset 0 */
182 for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
183 if (get_eilvt(offset))
184 break;
185 }
186 preempt_enable();
187
188 if (offset == APIC_EILVT_NR_MAX) {
189 printk(KERN_DEBUG "No EILVT entry available\n");
190 return -EBUSY;
191 }
192
193 ret = setup_ibs_ctl(offset);
194 if (ret)
195 goto out;
196
197 if (!ibs_eilvt_valid()) {
198 ret = -EFAULT;
199 goto out;
200 }
201
202 pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
203 pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
204
205 return 0;
206out:
207 preempt_disable();
208 put_eilvt(offset);
209 preempt_enable();
210 return ret;
211}
212
213static inline int get_ibs_lvt_offset(void)
214{
215 u64 val;
216
217 rdmsrl(MSR_AMD64_IBSCTL, val);
218 if (!(val & IBSCTL_LVT_OFFSET_VALID))
219 return -EINVAL;
220
221 return val & IBSCTL_LVT_OFFSET_MASK;
222}
223
224static void setup_APIC_ibs(void *dummy)
225{
226 int offset;
227
228 offset = get_ibs_lvt_offset();
229 if (offset < 0)
230 goto failed;
231
232 if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
233 return;
234failed:
235 pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
236 smp_processor_id());
237}
238
239static void clear_APIC_ibs(void *dummy)
240{
241 int offset;
242
243 offset = get_ibs_lvt_offset();
244 if (offset >= 0)
245 setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
246}
247
248static int __cpuinit
249perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
250{
251 switch (action & ~CPU_TASKS_FROZEN) {
252 case CPU_STARTING:
253 setup_APIC_ibs(NULL);
254 break;
255 case CPU_DYING:
256 clear_APIC_ibs(NULL);
257 break;
258 default:
259 break;
260 }
261
262 return NOTIFY_OK;
263}
264
265static __init int amd_ibs_init(void)
266{
267 u32 caps;
268 int ret;
269
270 caps = __get_ibs_caps();
271 if (!caps)
272 return -ENODEV; /* ibs not supported by the cpu */
273
274 if (!ibs_eilvt_valid()) {
275 ret = force_ibs_eilvt_setup();
276 if (ret) {
277 pr_err("Failed to setup IBS, %d\n", ret);
278 return ret;
279 }
280 }
281
282 get_online_cpus();
283 ibs_caps = caps;
284 /* make ibs_caps visible to other cpus: */
285 smp_mb();
286 perf_cpu_notifier(perf_ibs_cpu_notifier);
287 smp_call_function(setup_APIC_ibs, NULL, 1);
288 put_online_cpus();
289
290 return perf_event_ibs_init();
291}
292
293/* Since we need the pci subsystem to init ibs we can't do this earlier: */
294device_initcall(amd_ibs_init);
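A hypothetical in-kernel consumer of the exported capability word (for instance a profiling driver deciding whether it may touch the IBS MSRs); get_ibs_caps() returns 0 until amd_ibs_init() has validated the LVT setup:

static int my_ibs_user_init(void)
{
	u32 caps = get_ibs_caps();

	if (!caps)
		return -ENODEV;		/* IBS absent or not yet initialized */

	/* caps carries the IBS_CAPS_* bits for finer-grained checks */
	return 0;
}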
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 41178c826c48..e09ca20e86ee 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,26 +1,19 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3#define MAX_EXTRA_REGS 2
4
5/* 1/*
6 * Per register state. 2 * Per core/cpu state
3 *
4 * Used to coordinate shared registers between HT threads or
5 * among events on a single PMU.
7 */ 6 */
8struct er_account {
9 int ref; /* reference count */
10 unsigned int extra_reg; /* extra MSR number */
11 u64 extra_config; /* extra MSR config */
12};
13 7
14/* 8#include <linux/stddef.h>
15 * Per core state 9#include <linux/types.h>
16 * This used to coordinate shared registers for HT threads. 10#include <linux/init.h>
17 */ 11#include <linux/slab.h>
18struct intel_percore { 12
19 raw_spinlock_t lock; /* protect structure */ 13#include <asm/hardirq.h>
20 struct er_account regs[MAX_EXTRA_REGS]; 14#include <asm/apic.h>
21 int refcnt; /* number of threads */ 15
22 unsigned core_id; 16#include "perf_event.h"
23};
24 17
25/* 18/*
26 * Intel PerfMon, used on Core and later. 19 * Intel PerfMon, used on Core and later.
@@ -88,16 +81,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
88 81
89static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = 82static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
90{ 83{
91 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), 84 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
92 EVENT_EXTRA_END 85 EVENT_EXTRA_END
93}; 86};
94 87
95static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
96{
97 INTEL_EVENT_CONSTRAINT(0xb7, 0),
98 EVENT_CONSTRAINT_END
99};
100
101static struct event_constraint intel_westmere_event_constraints[] __read_mostly = 88static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
102{ 89{
103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 90 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -116,8 +103,6 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
116 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 103 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
117 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 104 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
118 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 105 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
119 INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
120 INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
121 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 106 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
122 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 107 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
123 EVENT_CONSTRAINT_END 108 EVENT_CONSTRAINT_END
@@ -125,15 +110,13 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
125 110
126static struct extra_reg intel_westmere_extra_regs[] __read_mostly = 111static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
127{ 112{
128 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), 113 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
129 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), 114 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
130 EVENT_EXTRA_END 115 EVENT_EXTRA_END
131}; 116};
132 117
133static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = 118static struct event_constraint intel_v1_event_constraints[] __read_mostly =
134{ 119{
135 INTEL_EVENT_CONSTRAINT(0xb7, 0),
136 INTEL_EVENT_CONSTRAINT(0xbb, 0),
137 EVENT_CONSTRAINT_END 120 EVENT_CONSTRAINT_END
138}; 121};
139 122
@@ -145,6 +128,12 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
145 EVENT_CONSTRAINT_END 128 EVENT_CONSTRAINT_END
146}; 129};
147 130
131static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
132 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
133 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
134 EVENT_EXTRA_END
135};
136
148static u64 intel_pmu_event_map(int hw_event) 137static u64 intel_pmu_event_map(int hw_event)
149{ 138{
150 return intel_perfmon_event_map[hw_event]; 139 return intel_perfmon_event_map[hw_event];
@@ -245,6 +234,21 @@ static __initconst const u64 snb_hw_cache_event_ids
245 [ C(RESULT_MISS) ] = -1, 234 [ C(RESULT_MISS) ] = -1,
246 }, 235 },
247 }, 236 },
237 [ C(NODE) ] = {
238 [ C(OP_READ) ] = {
239 [ C(RESULT_ACCESS) ] = -1,
240 [ C(RESULT_MISS) ] = -1,
241 },
242 [ C(OP_WRITE) ] = {
243 [ C(RESULT_ACCESS) ] = -1,
244 [ C(RESULT_MISS) ] = -1,
245 },
246 [ C(OP_PREFETCH) ] = {
247 [ C(RESULT_ACCESS) ] = -1,
248 [ C(RESULT_MISS) ] = -1,
249 },
250 },
251
248}; 252};
249 253
250static __initconst const u64 westmere_hw_cache_event_ids 254static __initconst const u64 westmere_hw_cache_event_ids
@@ -346,6 +350,20 @@ static __initconst const u64 westmere_hw_cache_event_ids
346 [ C(RESULT_MISS) ] = -1, 350 [ C(RESULT_MISS) ] = -1,
347 }, 351 },
348 }, 352 },
353 [ C(NODE) ] = {
354 [ C(OP_READ) ] = {
355 [ C(RESULT_ACCESS) ] = 0x01b7,
356 [ C(RESULT_MISS) ] = 0x01b7,
357 },
358 [ C(OP_WRITE) ] = {
359 [ C(RESULT_ACCESS) ] = 0x01b7,
360 [ C(RESULT_MISS) ] = 0x01b7,
361 },
362 [ C(OP_PREFETCH) ] = {
363 [ C(RESULT_ACCESS) ] = 0x01b7,
364 [ C(RESULT_MISS) ] = 0x01b7,
365 },
366 },
349}; 367};
350 368
351/* 369/*
@@ -398,7 +416,21 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
398 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, 416 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
399 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, 417 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
400 }, 418 },
401 } 419 },
420 [ C(NODE) ] = {
421 [ C(OP_READ) ] = {
422 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
423 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
424 },
425 [ C(OP_WRITE) ] = {
426 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
427 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
428 },
429 [ C(OP_PREFETCH) ] = {
430 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
431 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
432 },
433 },
402}; 434};
403 435
404static __initconst const u64 nehalem_hw_cache_event_ids 436static __initconst const u64 nehalem_hw_cache_event_ids
@@ -500,6 +532,20 @@ static __initconst const u64 nehalem_hw_cache_event_ids
500 [ C(RESULT_MISS) ] = -1, 532 [ C(RESULT_MISS) ] = -1,
501 }, 533 },
502 }, 534 },
535 [ C(NODE) ] = {
536 [ C(OP_READ) ] = {
537 [ C(RESULT_ACCESS) ] = 0x01b7,
538 [ C(RESULT_MISS) ] = 0x01b7,
539 },
540 [ C(OP_WRITE) ] = {
541 [ C(RESULT_ACCESS) ] = 0x01b7,
542 [ C(RESULT_MISS) ] = 0x01b7,
543 },
544 [ C(OP_PREFETCH) ] = {
545 [ C(RESULT_ACCESS) ] = 0x01b7,
546 [ C(RESULT_MISS) ] = 0x01b7,
547 },
548 },
503}; 549};
504 550
505static __initconst const u64 core2_hw_cache_event_ids 551static __initconst const u64 core2_hw_cache_event_ids
@@ -703,7 +749,8 @@ static void intel_pmu_enable_all(int added)
703 749
704 intel_pmu_pebs_enable_all(); 750 intel_pmu_pebs_enable_all();
705 intel_pmu_lbr_enable_all(); 751 intel_pmu_lbr_enable_all();
706 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 752 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
753 x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
707 754
708 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 755 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
709 struct perf_event *event = 756 struct perf_event *event =
@@ -826,6 +873,7 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
826static void intel_pmu_disable_event(struct perf_event *event) 873static void intel_pmu_disable_event(struct perf_event *event)
827{ 874{
828 struct hw_perf_event *hwc = &event->hw; 875 struct hw_perf_event *hwc = &event->hw;
876 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
829 877
830 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 878 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
831 intel_pmu_disable_bts(); 879 intel_pmu_disable_bts();
@@ -833,6 +881,9 @@ static void intel_pmu_disable_event(struct perf_event *event)
833 return; 881 return;
834 } 882 }
835 883
884 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
885 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
886
836 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 887 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
837 intel_pmu_disable_fixed(hwc); 888 intel_pmu_disable_fixed(hwc);
838 return; 889 return;
@@ -878,6 +929,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
878static void intel_pmu_enable_event(struct perf_event *event) 929static void intel_pmu_enable_event(struct perf_event *event)
879{ 930{
880 struct hw_perf_event *hwc = &event->hw; 931 struct hw_perf_event *hwc = &event->hw;
932 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
881 933
882 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 934 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
883 if (!__this_cpu_read(cpu_hw_events.enabled)) 935 if (!__this_cpu_read(cpu_hw_events.enabled))
@@ -887,6 +939,11 @@ static void intel_pmu_enable_event(struct perf_event *event)
887 return; 939 return;
888 } 940 }
889 941
942 if (event->attr.exclude_host)
943 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
944 if (event->attr.exclude_guest)
945 cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
946
890 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 947 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
891 intel_pmu_enable_fixed(hwc); 948 intel_pmu_enable_fixed(hwc);
892 return; 949 return;
@@ -902,7 +959,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
902 * Save and restart an expired event. Called by NMI contexts, 959 * Save and restart an expired event. Called by NMI contexts,
903 * so it has to be careful about preempting normal event ops: 960 * so it has to be careful about preempting normal event ops:
904 */ 961 */
905static int intel_pmu_save_and_restart(struct perf_event *event) 962int intel_pmu_save_and_restart(struct perf_event *event)
906{ 963{
907 x86_perf_event_update(event); 964 x86_perf_event_update(event);
908 return x86_perf_event_set_period(event); 965 return x86_perf_event_set_period(event);
@@ -1003,7 +1060,7 @@ again:
1003 1060
1004 data.period = event->hw.last_period; 1061 data.period = event->hw.last_period;
1005 1062
1006 if (perf_event_overflow(event, 1, &data, regs)) 1063 if (perf_event_overflow(event, &data, regs))
1007 x86_pmu_stop(event, 0); 1064 x86_pmu_stop(event, 0);
1008 } 1065 }
1009 1066
@@ -1037,65 +1094,136 @@ intel_bts_constraints(struct perf_event *event)
1037 return NULL; 1094 return NULL;
1038} 1095}
1039 1096
1097static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
1098{
1099 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
1100 return false;
1101
1102 if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
1103 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1104 event->hw.config |= 0x01bb;
1105 event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
1106 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
1107 } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
1108 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1109 event->hw.config |= 0x01b7;
1110 event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
1111 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
1112 }
1113
1114 if (event->hw.extra_reg.idx == orig_idx)
1115 return false;
1116
1117 return true;
1118}
1119
1120/*
1121 * manage allocation of shared extra msr for certain events
1122 *
1123 * sharing can be:
1124 * per-cpu: to be shared between the various events on a single PMU
1125 * per-core: per-cpu + shared by HT threads
1126 */
1040static struct event_constraint * 1127static struct event_constraint *
1041intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 1128__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1129 struct perf_event *event)
1042{ 1130{
1043 struct hw_perf_event *hwc = &event->hw; 1131 struct event_constraint *c = &emptyconstraint;
1044 unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; 1132 struct hw_perf_event_extra *reg = &event->hw.extra_reg;
1045 struct event_constraint *c;
1046 struct intel_percore *pc;
1047 struct er_account *era; 1133 struct er_account *era;
1048 int i; 1134 unsigned long flags;
1049 int free_slot; 1135 int orig_idx = reg->idx;
1050 int found;
1051 1136
1052 if (!x86_pmu.percore_constraints || hwc->extra_alloc) 1137 /* already allocated shared msr */
1053 return NULL; 1138 if (reg->alloc)
1139 return &unconstrained;
1054 1140
1055 for (c = x86_pmu.percore_constraints; c->cmask; c++) { 1141again:
1056 if (e != c->code) 1142 era = &cpuc->shared_regs->regs[reg->idx];
1057 continue; 1143 /*
1144 * we use spin_lock_irqsave() to avoid lockdep issues when
1145 * passing a fake cpuc
1146 */
1147 raw_spin_lock_irqsave(&era->lock, flags);
1148
1149 if (!atomic_read(&era->ref) || era->config == reg->config) {
1150
1151 /* lock in msr value */
1152 era->config = reg->config;
1153 era->reg = reg->reg;
1154
1155 /* one more user */
1156 atomic_inc(&era->ref);
1157
1158 /* no need to reallocate during incremental event scheduling */
1159 reg->alloc = 1;
1058 1160
1059 /* 1161 /*
1060 * Allocate resource per core. 1162 * All events using extra_reg are unconstrained.
1163 * Avoids calling x86_get_event_constraints()
1164 *
1165 * Must revisit if extra_reg controlling events
1166 * ever have constraints. Worst case we go through
1167 * the regular event constraint table.
1061 */ 1168 */
1062 pc = cpuc->per_core; 1169 c = &unconstrained;
1063 if (!pc) 1170 } else if (intel_try_alt_er(event, orig_idx)) {
1064 break; 1171 raw_spin_unlock(&era->lock);
1065 c = &emptyconstraint; 1172 goto again;
1066 raw_spin_lock(&pc->lock); 1173 }
1067 free_slot = -1; 1174 raw_spin_unlock_irqrestore(&era->lock, flags);
1068 found = 0; 1175
1069 for (i = 0; i < MAX_EXTRA_REGS; i++) { 1176 return c;
1070 era = &pc->regs[i]; 1177}
1071 if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { 1178
1072 /* Allow sharing same config */ 1179static void
1073 if (hwc->extra_config == era->extra_config) { 1180__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
1074 era->ref++; 1181 struct hw_perf_event_extra *reg)
1075 cpuc->percore_used = 1; 1182{
1076 hwc->extra_alloc = 1; 1183 struct er_account *era;
1077 c = NULL; 1184
1078 } 1185 /*
1079 /* else conflict */ 1186 * only put constraint if extra reg was actually
1080 found = 1; 1187 * allocated. Also takes care of events which do
1081 break; 1188 * not use an extra shared reg
1082 } else if (era->ref == 0 && free_slot == -1) 1189 */
1083 free_slot = i; 1190 if (!reg->alloc)
1084 } 1191 return;
1085 if (!found && free_slot != -1) { 1192
1086 era = &pc->regs[free_slot]; 1193 era = &cpuc->shared_regs->regs[reg->idx];
1087 era->ref = 1; 1194
1088 era->extra_reg = hwc->extra_reg; 1195 /* one fewer user */
1089 era->extra_config = hwc->extra_config; 1196 atomic_dec(&era->ref);
1090 cpuc->percore_used = 1; 1197
1091 hwc->extra_alloc = 1; 1198 /* allocate again next time */
1092 c = NULL; 1199 reg->alloc = 0;
1200}
1201
1202static struct event_constraint *
1203intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
1204 struct perf_event *event)
1205{
1206 struct event_constraint *c = NULL;
1207
1208 if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
1209 c = __intel_shared_reg_get_constraints(cpuc, event);
1210
1211 return c;
1212}
1213
1214struct event_constraint *
1215x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1216{
1217 struct event_constraint *c;
1218
1219 if (x86_pmu.event_constraints) {
1220 for_each_event_constraint(c, x86_pmu.event_constraints) {
1221 if ((event->hw.config & c->cmask) == c->code)
1222 return c;
1093 } 1223 }
1094 raw_spin_unlock(&pc->lock);
1095 return c;
1096 } 1224 }
1097 1225
1098 return NULL; 1226 return &unconstrained;
1099} 1227}
1100 1228
1101static struct event_constraint * 1229static struct event_constraint *
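The replacement code above keeps one refcounted er_account per shared MSR and lets a second event piggy-back only when it programs the identical config, falling back to the alternate OFFCORE_RSP MSR via intel_try_alt_er() on conflict. A stripped-down user-space sketch of that sharing rule (er_slot/er_get/er_put are invented names for the example, no locking, not kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct er_slot {
	uint64_t config;
	int ref;
};

static bool er_get(struct er_slot *s, uint64_t config)
{
	if (s->ref == 0 || s->config == config) {
		s->config = config;	/* lock in the MSR value */
		s->ref++;		/* one more user */
		return true;		/* event can run unconstrained */
	}
	return false;			/* conflict: try the alternate MSR or give up */
}

static void er_put(struct er_slot *s)
{
	if (s->ref > 0)
		s->ref--;		/* last user leaves the slot free for a new config */
}

int main(void)
{
	struct er_slot rsp0 = { 0, 0 };

	printf("%d\n", er_get(&rsp0, 0x01b7));	/* 1: slot was free        */
	printf("%d\n", er_get(&rsp0, 0x01b7));	/* 1: same config, shared  */
	printf("%d\n", er_get(&rsp0, 0x01bb));	/* 0: conflicting config   */
	er_put(&rsp0);
	er_put(&rsp0);
	printf("%d\n", er_get(&rsp0, 0x01bb));	/* 1: free again           */
	return 0;
}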
@@ -1111,49 +1239,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
1111 if (c) 1239 if (c)
1112 return c; 1240 return c;
1113 1241
1114 c = intel_percore_constraints(cpuc, event); 1242 c = intel_shared_regs_constraints(cpuc, event);
1115 if (c) 1243 if (c)
1116 return c; 1244 return c;
1117 1245
1118 return x86_get_event_constraints(cpuc, event); 1246 return x86_get_event_constraints(cpuc, event);
1119} 1247}
1120 1248
1121static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 1249static void
1250intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1122 struct perf_event *event) 1251 struct perf_event *event)
1123{ 1252{
1124 struct extra_reg *er; 1253 struct hw_perf_event_extra *reg;
1125 struct intel_percore *pc;
1126 struct er_account *era;
1127 struct hw_perf_event *hwc = &event->hw;
1128 int i, allref;
1129
1130 if (!cpuc->percore_used)
1131 return;
1132 1254
1133 for (er = x86_pmu.extra_regs; er->msr; er++) { 1255 reg = &event->hw.extra_reg;
1134 if (er->event != (hwc->config & er->config_mask)) 1256 if (reg->idx != EXTRA_REG_NONE)
1135 continue; 1257 __intel_shared_reg_put_constraints(cpuc, reg);
1258}
1136 1259
1137 pc = cpuc->per_core; 1260static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1138 raw_spin_lock(&pc->lock); 1261 struct perf_event *event)
1139 for (i = 0; i < MAX_EXTRA_REGS; i++) { 1262{
1140 era = &pc->regs[i]; 1263 intel_put_shared_regs_event_constraints(cpuc, event);
1141 if (era->ref > 0 &&
1142 era->extra_config == hwc->extra_config &&
1143 era->extra_reg == er->msr) {
1144 era->ref--;
1145 hwc->extra_alloc = 0;
1146 break;
1147 }
1148 }
1149 allref = 0;
1150 for (i = 0; i < MAX_EXTRA_REGS; i++)
1151 allref += pc->regs[i].ref;
1152 if (allref == 0)
1153 cpuc->percore_used = 0;
1154 raw_spin_unlock(&pc->lock);
1155 break;
1156 }
1157} 1264}
1158 1265
1159static int intel_pmu_hw_config(struct perf_event *event) 1266static int intel_pmu_hw_config(struct perf_event *event)
@@ -1206,12 +1313,84 @@ static int intel_pmu_hw_config(struct perf_event *event)
1206 return 0; 1313 return 0;
1207} 1314}
1208 1315
1316struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
1317{
1318 if (x86_pmu.guest_get_msrs)
1319 return x86_pmu.guest_get_msrs(nr);
1320 *nr = 0;
1321 return NULL;
1322}
1323EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
1324
1325static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
1326{
1327 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1328 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1329
1330 arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
1331 arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
1332 arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
1333
1334 *nr = 1;
1335 return arr;
1336}
1337
1338static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
1339{
1340 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1341 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1342 int idx;
1343
1344 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1345 struct perf_event *event = cpuc->events[idx];
1346
1347 arr[idx].msr = x86_pmu_config_addr(idx);
1348 arr[idx].host = arr[idx].guest = 0;
1349
1350 if (!test_bit(idx, cpuc->active_mask))
1351 continue;
1352
1353 arr[idx].host = arr[idx].guest =
1354 event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
1355
1356 if (event->attr.exclude_host)
1357 arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1358 else if (event->attr.exclude_guest)
1359 arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1360 }
1361
1362 *nr = x86_pmu.num_counters;
1363 return arr;
1364}
1365
1366static void core_pmu_enable_event(struct perf_event *event)
1367{
1368 if (!event->attr.exclude_host)
1369 x86_pmu_enable_event(event);
1370}
1371
1372static void core_pmu_enable_all(int added)
1373{
1374 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1375 int idx;
1376
1377 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1378 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
1379
1380 if (!test_bit(idx, cpuc->active_mask) ||
1381 cpuc->events[idx]->attr.exclude_host)
1382 continue;
1383
1384 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
1385 }
1386}
1387
1209static __initconst const struct x86_pmu core_pmu = { 1388static __initconst const struct x86_pmu core_pmu = {
1210 .name = "core", 1389 .name = "core",
1211 .handle_irq = x86_pmu_handle_irq, 1390 .handle_irq = x86_pmu_handle_irq,
1212 .disable_all = x86_pmu_disable_all, 1391 .disable_all = x86_pmu_disable_all,
1213 .enable_all = x86_pmu_enable_all, 1392 .enable_all = core_pmu_enable_all,
1214 .enable = x86_pmu_enable_event, 1393 .enable = core_pmu_enable_event,
1215 .disable = x86_pmu_disable_event, 1394 .disable = x86_pmu_disable_event,
1216 .hw_config = x86_pmu_hw_config, 1395 .hw_config = x86_pmu_hw_config,
1217 .schedule_events = x86_schedule_events, 1396 .schedule_events = x86_schedule_events,
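perf_guest_get_msrs() above hands a hypervisor the (msr, host, guest) triples it should swap around guest entry and exit, with intel_guest_get_msrs() covering MSR_CORE_PERF_GLOBAL_CTRL and core_guest_get_msrs() covering the individual event-select registers. The stand-alone sketch below only mocks the interface to show the intended consumption pattern; the local struct and mock_get_msrs() are illustrative stand-ins, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>

/* local mock of the switch-list entry, for illustration only */
struct guest_switch_msr {
	uint32_t msr;
	uint64_t host, guest;
};

/* stand-in for perf_guest_get_msrs(): one entry for GLOBAL_CTRL */
static struct guest_switch_msr *mock_get_msrs(int *nr)
{
	static struct guest_switch_msr arr[1] = {
		{ .msr = 0x38f /* IA32_PERF_GLOBAL_CTRL */, .host = 0xf, .guest = 0x3 },
	};
	*nr = 1;
	return arr;
}

int main(void)
{
	int i, nr;
	struct guest_switch_msr *msrs = mock_get_msrs(&nr);

	/* a hypervisor would program .guest before entering the guest and
	 * restore .host on exit; here we only print the pairs */
	for (i = 0; i < nr; i++)
		printf("msr %#x: host=%#llx guest=%#llx\n", msrs[i].msr,
		       (unsigned long long)msrs[i].host,
		       (unsigned long long)msrs[i].guest);
	return 0;
}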
@@ -1229,22 +1408,39 @@ static __initconst const struct x86_pmu core_pmu = {
1229 .get_event_constraints = intel_get_event_constraints, 1408 .get_event_constraints = intel_get_event_constraints,
1230 .put_event_constraints = intel_put_event_constraints, 1409 .put_event_constraints = intel_put_event_constraints,
1231 .event_constraints = intel_core_event_constraints, 1410 .event_constraints = intel_core_event_constraints,
1411 .guest_get_msrs = core_guest_get_msrs,
1232}; 1412};
1233 1413
1414struct intel_shared_regs *allocate_shared_regs(int cpu)
1415{
1416 struct intel_shared_regs *regs;
1417 int i;
1418
1419 regs = kzalloc_node(sizeof(struct intel_shared_regs),
1420 GFP_KERNEL, cpu_to_node(cpu));
1421 if (regs) {
1422 /*
1423 * initialize the locks to keep lockdep happy
1424 */
1425 for (i = 0; i < EXTRA_REG_MAX; i++)
1426 raw_spin_lock_init(&regs->regs[i].lock);
1427
1428 regs->core_id = -1;
1429 }
1430 return regs;
1431}
1432
1234static int intel_pmu_cpu_prepare(int cpu) 1433static int intel_pmu_cpu_prepare(int cpu)
1235{ 1434{
1236 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 1435 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1237 1436
1238 if (!cpu_has_ht_siblings()) 1437 if (!x86_pmu.extra_regs)
1239 return NOTIFY_OK; 1438 return NOTIFY_OK;
1240 1439
1241 cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), 1440 cpuc->shared_regs = allocate_shared_regs(cpu);
1242 GFP_KERNEL, cpu_to_node(cpu)); 1441 if (!cpuc->shared_regs)
1243 if (!cpuc->per_core)
1244 return NOTIFY_BAD; 1442 return NOTIFY_BAD;
1245 1443
1246 raw_spin_lock_init(&cpuc->per_core->lock);
1247 cpuc->per_core->core_id = -1;
1248 return NOTIFY_OK; 1444 return NOTIFY_OK;
1249} 1445}
1250 1446
@@ -1260,32 +1456,34 @@ static void intel_pmu_cpu_starting(int cpu)
1260 */ 1456 */
1261 intel_pmu_lbr_reset(); 1457 intel_pmu_lbr_reset();
1262 1458
1263 if (!cpu_has_ht_siblings()) 1459 if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
1264 return; 1460 return;
1265 1461
1266 for_each_cpu(i, topology_thread_cpumask(cpu)) { 1462 for_each_cpu(i, topology_thread_cpumask(cpu)) {
1267 struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; 1463 struct intel_shared_regs *pc;
1268 1464
1465 pc = per_cpu(cpu_hw_events, i).shared_regs;
1269 if (pc && pc->core_id == core_id) { 1466 if (pc && pc->core_id == core_id) {
1270 kfree(cpuc->per_core); 1467 cpuc->kfree_on_online = cpuc->shared_regs;
1271 cpuc->per_core = pc; 1468 cpuc->shared_regs = pc;
1272 break; 1469 break;
1273 } 1470 }
1274 } 1471 }
1275 1472
1276 cpuc->per_core->core_id = core_id; 1473 cpuc->shared_regs->core_id = core_id;
1277 cpuc->per_core->refcnt++; 1474 cpuc->shared_regs->refcnt++;
1278} 1475}
1279 1476
1280static void intel_pmu_cpu_dying(int cpu) 1477static void intel_pmu_cpu_dying(int cpu)
1281{ 1478{
1282 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 1479 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1283 struct intel_percore *pc = cpuc->per_core; 1480 struct intel_shared_regs *pc;
1284 1481
1482 pc = cpuc->shared_regs;
1285 if (pc) { 1483 if (pc) {
1286 if (pc->core_id == -1 || --pc->refcnt == 0) 1484 if (pc->core_id == -1 || --pc->refcnt == 0)
1287 kfree(pc); 1485 kfree(pc);
1288 cpuc->per_core = NULL; 1486 cpuc->shared_regs = NULL;
1289 } 1487 }
1290 1488
1291 fini_debug_store_on_cpu(cpu); 1489 fini_debug_store_on_cpu(cpu);
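intel_pmu_cpu_starting()/intel_pmu_cpu_dying() above make the HT siblings of one core point at a single intel_shared_regs instance, parking the now-redundant allocation in kfree_on_online rather than freeing it immediately. Reduced to plain C, the adoption step looks roughly like this (cpu_state/adopt_sibling are invented names for the sketch, and the real code walks all thread siblings and drops the refcount on cpu_dying):

#include <stdio.h>

struct shared_regs { int core_id; int refcnt; };

struct cpu_state {
	struct shared_regs *shared;
	struct shared_regs *free_later;	/* mirrors kfree_on_online */
};

static void adopt_sibling(struct cpu_state *self, struct cpu_state *sib, int core_id)
{
	if (sib->shared && sib->shared->core_id == core_id) {
		self->free_later = self->shared;	/* duplicate allocation, free later */
		self->shared = sib->shared;		/* both threads share one instance */
	}
	self->shared->core_id = core_id;
	self->shared->refcnt++;
}

int main(void)
{
	struct shared_regs a = { -1, 0 }, b = { -1, 0 };
	struct cpu_state cpu0 = { &a, NULL }, cpu1 = { &b, NULL };

	adopt_sibling(&cpu0, &cpu1, 3);	/* first thread of core 3 keeps its own copy  */
	adopt_sibling(&cpu1, &cpu0, 3);	/* sibling adopts cpu0's copy, parks its own  */

	printf("shared=%d refcnt=%d duplicate parked=%d\n",
	       cpu0.shared == cpu1.shared, cpu0.shared->refcnt,
	       cpu1.free_later != NULL);
	return 0;
}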
@@ -1317,6 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1317 .cpu_prepare = intel_pmu_cpu_prepare, 1515 .cpu_prepare = intel_pmu_cpu_prepare,
1318 .cpu_starting = intel_pmu_cpu_starting, 1516 .cpu_starting = intel_pmu_cpu_starting,
1319 .cpu_dying = intel_pmu_cpu_dying, 1517 .cpu_dying = intel_pmu_cpu_dying,
1518 .guest_get_msrs = intel_guest_get_msrs,
1320}; 1519};
1321 1520
1322static void intel_clovertown_quirks(void) 1521static void intel_clovertown_quirks(void)
@@ -1345,7 +1544,7 @@ static void intel_clovertown_quirks(void)
1345 x86_pmu.pebs_constraints = NULL; 1544 x86_pmu.pebs_constraints = NULL;
1346} 1545}
1347 1546
1348static __init int intel_pmu_init(void) 1547__init int intel_pmu_init(void)
1349{ 1548{
1350 union cpuid10_edx edx; 1549 union cpuid10_edx edx;
1351 union cpuid10_eax eax; 1550 union cpuid10_eax eax;
@@ -1436,7 +1635,6 @@ static __init int intel_pmu_init(void)
1436 1635
1437 x86_pmu.event_constraints = intel_nehalem_event_constraints; 1636 x86_pmu.event_constraints = intel_nehalem_event_constraints;
1438 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; 1637 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
1439 x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
1440 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1638 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1441 x86_pmu.extra_regs = intel_nehalem_extra_regs; 1639 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1442 1640
@@ -1481,10 +1679,10 @@ static __init int intel_pmu_init(void)
1481 intel_pmu_lbr_init_nhm(); 1679 intel_pmu_lbr_init_nhm();
1482 1680
1483 x86_pmu.event_constraints = intel_westmere_event_constraints; 1681 x86_pmu.event_constraints = intel_westmere_event_constraints;
1484 x86_pmu.percore_constraints = intel_westmere_percore_constraints;
1485 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1682 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1486 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; 1683 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
1487 x86_pmu.extra_regs = intel_westmere_extra_regs; 1684 x86_pmu.extra_regs = intel_westmere_extra_regs;
1685 x86_pmu.er_flags |= ERF_HAS_RSP_1;
1488 1686
1489 /* UOPS_ISSUED.STALLED_CYCLES */ 1687 /* UOPS_ISSUED.STALLED_CYCLES */
1490 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1688 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1495,13 +1693,18 @@ static __init int intel_pmu_init(void)
1495 break; 1693 break;
1496 1694
1497 case 42: /* SandyBridge */ 1695 case 42: /* SandyBridge */
1696 case 45: /* SandyBridge, "Romley-EP" */
1498 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1697 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1499 sizeof(hw_cache_event_ids)); 1698 sizeof(hw_cache_event_ids));
1500 1699
1501 intel_pmu_lbr_init_nhm(); 1700 intel_pmu_lbr_init_nhm();
1502 1701
1503 x86_pmu.event_constraints = intel_snb_event_constraints; 1702 x86_pmu.event_constraints = intel_snb_event_constraints;
1504 x86_pmu.pebs_constraints = intel_snb_pebs_events; 1703 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
1704 x86_pmu.extra_regs = intel_snb_extra_regs;
1705 /* all extra regs are per-cpu when HT is on */
1706 x86_pmu.er_flags |= ERF_HAS_RSP_1;
1707 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
1505 1708
1506 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 1709 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
1507 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1710 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1512,20 +1715,19 @@ static __init int intel_pmu_init(void)
1512 break; 1715 break;
1513 1716
1514 default: 1717 default:
1515 /* 1718 switch (x86_pmu.version) {
1516 * default constraints for v2 and up 1719 case 1:
1517 */ 1720 x86_pmu.event_constraints = intel_v1_event_constraints;
1518 x86_pmu.event_constraints = intel_gen_event_constraints; 1721 pr_cont("generic architected perfmon v1, ");
1519 pr_cont("generic architected perfmon, "); 1722 break;
1723 default:
1724 /*
1725 * default constraints for v2 and up
1726 */
1727 x86_pmu.event_constraints = intel_gen_event_constraints;
1728 pr_cont("generic architected perfmon, ");
1729 break;
1730 }
1520 } 1731 }
1521 return 0; 1732 return 0;
1522} 1733}
1523
1524#else /* CONFIG_CPU_SUP_INTEL */
1525
1526static int intel_pmu_init(void)
1527{
1528 return 0;
1529}
1530
1531#endif /* CONFIG_CPU_SUP_INTEL */
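The reworked default branch above now separates architected perfmon v1 (which gets the v1 constraint table) from v2 and later. That version number comes straight from CPUID leaf 0xa; the small user-space probe below, given only as an illustration for gcc/clang on x86, reads the same fields.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx)) {
		puts("no architectural perfmon (CPUID leaf 0xa not supported)");
		return 1;
	}

	printf("perfmon version: %u\n", eax & 0xff);		/* v1 vs v2+          */
	printf("gp counters:     %u\n", (eax >> 8) & 0xff);	/* per logical CPU    */
	printf("counter width:   %u bits\n", (eax >> 16) & 0xff);
	return 0;
}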
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index bab491b8ee25..c0d238f49db8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1,7 +1,10 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#include <linux/bitops.h>
2#include <linux/types.h>
3#include <linux/slab.h>
2 4
3/* The maximal number of PEBS events: */ 5#include <asm/perf_event.h>
4#define MAX_PEBS_EVENTS 4 6
7#include "perf_event.h"
5 8
6/* The size of a BTS record in bytes: */ 9/* The size of a BTS record in bytes: */
7#define BTS_RECORD_SIZE 24 10#define BTS_RECORD_SIZE 24
@@ -37,24 +40,7 @@ struct pebs_record_nhm {
37 u64 status, dla, dse, lat; 40 u64 status, dla, dse, lat;
38}; 41};
39 42
40/* 43void init_debug_store_on_cpu(int cpu)
41 * A debug store configuration.
42 *
43 * We only support architectures that use 64bit fields.
44 */
45struct debug_store {
46 u64 bts_buffer_base;
47 u64 bts_index;
48 u64 bts_absolute_maximum;
49 u64 bts_interrupt_threshold;
50 u64 pebs_buffer_base;
51 u64 pebs_index;
52 u64 pebs_absolute_maximum;
53 u64 pebs_interrupt_threshold;
54 u64 pebs_event_reset[MAX_PEBS_EVENTS];
55};
56
57static void init_debug_store_on_cpu(int cpu)
58{ 44{
59 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 45 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
60 46
@@ -66,7 +52,7 @@ static void init_debug_store_on_cpu(int cpu)
66 (u32)((u64)(unsigned long)ds >> 32)); 52 (u32)((u64)(unsigned long)ds >> 32));
67} 53}
68 54
69static void fini_debug_store_on_cpu(int cpu) 55void fini_debug_store_on_cpu(int cpu)
70{ 56{
71 if (!per_cpu(cpu_hw_events, cpu).ds) 57 if (!per_cpu(cpu_hw_events, cpu).ds)
72 return; 58 return;
@@ -175,7 +161,7 @@ static void release_ds_buffer(int cpu)
175 kfree(ds); 161 kfree(ds);
176} 162}
177 163
178static void release_ds_buffers(void) 164void release_ds_buffers(void)
179{ 165{
180 int cpu; 166 int cpu;
181 167
@@ -194,7 +180,7 @@ static void release_ds_buffers(void)
194 put_online_cpus(); 180 put_online_cpus();
195} 181}
196 182
197static void reserve_ds_buffers(void) 183void reserve_ds_buffers(void)
198{ 184{
199 int bts_err = 0, pebs_err = 0; 185 int bts_err = 0, pebs_err = 0;
200 int cpu; 186 int cpu;
@@ -260,10 +246,10 @@ static void reserve_ds_buffers(void)
260 * BTS 246 * BTS
261 */ 247 */
262 248
263static struct event_constraint bts_constraint = 249struct event_constraint bts_constraint =
264 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); 250 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
265 251
266static void intel_pmu_enable_bts(u64 config) 252void intel_pmu_enable_bts(u64 config)
267{ 253{
268 unsigned long debugctlmsr; 254 unsigned long debugctlmsr;
269 255
@@ -282,7 +268,7 @@ static void intel_pmu_enable_bts(u64 config)
282 update_debugctlmsr(debugctlmsr); 268 update_debugctlmsr(debugctlmsr);
283} 269}
284 270
285static void intel_pmu_disable_bts(void) 271void intel_pmu_disable_bts(void)
286{ 272{
287 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 273 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
288 unsigned long debugctlmsr; 274 unsigned long debugctlmsr;
@@ -299,7 +285,7 @@ static void intel_pmu_disable_bts(void)
299 update_debugctlmsr(debugctlmsr); 285 update_debugctlmsr(debugctlmsr);
300} 286}
301 287
302static int intel_pmu_drain_bts_buffer(void) 288int intel_pmu_drain_bts_buffer(void)
303{ 289{
304 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 290 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
305 struct debug_store *ds = cpuc->ds; 291 struct debug_store *ds = cpuc->ds;
@@ -340,7 +326,7 @@ static int intel_pmu_drain_bts_buffer(void)
340 */ 326 */
341 perf_prepare_sample(&header, &data, event, &regs); 327 perf_prepare_sample(&header, &data, event, &regs);
342 328
343 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) 329 if (perf_output_begin(&handle, event, header.size * (top - at)))
344 return 1; 330 return 1;
345 331
346 for (; at < top; at++) { 332 for (; at < top; at++) {
@@ -361,7 +347,7 @@ static int intel_pmu_drain_bts_buffer(void)
361/* 347/*
362 * PEBS 348 * PEBS
363 */ 349 */
364static struct event_constraint intel_core2_pebs_event_constraints[] = { 350struct event_constraint intel_core2_pebs_event_constraints[] = {
365 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ 351 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
366 INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ 352 INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
367 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ 353 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
@@ -370,14 +356,14 @@ static struct event_constraint intel_core2_pebs_event_constraints[] = {
370 EVENT_CONSTRAINT_END 356 EVENT_CONSTRAINT_END
371}; 357};
372 358
373static struct event_constraint intel_atom_pebs_event_constraints[] = { 359struct event_constraint intel_atom_pebs_event_constraints[] = {
374 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ 360 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
375 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ 361 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
376 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ 362 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
377 EVENT_CONSTRAINT_END 363 EVENT_CONSTRAINT_END
378}; 364};
379 365
380static struct event_constraint intel_nehalem_pebs_event_constraints[] = { 366struct event_constraint intel_nehalem_pebs_event_constraints[] = {
381 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ 367 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
382 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ 368 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
383 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ 369 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
@@ -392,7 +378,7 @@ static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
392 EVENT_CONSTRAINT_END 378 EVENT_CONSTRAINT_END
393}; 379};
394 380
395static struct event_constraint intel_westmere_pebs_event_constraints[] = { 381struct event_constraint intel_westmere_pebs_event_constraints[] = {
396 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ 382 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
397 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ 383 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
398 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ 384 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
@@ -407,7 +393,7 @@ static struct event_constraint intel_westmere_pebs_event_constraints[] = {
407 EVENT_CONSTRAINT_END 393 EVENT_CONSTRAINT_END
408}; 394};
409 395
410static struct event_constraint intel_snb_pebs_events[] = { 396struct event_constraint intel_snb_pebs_event_constraints[] = {
411 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ 397 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
412 INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ 398 INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
413 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ 399 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
@@ -428,8 +414,7 @@ static struct event_constraint intel_snb_pebs_events[] = {
428 EVENT_CONSTRAINT_END 414 EVENT_CONSTRAINT_END
429}; 415};
430 416
431static struct event_constraint * 417struct event_constraint *intel_pebs_constraints(struct perf_event *event)
432intel_pebs_constraints(struct perf_event *event)
433{ 418{
434 struct event_constraint *c; 419 struct event_constraint *c;
435 420
@@ -446,7 +431,7 @@ intel_pebs_constraints(struct perf_event *event)
446 return &emptyconstraint; 431 return &emptyconstraint;
447} 432}
448 433
449static void intel_pmu_pebs_enable(struct perf_event *event) 434void intel_pmu_pebs_enable(struct perf_event *event)
450{ 435{
451 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 436 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
452 struct hw_perf_event *hwc = &event->hw; 437 struct hw_perf_event *hwc = &event->hw;
@@ -460,7 +445,7 @@ static void intel_pmu_pebs_enable(struct perf_event *event)
460 intel_pmu_lbr_enable(event); 445 intel_pmu_lbr_enable(event);
461} 446}
462 447
463static void intel_pmu_pebs_disable(struct perf_event *event) 448void intel_pmu_pebs_disable(struct perf_event *event)
464{ 449{
465 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 450 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
466 struct hw_perf_event *hwc = &event->hw; 451 struct hw_perf_event *hwc = &event->hw;
@@ -475,7 +460,7 @@ static void intel_pmu_pebs_disable(struct perf_event *event)
475 intel_pmu_lbr_disable(event); 460 intel_pmu_lbr_disable(event);
476} 461}
477 462
478static void intel_pmu_pebs_enable_all(void) 463void intel_pmu_pebs_enable_all(void)
479{ 464{
480 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 465 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
481 466
@@ -483,7 +468,7 @@ static void intel_pmu_pebs_enable_all(void)
483 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 468 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
484} 469}
485 470
486static void intel_pmu_pebs_disable_all(void) 471void intel_pmu_pebs_disable_all(void)
487{ 472{
488 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 473 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
489 474
@@ -576,8 +561,6 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
576 return 0; 561 return 0;
577} 562}
578 563
579static int intel_pmu_save_and_restart(struct perf_event *event);
580
581static void __intel_pmu_pebs_event(struct perf_event *event, 564static void __intel_pmu_pebs_event(struct perf_event *event,
582 struct pt_regs *iregs, void *__pebs) 565 struct pt_regs *iregs, void *__pebs)
583{ 566{
@@ -616,7 +599,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
616 else 599 else
617 regs.flags &= ~PERF_EFLAGS_EXACT; 600 regs.flags &= ~PERF_EFLAGS_EXACT;
618 601
619 if (perf_event_overflow(event, 1, &data, &regs)) 602 if (perf_event_overflow(event, &data, &regs))
620 x86_pmu_stop(event, 0); 603 x86_pmu_stop(event, 0);
621} 604}
622 605
@@ -716,7 +699,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
716 * BTS, PEBS probe and setup 699 * BTS, PEBS probe and setup
717 */ 700 */
718 701
719static void intel_ds_init(void) 702void intel_ds_init(void)
720{ 703{
721 /* 704 /*
722 * No support for 32bit formats 705 * No support for 32bit formats
@@ -749,15 +732,3 @@ static void intel_ds_init(void)
749 } 732 }
750 } 733 }
751} 734}
752
753#else /* CONFIG_CPU_SUP_INTEL */
754
755static void reserve_ds_buffers(void)
756{
757}
758
759static void release_ds_buffers(void)
760{
761}
762
763#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d202c1bece1a..3fab3de3ce96 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -1,4 +1,10 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#include <linux/perf_event.h>
2#include <linux/types.h>
3
4#include <asm/perf_event.h>
5#include <asm/msr.h>
6
7#include "perf_event.h"
2 8
3enum { 9enum {
4 LBR_FORMAT_32 = 0x00, 10 LBR_FORMAT_32 = 0x00,
@@ -48,7 +54,7 @@ static void intel_pmu_lbr_reset_64(void)
48 } 54 }
49} 55}
50 56
51static void intel_pmu_lbr_reset(void) 57void intel_pmu_lbr_reset(void)
52{ 58{
53 if (!x86_pmu.lbr_nr) 59 if (!x86_pmu.lbr_nr)
54 return; 60 return;
@@ -59,7 +65,7 @@ static void intel_pmu_lbr_reset(void)
59 intel_pmu_lbr_reset_64(); 65 intel_pmu_lbr_reset_64();
60} 66}
61 67
62static void intel_pmu_lbr_enable(struct perf_event *event) 68void intel_pmu_lbr_enable(struct perf_event *event)
63{ 69{
64 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 70 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
65 71
@@ -81,7 +87,7 @@ static void intel_pmu_lbr_enable(struct perf_event *event)
81 cpuc->lbr_users++; 87 cpuc->lbr_users++;
82} 88}
83 89
84static void intel_pmu_lbr_disable(struct perf_event *event) 90void intel_pmu_lbr_disable(struct perf_event *event)
85{ 91{
86 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 92 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
87 93
@@ -95,7 +101,7 @@ static void intel_pmu_lbr_disable(struct perf_event *event)
95 __intel_pmu_lbr_disable(); 101 __intel_pmu_lbr_disable();
96} 102}
97 103
98static void intel_pmu_lbr_enable_all(void) 104void intel_pmu_lbr_enable_all(void)
99{ 105{
100 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 106 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
101 107
@@ -103,7 +109,7 @@ static void intel_pmu_lbr_enable_all(void)
103 __intel_pmu_lbr_enable(); 109 __intel_pmu_lbr_enable();
104} 110}
105 111
106static void intel_pmu_lbr_disable_all(void) 112void intel_pmu_lbr_disable_all(void)
107{ 113{
108 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 114 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
109 115
@@ -178,7 +184,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
178 cpuc->lbr_stack.nr = i; 184 cpuc->lbr_stack.nr = i;
179} 185}
180 186
181static void intel_pmu_lbr_read(void) 187void intel_pmu_lbr_read(void)
182{ 188{
183 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 189 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
184 190
@@ -191,7 +197,7 @@ static void intel_pmu_lbr_read(void)
191 intel_pmu_lbr_read_64(cpuc); 197 intel_pmu_lbr_read_64(cpuc);
192} 198}
193 199
194static void intel_pmu_lbr_init_core(void) 200void intel_pmu_lbr_init_core(void)
195{ 201{
196 x86_pmu.lbr_nr = 4; 202 x86_pmu.lbr_nr = 4;
197 x86_pmu.lbr_tos = 0x01c9; 203 x86_pmu.lbr_tos = 0x01c9;
@@ -199,7 +205,7 @@ static void intel_pmu_lbr_init_core(void)
199 x86_pmu.lbr_to = 0x60; 205 x86_pmu.lbr_to = 0x60;
200} 206}
201 207
202static void intel_pmu_lbr_init_nhm(void) 208void intel_pmu_lbr_init_nhm(void)
203{ 209{
204 x86_pmu.lbr_nr = 16; 210 x86_pmu.lbr_nr = 16;
205 x86_pmu.lbr_tos = 0x01c9; 211 x86_pmu.lbr_tos = 0x01c9;
@@ -207,12 +213,10 @@ static void intel_pmu_lbr_init_nhm(void)
207 x86_pmu.lbr_to = 0x6c0; 213 x86_pmu.lbr_to = 0x6c0;
208} 214}
209 215
210static void intel_pmu_lbr_init_atom(void) 216void intel_pmu_lbr_init_atom(void)
211{ 217{
212 x86_pmu.lbr_nr = 8; 218 x86_pmu.lbr_nr = 8;
213 x86_pmu.lbr_tos = 0x01c9; 219 x86_pmu.lbr_tos = 0x01c9;
214 x86_pmu.lbr_from = 0x40; 220 x86_pmu.lbr_from = 0x40;
215 x86_pmu.lbr_to = 0x60; 221 x86_pmu.lbr_to = 0x60;
216} 222}
217
218#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ead584fb6a7d..492bf1358a7c 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -7,9 +7,13 @@
7 * For licencing details see kernel-base/COPYING 7 * For licencing details see kernel-base/COPYING
8 */ 8 */
9 9
10#ifdef CONFIG_CPU_SUP_INTEL 10#include <linux/perf_event.h>
11 11
12#include <asm/perf_event_p4.h> 12#include <asm/perf_event_p4.h>
13#include <asm/hardirq.h>
14#include <asm/apic.h>
15
16#include "perf_event.h"
13 17
14#define P4_CNTR_LIMIT 3 18#define P4_CNTR_LIMIT 3
15/* 19/*
@@ -554,13 +558,102 @@ static __initconst const u64 p4_hw_cache_event_ids
554 [ C(RESULT_MISS) ] = -1, 558 [ C(RESULT_MISS) ] = -1,
555 }, 559 },
556 }, 560 },
561 [ C(NODE) ] = {
562 [ C(OP_READ) ] = {
563 [ C(RESULT_ACCESS) ] = -1,
564 [ C(RESULT_MISS) ] = -1,
565 },
566 [ C(OP_WRITE) ] = {
567 [ C(RESULT_ACCESS) ] = -1,
568 [ C(RESULT_MISS) ] = -1,
569 },
570 [ C(OP_PREFETCH) ] = {
571 [ C(RESULT_ACCESS) ] = -1,
572 [ C(RESULT_MISS) ] = -1,
573 },
574 },
557}; 575};
558 576
577/*
578 * Because Netburst is quite restricted in how many
579 * identical events may run simultaneously, we introduce event aliases,
580 * i.e. different events which have the same functionality but
581 * use non-intersecting resources (ESCR/CCCR/counter registers).
582 *
583 * This allows us to relax the restrictions a bit and run two or more
584 * identical events together.
585 *
586 * Never set any custom internal bits such as P4_CONFIG_HT,
587 * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are
588 * either up to date automatically or not applicable at all.
589 */
590struct p4_event_alias {
591 u64 original;
592 u64 alternative;
593} p4_event_aliases[] = {
594 {
595 /*
596 * Non-halted cycles can be substituted with non-sleeping cycles (see
597 * Intel SDM Vol3b for details). We need this alias to be able
598 * to run nmi-watchdog and 'perf top' (or any other user space tool
599 * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
600 * simultaneously.
601 */
602 .original =
603 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
604 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
605 .alternative =
606 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
607 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
608 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
609 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
610 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
611 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
612 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
613 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
614 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
615 p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
616 P4_CCCR_COMPARE),
617 },
618};
619
620static u64 p4_get_alias_event(u64 config)
621{
622 u64 config_match;
623 int i;
624
625 /*
626 * Only events carrying the special alias mark are allowed;
627 * this makes sure the config did not come in as a
628 * malformed RAW event.
629 */
630 if (!(config & P4_CONFIG_ALIASABLE))
631 return 0;
632
633 config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
634
635 for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
636 if (config_match == p4_event_aliases[i].original) {
637 config_match = p4_event_aliases[i].alternative;
638 break;
639 } else if (config_match == p4_event_aliases[i].alternative) {
640 config_match = p4_event_aliases[i].original;
641 break;
642 }
643 }
644
645 if (i >= ARRAY_SIZE(p4_event_aliases))
646 return 0;
647
648 return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
649}
650
559static u64 p4_general_events[PERF_COUNT_HW_MAX] = { 651static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
560 /* non-halted CPU clocks */ 652 /* non-halted CPU clocks */
561 [PERF_COUNT_HW_CPU_CYCLES] = 653 [PERF_COUNT_HW_CPU_CYCLES] =
562 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | 654 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
563 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), 655 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) |
656 P4_CONFIG_ALIASABLE,
564 657
565 /* 658 /*
566 * retired instructions 659 * retired instructions
@@ -945,7 +1038,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
945 1038
946 if (!x86_perf_event_set_period(event)) 1039 if (!x86_perf_event_set_period(event))
947 continue; 1040 continue;
948 if (perf_event_overflow(event, 1, &data, regs)) 1041 if (perf_event_overflow(event, &data, regs))
949 x86_pmu_stop(event, 0); 1042 x86_pmu_stop(event, 0);
950 } 1043 }
951 1044
@@ -1120,6 +1213,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
1120 struct p4_event_bind *bind; 1213 struct p4_event_bind *bind;
1121 unsigned int i, thread, num; 1214 unsigned int i, thread, num;
1122 int cntr_idx, escr_idx; 1215 int cntr_idx, escr_idx;
1216 u64 config_alias;
1217 int pass;
1123 1218
1124 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 1219 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1125 bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); 1220 bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
@@ -1128,6 +1223,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
1128 1223
1129 hwc = &cpuc->event_list[i]->hw; 1224 hwc = &cpuc->event_list[i]->hw;
1130 thread = p4_ht_thread(cpu); 1225 thread = p4_ht_thread(cpu);
1226 pass = 0;
1227
1228again:
1229 /*
1230 * It's possible to swap back and forth endlessly
1231 * between the original and alternative events
1232 * if both are already scheduled, so bound the retries.
1233 */
1234 if (pass > 2)
1235 goto done;
1236
1131 bind = p4_config_get_bind(hwc->config); 1237 bind = p4_config_get_bind(hwc->config);
1132 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); 1238 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
1133 if (unlikely(escr_idx == -1)) 1239 if (unlikely(escr_idx == -1))
@@ -1141,8 +1247,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
1141 } 1247 }
1142 1248
1143 cntr_idx = p4_next_cntr(thread, used_mask, bind); 1249 cntr_idx = p4_next_cntr(thread, used_mask, bind);
1144 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) 1250 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
1145 goto done; 1251 /*
1252 * Check whether an event alias is still available.
1253 */
1254 config_alias = p4_get_alias_event(hwc->config);
1255 if (!config_alias)
1256 goto done;
1257 hwc->config = config_alias;
1258 pass++;
1259 goto again;
1260 }
1146 1261
1147 p4_pmu_swap_config_ts(hwc, cpu); 1262 p4_pmu_swap_config_ts(hwc, cpu);
1148 if (assign) 1263 if (assign)
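The scheduling change above retries with the aliased event when the original config cannot claim a counter/ESCR pair, and the pass counter bounds the retry so the original and alternative cannot be swapped forever. The toy model below captures just that control flow; get_alias()/can_schedule() and the configs are made up for the example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct alias { uint64_t original, alternative; };

static const struct alias aliases[] = {
	{ .original = 0x100, .alternative = 0x200 },
};

static uint64_t get_alias(uint64_t config)
{
	unsigned int i;

	for (i = 0; i < sizeof(aliases) / sizeof(aliases[0]); i++) {
		if (config == aliases[i].original)
			return aliases[i].alternative;
		if (config == aliases[i].alternative)
			return aliases[i].original;
	}
	return 0;	/* no alias registered for this config */
}

static bool can_schedule(uint64_t config)
{
	return config == 0x200;	/* pretend only the alternative still fits */
}

int main(void)
{
	uint64_t config = 0x100;
	int pass = 0;

	while (!can_schedule(config) && pass++ < 2) {
		uint64_t alt = get_alias(config);
		if (!alt)
			break;		/* no alias: give up, as the real code does */
		config = alt;		/* retry scheduling with the aliased event  */
	}
	printf("scheduled config %#llx after %d pass(es)\n",
	       (unsigned long long)config, pass);
	return 0;
}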
@@ -1192,7 +1307,7 @@ static __initconst const struct x86_pmu p4_pmu = {
1192 .perfctr_second_write = 1, 1307 .perfctr_second_write = 1,
1193}; 1308};
1194 1309
1195static __init int p4_pmu_init(void) 1310__init int p4_pmu_init(void)
1196{ 1311{
1197 unsigned int low, high; 1312 unsigned int low, high;
1198 1313
@@ -1215,5 +1330,3 @@ static __init int p4_pmu_init(void)
1215 1330
1216 return 0; 1331 return 0;
1217} 1332}
1218
1219#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 20c097e33860..c7181befecde 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -1,4 +1,7 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#include <linux/perf_event.h>
2#include <linux/types.h>
3
4#include "perf_event.h"
2 5
3/* 6/*
4 * Not sure about some of these 7 * Not sure about some of these
@@ -114,7 +117,7 @@ static __initconst const struct x86_pmu p6_pmu = {
114 .event_constraints = p6_event_constraints, 117 .event_constraints = p6_event_constraints,
115}; 118};
116 119
117static __init int p6_pmu_init(void) 120__init int p6_pmu_init(void)
118{ 121{
119 switch (boot_cpu_data.x86_model) { 122 switch (boot_cpu_data.x86_model) {
120 case 1: 123 case 1:
@@ -138,5 +141,3 @@ static __init int p6_pmu_init(void)
138 141
139 return 0; 142 return 0;
140} 143}
141
142#endif /* CONFIG_CPU_SUP_INTEL */