Diffstat (limited to 'arch/x86/kernel/cpu')

 -rw-r--r--  arch/x86/kernel/cpu/bugs.c                  |   4
 -rw-r--r--  arch/x86/kernel/cpu/common.c                |   2
 -rw-r--r--  arch/x86/kernel/cpu/hypervisor.c            |   4
 -rw-r--r--  arch/x86/kernel/cpu/intel.c                 |  18
 -rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c   | 152
 -rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c            | 288
 -rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c        |  10
 -rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c             | 184
 -rw-r--r--  arch/x86/kernel/cpu/perf_event.c            | 171
 -rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c        |  14
 -rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c      | 386
 -rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c   |  10
 -rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c         | 119

 13 files changed, 804 insertions, 558 deletions
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 525514cf33c..46674fbb62b 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -62,6 +62,8 @@ static void __init check_fpu(void)
 		return;
 	}
 
+	kernel_fpu_begin();
+
 	/*
 	 * trap_init() enabled FXSR and company _before_ testing for FP
 	 * problems here.
@@ -80,6 +82,8 @@ static void __init check_fpu(void)
 		: "=m" (*&fdiv_bug)
 		: "m" (*&x), "m" (*&y));
 
+	kernel_fpu_end();
+
 	boot_cpu_data.fdiv_bug = fdiv_bug;
 	if (boot_cpu_data.fdiv_bug)
 		printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n");
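
The two hunks above only bracket the existing FDIV-bug probe with kernel_fpu_begin()/kernel_fpu_end(). A minimal sketch of that usage pattern, with an illustrative function name (not from bugs.c):

#include <asm/i387.h>	/* kernel_fpu_begin()/kernel_fpu_end() in this era's tree */

static void __init fpu_probe_sketch(void)
{
	kernel_fpu_begin();	/* save the user FPU context, disable preemption */
	/* ... issue FPU instructions, e.g. the fninit/fdivl test above ... */
	kernel_fpu_end();	/* restore the FPU context, re-enable preemption */
}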
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 22a073d7fbf..62184390a60 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,7 +21,7 @@
 #include <linux/topology.h>
 #include <linux/cpumask.h>
 #include <asm/pgtable.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
 #include <asm/apic.h>
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 8095f8611f8..755f64fb074 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -32,11 +32,11 @@
  */
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
-	&x86_hyper_vmware,
-	&x86_hyper_ms_hyperv,
 #ifdef CONFIG_XEN_PVHVM
 	&x86_hyper_xen_hvm,
 #endif
+	&x86_hyper_vmware,
+	&x86_hyper_ms_hyperv,
 };
 
 const struct hypervisor_x86 *x86_hyper;
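
Reordering hypervisors[] changes detection priority: the platform code walks the array front to back and keeps the first entry whose ->detect() callback reports a hit, so Xen HVM is now probed ahead of VMware and Hyper-V. A simplified first-match loop under that assumption (illustrative, not the exact in-tree detection function body):

static const struct hypervisor_x86 *first_matching_hypervisor(void)
{
	const struct hypervisor_x86 * const *p;

	for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++)
		if ((*p)->detect())
			return *p;	/* first hit wins, e.g. Xen HVM before VMware */

	return NULL;
}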
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1edf5ba4fb2..ed6086eedf1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -456,6 +456,24 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_VMX))
 		detect_vmx_virtcap(c);
+
+	/*
+	 * Initialize MSR_IA32_ENERGY_PERF_BIAS if BIOS did not.
+	 * x86_energy_perf_policy(8) is available to change it at run-time
+	 */
+	if (cpu_has(c, X86_FEATURE_EPB)) {
+		u64 epb;
+
+		rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+		if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) {
+			printk_once(KERN_WARNING "ENERGY_PERF_BIAS:"
+				" Set to 'normal', was 'performance'\n"
+				"ENERGY_PERF_BIAS: View and update with"
+				" x86_energy_perf_policy(8)\n");
+			epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
+			wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+		}
+	}
 }
 
 #ifdef CONFIG_X86_32
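
The EPB hunk is a read-modify-write of the low four bits of MSR_IA32_ENERGY_PERF_BIAS: if firmware left the field at "performance", it is moved to "normal" so the policy can later be tuned with x86_energy_perf_policy(8). The same logic in isolation, assuming the msr-index.h values of this era (ENERGY_PERF_BIAS_PERFORMANCE == 0, ENERGY_PERF_BIAS_NORMAL == 6) and a hypothetical helper name:

static void epb_default_to_normal_sketch(void)
{
	u64 epb;

	rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
	if ((epb & 0xf) == ENERGY_PERF_BIAS_PERFORMANCE) {
		/* keep the upper bits, rewrite only the 4-bit bias field */
		epb = (epb & ~0xfULL) | ENERGY_PERF_BIAS_NORMAL;
		wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
	}
}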
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 1e8d66c1336..7395d5f4272 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -43,61 +43,105 @@ static struct severity {
 	unsigned char covered;
 	char *msg;
 } severities[] = {
-#define KERNEL .context = IN_KERNEL
-#define USER .context = IN_USER
-#define SER .ser = SER_REQUIRED
-#define NOSER .ser = NO_SER
-#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
-#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
-#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
-#define MCGMASK(x, res, s, m, r...) \
-	{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
-#define MASK(x, y, s, m, r...) \
-	{ .mask = x, .result = y, SEV(s), .msg = m, ## r }
+#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
+#define KERNEL		.context = IN_KERNEL
+#define USER		.context = IN_USER
+#define SER		.ser = SER_REQUIRED
+#define NOSER		.ser = NO_SER
+#define BITCLR(x)	.mask = x, .result = 0
+#define BITSET(x)	.mask = x, .result = x
+#define MCGMASK(x, y)	.mcgmask = x, .mcgres = y
+#define MASK(x, y)	.mask = x, .result = y
 #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
 #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
 #define MCACOD 0xffff
 
-	BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
-	BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
-	BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
+	MCESEV(
+		NO, "Invalid",
+		BITCLR(MCI_STATUS_VAL)
+		),
+	MCESEV(
+		NO, "Not enabled",
+		BITCLR(MCI_STATUS_EN)
+		),
+	MCESEV(
+		PANIC, "Processor context corrupt",
+		BITSET(MCI_STATUS_PCC)
+		),
 	/* When MCIP is not set something is very confused */
-	MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
+	MCESEV(
+		PANIC, "MCIP not set in MCA handler",
+		MCGMASK(MCG_STATUS_MCIP, 0)
+		),
 	/* Neither return not error IP -- no chance to recover -> PANIC */
-	MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
-		"Neither restart nor error IP"),
-	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
-		KERNEL),
-	BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
-	MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
-	     "Spurious not enabled", SER),
+	MCESEV(
+		PANIC, "Neither restart nor error IP",
+		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+		),
+	MCESEV(
+		PANIC, "In kernel and no restart IP",
+		KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+		),
+	MCESEV(
+		KEEP, "Corrected error",
+		NOSER, BITCLR(MCI_STATUS_UC)
+		),
 
 	/* ignore OVER for UCNA */
-	MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
-	     "Uncorrected no action required", SER),
-	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
-	     "Illegal combination (UCNA with AR=1)", SER),
-	MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
+	MCESEV(
+		KEEP, "Uncorrected no action required",
+		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
+		),
+	MCESEV(
+		PANIC, "Illegal combination (UCNA with AR=1)",
+		SER,
+		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
+		),
+	MCESEV(
+		KEEP, "Non signalled machine check",
+		SER, BITCLR(MCI_STATUS_S)
+		),
 
 	/* AR add known MCACODs here */
-	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
-	     "Action required with lost events", SER),
-	MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
-	     "Action required; unknown MCACOD", SER),
+	MCESEV(
+		PANIC, "Action required with lost events",
+		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
+		),
+	MCESEV(
+		PANIC, "Action required: unknown MCACOD",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
+		),
 
 	/* known AO MCACODs: */
-	MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
-	     "Action optional: memory scrubbing error", SER),
-	MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
-	     "Action optional: last level cache writeback error", SER),
-
-	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
-	     "Action optional unknown MCACOD", SER),
-	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
-	     "Action optional with lost events", SER),
-	BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
-	BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
-	BITSET(0, SOME, "No match")	/* always matches. keep at end */
+	MCESEV(
+		AO, "Action optional: memory scrubbing error",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0)
+		),
+	MCESEV(
+		AO, "Action optional: last level cache writeback error",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a)
+		),
+	MCESEV(
+		SOME, "Action optional: unknown MCACOD",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
+		),
+	MCESEV(
+		SOME, "Action optional with lost events",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
+		),
+
+	MCESEV(
+		PANIC, "Overflowed uncorrected",
+		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
+		),
+	MCESEV(
+		UC, "Uncorrected",
+		BITSET(MCI_STATUS_UC)
+		),
+	MCESEV(
+		SOME, "No match",
+		BITSET(0)
+		)	/* always matches. keep at end */
 };
 
 /*
@@ -112,15 +156,15 @@ static int error_context(struct mce *m)
 	return IN_KERNEL;
 }
 
-int mce_severity(struct mce *a, int tolerant, char **msg)
+int mce_severity(struct mce *m, int tolerant, char **msg)
 {
-	enum context ctx = error_context(a);
+	enum context ctx = error_context(m);
 	struct severity *s;
 
 	for (s = severities;; s++) {
-		if ((a->status & s->mask) != s->result)
+		if ((m->status & s->mask) != s->result)
 			continue;
-		if ((a->mcgstatus & s->mcgmask) != s->mcgres)
+		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
 			continue;
 		if (s->ser == SER_REQUIRED && !mce_ser)
 			continue;
@@ -197,15 +241,15 @@ static const struct file_operations severities_coverage_fops = {
 
 static int __init severities_debugfs_init(void)
 {
-	struct dentry *dmce = NULL, *fseverities_coverage = NULL;
+	struct dentry *dmce, *fsev;
 
 	dmce = mce_get_debugfs_dir();
-	if (dmce == NULL)
+	if (!dmce)
 		goto err_out;
-	fseverities_coverage = debugfs_create_file("severities-coverage",
-						    0444, dmce, NULL,
-						    &severities_coverage_fops);
-	if (fseverities_coverage == NULL)
+
+	fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
+				   &severities_coverage_fops);
+	if (!fsev)
 		goto err_out;
 
 	return 0;
@@ -214,4 +258,4 @@ err_out:
 	return -ENOMEM;
 }
 late_initcall(severities_debugfs_init);
-#endif
+#endif /* CONFIG_DEBUG_FS */
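
The new MCESEV() macro only rearranges how each struct severity initializer is written; the condition macros fill the .mask/.result/.mcgmask/.mcgres/.ser/.context members. For example, the entry

	MCESEV(
		PANIC, "In kernel and no restart IP",
		KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
		),

expands to roughly

	{
		.sev     = MCE_PANIC_SEVERITY,
		.msg     = "In kernel and no restart IP",
		.context = IN_KERNEL,
		.mcgmask = MCG_STATUS_RIPV,
		.mcgres  = 0,
	},

which mce_severity() then matches against m->status and m->mcgstatus exactly as before.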
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ff1ae9b6464..08363b04212 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -10,7 +10,6 @@
 #include <linux/thread_info.h>
 #include <linux/capability.h>
 #include <linux/miscdevice.h>
-#include <linux/interrupt.h>
 #include <linux/ratelimit.h>
 #include <linux/kallsyms.h>
 #include <linux/rcupdate.h>
@@ -38,23 +37,20 @@
 #include <linux/mm.h>
 #include <linux/debugfs.h>
 #include <linux/edac_mce.h>
+#include <linux/irq_work.h>
 
 #include <asm/processor.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/idle.h>
-#include <asm/ipi.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 
 #include "mce-internal.h"
 
-static DEFINE_MUTEX(mce_read_mutex);
+static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
 #define rcu_dereference_check_mce(p) \
 	rcu_dereference_index_check((p), \
 			      rcu_read_lock_sched_held() || \
-			      lockdep_is_held(&mce_read_mutex))
+			      lockdep_is_held(&mce_chrdev_read_mutex))
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
@@ -94,7 +90,8 @@ static unsigned long mce_need_notify;
 static char mce_helper[128];
 static char *mce_helper_argv[2] = { mce_helper, NULL };
 
-static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
+static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
+
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
@@ -373,6 +370,31 @@ static void mce_wrmsrl(u32 msr, u64 v)
 }
 
 /*
+ * Collect all global (w.r.t. this processor) status about this machine
+ * check into our "mce" struct so that we can use it later to assess
+ * the severity of the problem as we read per-bank specific details.
+ */
+static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
+{
+	mce_setup(m);
+
+	m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+	if (regs) {
+		/*
+		 * Get the address of the instruction at the time of
+		 * the machine check error.
+		 */
+		if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
+			m->ip = regs->ip;
+			m->cs = regs->cs;
+		}
+		/* Use accurate RIP reporting if available. */
+		if (rip_msr)
+			m->ip = mce_rdmsrl(rip_msr);
+	}
+}
+
+/*
  * Simple lockless ring to communicate PFNs from the exception handler with the
  * process context work function. This is vastly simplified because there's
  * only a single reader and a single writer.
@@ -443,40 +465,13 @@ static void mce_schedule_work(void)
 	}
 }
 
-/*
- * Get the address of the instruction at the time of the machine check
- * error.
- */
-static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
-{
-
-	if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) {
-		m->ip = regs->ip;
-		m->cs = regs->cs;
-	} else {
-		m->ip = 0;
-		m->cs = 0;
-	}
-	if (rip_msr)
-		m->ip = mce_rdmsrl(rip_msr);
-}
+DEFINE_PER_CPU(struct irq_work, mce_irq_work);
 
-#ifdef CONFIG_X86_LOCAL_APIC
-/*
- * Called after interrupts have been reenabled again
- * when a MCE happened during an interrupts off region
- * in the kernel.
- */
-asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
+static void mce_irq_work_cb(struct irq_work *entry)
 {
-	ack_APIC_irq();
-	exit_idle();
-	irq_enter();
 	mce_notify_irq();
 	mce_schedule_work();
-	irq_exit();
 }
-#endif
 
 static void mce_report_event(struct pt_regs *regs)
 {
@@ -492,29 +487,7 @@ static void mce_report_event(struct pt_regs *regs)
 		return;
 	}
 
-#ifdef CONFIG_X86_LOCAL_APIC
-	/*
-	 * Without APIC do not notify. The event will be picked
-	 * up eventually.
-	 */
-	if (!cpu_has_apic)
-		return;
-
-	/*
-	 * When interrupts are disabled we cannot use
-	 * kernel services safely. Trigger an self interrupt
-	 * through the APIC to instead do the notification
-	 * after interrupts are reenabled again.
-	 */
-	apic->send_IPI_self(MCE_SELF_VECTOR);
-
-	/*
-	 * Wait for idle afterwards again so that we don't leave the
-	 * APIC in a non idle state because the normal APIC writes
-	 * cannot exclude us.
-	 */
-	apic_wait_icr_idle();
-#endif
+	irq_work_queue(&__get_cpu_var(mce_irq_work));
 }
 
 DEFINE_PER_CPU(unsigned, mce_poll_count);
@@ -541,9 +514,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 
 	percpu_inc(mce_poll_count);
 
-	mce_setup(&m);
+	mce_gather_info(&m, NULL);
 
-	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 	for (i = 0; i < banks; i++) {
 		if (!mce_banks[i].ctl || !test_bit(i, *b))
 			continue;
@@ -879,9 +851,9 @@ static int mce_usable_address(struct mce *m)
 {
 	if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
 		return 0;
-	if ((m->misc & 0x3f) > PAGE_SHIFT)
+	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
 		return 0;
-	if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS)
+	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
 		return 0;
 	return 1;
 }
@@ -942,9 +914,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	if (!banks)
 		goto out;
 
-	mce_setup(&m);
+	mce_gather_info(&m, regs);
 
-	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 	final = &__get_cpu_var(mces_seen);
 	*final = m;
 
@@ -1028,7 +999,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
 			mce_ring_add(m.addr >> PAGE_SHIFT);
 
-		mce_get_rip(&m, regs);
 		mce_log(&m);
 
 		if (severity > worst) {
@@ -1190,7 +1160,8 @@ int mce_notify_irq(void)
 	clear_thread_flag(TIF_MCE_NOTIFY);
 
 	if (test_and_clear_bit(0, &mce_need_notify)) {
-		wake_up_interruptible(&mce_wait);
+		/* wake processes polling /dev/mcelog */
+		wake_up_interruptible(&mce_chrdev_wait);
 
 		/*
 		 * There is no risk of missing notifications because
@@ -1363,18 +1334,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 	return 0;
 }
 
-static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
 {
 	if (c->x86 != 5)
-		return;
+		return 0;
+
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		intel_p5_mcheck_init(c);
+		return 1;
 		break;
 	case X86_VENDOR_CENTAUR:
 		winchip_mcheck_init(c);
+		return 1;
 		break;
 	}
+
+	return 0;
 }
 
 static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
@@ -1428,7 +1404,8 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
 	if (mce_disabled)
 		return;
 
-	__mcheck_cpu_ancient_init(c);
+	if (__mcheck_cpu_ancient_init(c))
+		return;
 
 	if (!mce_available(c))
 		return;
@@ -1444,44 +1421,45 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_timer();
 	INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
-
+	init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
 }
 
 /*
- * Character device to read and clear the MCE log.
+ * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
  */
 
-static DEFINE_SPINLOCK(mce_state_lock);
-static int open_count;			/* #times opened */
-static int open_exclu;			/* already open exclusive? */
+static DEFINE_SPINLOCK(mce_chrdev_state_lock);
+static int mce_chrdev_open_count;	/* #times opened */
+static int mce_chrdev_open_exclu;	/* already open exclusive? */
 
-static int mce_open(struct inode *inode, struct file *file)
+static int mce_chrdev_open(struct inode *inode, struct file *file)
 {
-	spin_lock(&mce_state_lock);
+	spin_lock(&mce_chrdev_state_lock);
 
-	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
-		spin_unlock(&mce_state_lock);
+	if (mce_chrdev_open_exclu ||
+	    (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
+		spin_unlock(&mce_chrdev_state_lock);
 
 		return -EBUSY;
 	}
 
 	if (file->f_flags & O_EXCL)
-		open_exclu = 1;
-	open_count++;
+		mce_chrdev_open_exclu = 1;
+	mce_chrdev_open_count++;
 
-	spin_unlock(&mce_state_lock);
+	spin_unlock(&mce_chrdev_state_lock);
 
 	return nonseekable_open(inode, file);
 }
 
-static int mce_release(struct inode *inode, struct file *file)
+static int mce_chrdev_release(struct inode *inode, struct file *file)
 {
-	spin_lock(&mce_state_lock);
+	spin_lock(&mce_chrdev_state_lock);
 
-	open_count--;
-	open_exclu = 0;
+	mce_chrdev_open_count--;
+	mce_chrdev_open_exclu = 0;
 
-	spin_unlock(&mce_state_lock);
+	spin_unlock(&mce_chrdev_state_lock);
 
 	return 0;
 }
@@ -1530,8 +1508,8 @@ static int __mce_read_apei(char __user **ubuf, size_t usize)
 	return 0;
 }
 
-static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
-			loff_t *off)
+static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
+				size_t usize, loff_t *off)
 {
 	char __user *buf = ubuf;
 	unsigned long *cpu_tsc;
@@ -1542,7 +1520,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 	if (!cpu_tsc)
 		return -ENOMEM;
 
-	mutex_lock(&mce_read_mutex);
+	mutex_lock(&mce_chrdev_read_mutex);
 
 	if (!mce_apei_read_done) {
 		err = __mce_read_apei(&buf, usize);
@@ -1562,19 +1540,18 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 	do {
 		for (i = prev; i < next; i++) {
 			unsigned long start = jiffies;
+			struct mce *m = &mcelog.entry[i];
 
-			while (!mcelog.entry[i].finished) {
+			while (!m->finished) {
 				if (time_after_eq(jiffies, start + 2)) {
-					memset(mcelog.entry + i, 0,
-							sizeof(struct mce));
+					memset(m, 0, sizeof(*m));
 					goto timeout;
 				}
 				cpu_relax();
 			}
 			smp_rmb();
-			err |= copy_to_user(buf, mcelog.entry + i,
-					    sizeof(struct mce));
-			buf += sizeof(struct mce);
+			err |= copy_to_user(buf, m, sizeof(*m));
+			buf += sizeof(*m);
 timeout:
 			;
 		}
@@ -1594,13 +1571,13 @@ timeout:
 	on_each_cpu(collect_tscs, cpu_tsc, 1);
 
 	for (i = next; i < MCE_LOG_LEN; i++) {
-		if (mcelog.entry[i].finished &&
-		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
-			err |= copy_to_user(buf, mcelog.entry+i,
-					    sizeof(struct mce));
+		struct mce *m = &mcelog.entry[i];
+
+		if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
+			err |= copy_to_user(buf, m, sizeof(*m));
 			smp_rmb();
-			buf += sizeof(struct mce);
-			memset(&mcelog.entry[i], 0, sizeof(struct mce));
+			buf += sizeof(*m);
+			memset(m, 0, sizeof(*m));
 		}
 	}
 
@@ -1608,15 +1585,15 @@ timeout:
 		err = -EFAULT;
 
 out:
-	mutex_unlock(&mce_read_mutex);
+	mutex_unlock(&mce_chrdev_read_mutex);
 	kfree(cpu_tsc);
 
 	return err ? err : buf - ubuf;
 }
 
-static unsigned int mce_poll(struct file *file, poll_table *wait)
+static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
 {
-	poll_wait(file, &mce_wait, wait);
+	poll_wait(file, &mce_chrdev_wait, wait);
 	if (rcu_access_index(mcelog.next))
 		return POLLIN | POLLRDNORM;
 	if (!mce_apei_read_done && apei_check_mce())
@@ -1624,7 +1601,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
 	return 0;
 }
 
-static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
+				unsigned long arg)
 {
 	int __user *p = (int __user *)arg;
 
@@ -1652,16 +1630,16 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 
 /* Modified in mce-inject.c, so not static or const */
 struct file_operations mce_chrdev_ops = {
-	.open			= mce_open,
-	.release		= mce_release,
-	.read			= mce_read,
-	.poll			= mce_poll,
-	.unlocked_ioctl		= mce_ioctl,
+	.open			= mce_chrdev_open,
+	.release		= mce_chrdev_release,
+	.read			= mce_chrdev_read,
+	.poll			= mce_chrdev_poll,
+	.unlocked_ioctl		= mce_chrdev_ioctl,
 	.llseek			= no_llseek,
 };
 EXPORT_SYMBOL_GPL(mce_chrdev_ops);
 
-static struct miscdevice mce_log_device = {
+static struct miscdevice mce_chrdev_device = {
 	MISC_MCELOG_MINOR,
 	"mcelog",
 	&mce_chrdev_ops,
@@ -1719,7 +1697,7 @@ int __init mcheck_init(void)
 }
 
 /*
- * Sysfs support
+ * mce_syscore: PM support
 */
 
 /*
@@ -1739,12 +1717,12 @@ static int mce_disable_error_reporting(void)
 	return 0;
 }
 
-static int mce_suspend(void)
+static int mce_syscore_suspend(void)
 {
 	return mce_disable_error_reporting();
 }
 
-static void mce_shutdown(void)
+static void mce_syscore_shutdown(void)
 {
 	mce_disable_error_reporting();
 }
@@ -1754,18 +1732,22 @@ static void mce_shutdown(void)
  * Only one CPU is active at this time, the others get re-added later using
  * CPU hotplug:
  */
-static void mce_resume(void)
+static void mce_syscore_resume(void)
 {
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
 }
 
 static struct syscore_ops mce_syscore_ops = {
-	.suspend	= mce_suspend,
-	.shutdown	= mce_shutdown,
-	.resume		= mce_resume,
+	.suspend	= mce_syscore_suspend,
+	.shutdown	= mce_syscore_shutdown,
+	.resume		= mce_syscore_resume,
 };
 
+/*
+ * mce_sysdev: Sysfs support
+ */
+
 static void mce_cpu_restart(void *data)
 {
 	del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1801,11 +1783,11 @@ static void mce_enable_ce(void *all)
 		__mcheck_cpu_init_timer();
 }
 
-static struct sysdev_class mce_sysclass = {
+static struct sysdev_class mce_sysdev_class = {
 	.name		= "machinecheck",
 };
 
-DEFINE_PER_CPU(struct sys_device, mce_dev);
+DEFINE_PER_CPU(struct sys_device, mce_sysdev);
 
 __cpuinitdata
 void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -1934,7 +1916,7 @@ static struct sysdev_ext_attribute attr_cmci_disabled = {
 	&mce_cmci_disabled
 };
 
-static struct sysdev_attribute *mce_attrs[] = {
+static struct sysdev_attribute *mce_sysdev_attrs[] = {
 	&attr_tolerant.attr,
 	&attr_check_interval.attr,
 	&attr_trigger,
@@ -1945,66 +1927,67 @@ static struct sysdev_attribute *mce_attrs[] = {
 	NULL
 };
 
-static cpumask_var_t mce_dev_initialized;
+static cpumask_var_t mce_sysdev_initialized;
 
 /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
-static __cpuinit int mce_create_device(unsigned int cpu)
+static __cpuinit int mce_sysdev_create(unsigned int cpu)
 {
+	struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
 	int err;
 	int i, j;
 
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
-	per_cpu(mce_dev, cpu).id = cpu;
-	per_cpu(mce_dev, cpu).cls = &mce_sysclass;
+	memset(&sysdev->kobj, 0, sizeof(struct kobject));
+	sysdev->id  = cpu;
+	sysdev->cls = &mce_sysdev_class;
 
-	err = sysdev_register(&per_cpu(mce_dev, cpu));
+	err = sysdev_register(sysdev);
 	if (err)
 		return err;
 
-	for (i = 0; mce_attrs[i]; i++) {
-		err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+	for (i = 0; mce_sysdev_attrs[i]; i++) {
+		err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]);
 		if (err)
 			goto error;
 	}
 	for (j = 0; j < banks; j++) {
-		err = sysdev_create_file(&per_cpu(mce_dev, cpu),
-					&mce_banks[j].attr);
+		err = sysdev_create_file(sysdev, &mce_banks[j].attr);
 		if (err)
 			goto error2;
 	}
-	cpumask_set_cpu(cpu, mce_dev_initialized);
+	cpumask_set_cpu(cpu, mce_sysdev_initialized);
 
 	return 0;
 error2:
 	while (--j >= 0)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
+		sysdev_remove_file(sysdev, &mce_banks[j].attr);
 error:
 	while (--i >= 0)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+		sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);
 
-	sysdev_unregister(&per_cpu(mce_dev, cpu));
+	sysdev_unregister(sysdev);
 
 	return err;
 }
 
-static __cpuinit void mce_remove_device(unsigned int cpu)
+static __cpuinit void mce_sysdev_remove(unsigned int cpu)
 {
+	struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
 	int i;
 
-	if (!cpumask_test_cpu(cpu, mce_dev_initialized))
+	if (!cpumask_test_cpu(cpu, mce_sysdev_initialized))
 		return;
 
-	for (i = 0; mce_attrs[i]; i++)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+	for (i = 0; mce_sysdev_attrs[i]; i++)
+		sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);
 
 	for (i = 0; i < banks; i++)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
+		sysdev_remove_file(sysdev, &mce_banks[i].attr);
 
-	sysdev_unregister(&per_cpu(mce_dev, cpu));
-	cpumask_clear_cpu(cpu, mce_dev_initialized);
+	sysdev_unregister(sysdev);
+	cpumask_clear_cpu(cpu, mce_sysdev_initialized);
 }
 
 /* Make sure there are no machine checks on offlined CPUs. */
@@ -2054,7 +2037,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		mce_create_device(cpu);
+		mce_sysdev_create(cpu);
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		break;
@@ -2062,7 +2045,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DEAD_FROZEN:
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
-		mce_remove_device(cpu);
+		mce_sysdev_remove(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
@@ -2116,27 +2099,28 @@ static __init int mcheck_init_device(void)
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
+	zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL);
 
 	mce_init_banks();
 
-	err = sysdev_class_register(&mce_sysclass);
+	err = sysdev_class_register(&mce_sysdev_class);
 	if (err)
 		return err;
 
	for_each_online_cpu(i) {
-		err = mce_create_device(i);
+		err = mce_sysdev_create(i);
 		if (err)
 			return err;
 	}
 
 	register_syscore_ops(&mce_syscore_ops);
 	register_hotcpu_notifier(&mce_cpu_notifier);
-	misc_register(&mce_log_device);
+
+	/* register character device /dev/mcelog */
+	misc_register(&mce_chrdev_device);
 
 	return err;
 }
-
 device_initcall(mcheck_init_device);
 
 /*
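
The MCE-specific APIC self-IPI path removed above is replaced by the generic irq_work API: work queued from the machine-check (NMI-like) context runs once interrupts are usable again. A minimal sketch of that pattern with illustrative names (the real code uses mce_irq_work/mce_irq_work_cb exactly as in the hunks):

#include <linux/irq_work.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(struct irq_work, example_irq_work);

static void example_irq_work_cb(struct irq_work *work)
{
	/* runs later in interrupt context, outside the MCE handler */
}

static void example_cpu_init(void)
{
	init_irq_work(&__get_cpu_var(example_irq_work), example_irq_work_cb);
}

static void example_report(void)
{
	/* safe from NMI/MCE context; no APIC self-IPI bookkeeping needed */
	irq_work_queue(&__get_cpu_var(example_irq_work));
}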
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index bb0adad3514..f5474218cff 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -548,7 +548,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (!b)
 			goto out;
 
-		err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
+		err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
@@ -571,7 +571,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		goto out;
 	}
 
-	b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
+	b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj);
 	if (!b->kobj)
 		goto out_free;
 
@@ -591,7 +591,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (i == cpu)
 			continue;
 
-		err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
+		err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
@@ -669,7 +669,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 #ifdef CONFIG_SMP
 	/* sibling symlink */
 	if (shared_bank[bank] && b->blocks->cpu != cpu) {
-		sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
+		sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name);
 		per_cpu(threshold_banks, cpu)[bank] = NULL;
 
 		return;
@@ -681,7 +681,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 		if (i == cpu)
 			continue;
 
-		sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
+		sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name);
 		per_cpu(threshold_banks, i)[bank] = NULL;
 	}
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 929739a653d..6b96110bb0c 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -79,7 +79,6 @@ void set_mtrr_ops(const struct mtrr_ops *ops)
 static int have_wrcomb(void)
 {
 	struct pci_dev *dev;
-	u8 rev;
 
 	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
 	if (dev != NULL) {
@@ -89,13 +88,11 @@ static int have_wrcomb(void)
 		 * chipsets to be tagged
 		 */
 		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
-		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
-			pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
-			if (rev <= 5) {
-				pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
-				pci_dev_put(dev);
-				return 0;
-			}
+		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
+		    dev->revision <= 5) {
+			pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
+			pci_dev_put(dev);
+			return 0;
 		}
 		/*
 		 * Intel 450NX errata # 23. Non ascending cacheline evictions to
@@ -137,56 +134,42 @@ static void __init init_table(void)
 }
 
 struct set_mtrr_data {
-	atomic_t	count;
-	atomic_t	gate;
 	unsigned long	smp_base;
 	unsigned long	smp_size;
 	unsigned int	smp_reg;
 	mtrr_type	smp_type;
 };
 
-static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);
-
 /**
- * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.
+ * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
+ * by all the CPUs.
  * @info: pointer to mtrr configuration data
 *
 * Returns nothing.
 */
-static int mtrr_work_handler(void *info)
+static int mtrr_rendezvous_handler(void *info)
 {
-#ifdef CONFIG_SMP
 	struct set_mtrr_data *data = info;
-	unsigned long flags;
 
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	local_irq_save(flags);
-
-	atomic_dec(&data->count);
-	while (atomic_read(&data->gate))
-		cpu_relax();
-
-	/* The master has cleared me to execute */
+	/*
+	 * We use this same function to initialize the mtrrs during boot,
+	 * resume, runtime cpu online and on an explicit request to set a
+	 * specific MTRR.
+	 *
+	 * During boot or suspend, the state of the boot cpu's mtrrs has been
+	 * saved, and we want to replicate that across all the cpus that come
+	 * online (either at the end of boot or resume or during a runtime cpu
+	 * online). If we're doing that, @reg is set to something special and on
+	 * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
+	 * started the boot/resume sequence, this might be a duplicate
+	 * set_all()).
+	 */
 	if (data->smp_reg != ~0U) {
 		mtrr_if->set(data->smp_reg, data->smp_base,
 			     data->smp_size, data->smp_type);
-	} else if (mtrr_aps_delayed_init) {
-		/*
-		 * Initialize the MTRRs inaddition to the synchronisation.
-		 */
+	} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
 		mtrr_if->set_all();
 	}
-
-	atomic_dec(&data->count);
-	while (!atomic_read(&data->gate))
-		cpu_relax();
-
-	atomic_dec(&data->count);
-	local_irq_restore(flags);
-#endif
 	return 0;
 }
 
@@ -223,20 +206,11 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
 *
- * What does that mean for us? Well, first we set data.count to the number
- * of CPUs. As each CPU announces that it started the rendezvous handler by
- * decrementing the count, We reset data.count and set the data.gate flag
- * allowing all the cpu's to proceed with the work. As each cpu disables
- * interrupts, it'll decrement data.count once. We wait until it hits 0 and
- * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
- * are waiting for that flag to be cleared. Once it's cleared, each
- * CPU goes through the transition of updating MTRRs.
- * The CPU vendors may each do it differently,
- * so we call mtrr_if->set() callback and let them take care of it.
- * When they're done, they again decrement data->count and wait for data.gate
- * to be set.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
- * Everyone then enables interrupts and we all continue on.
+ * What does that mean for us? Well, stop_machine() will ensure that
+ * the rendezvous handler is started on each CPU. And in lockstep they
+ * do the state transition of disabling interrupts, updating MTRR's
+ * (the CPU vendors may each do it differently, so we call mtrr_if->set()
+ * callback and let them take care of it.) and enabling interrupts.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
@@ -244,92 +218,26 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
 static void
 set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
 {
-	struct set_mtrr_data data;
-	unsigned long flags;
-	int cpu;
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
 
-	preempt_disable();
-
-	data.smp_reg = reg;
-	data.smp_base = base;
-	data.smp_size = size;
-	data.smp_type = type;
-	atomic_set(&data.count, num_booting_cpus() - 1);
-
-	/* Make sure data.count is visible before unleashing other CPUs */
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Start the ball rolling on other CPUs */
-	for_each_online_cpu(cpu) {
-		struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);
-
-		if (cpu == smp_processor_id())
-			continue;
-
-		stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
-	}
-
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	local_irq_save(flags);
-
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	/* Ok, reset count and toggle gate */
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 0);
-
-	/* Do our MTRR business */
-
-	/*
-	 * HACK!
-	 *
-	 * We use this same function to initialize the mtrrs during boot,
-	 * resume, runtime cpu online and on an explicit request to set a
-	 * specific MTRR.
-	 *
-	 * During boot or suspend, the state of the boot cpu's mtrrs has been
-	 * saved, and we want to replicate that across all the cpus that come
-	 * online (either at the end of boot or resume or during a runtime cpu
-	 * online). If we're doing that, @reg is set to something special and on
-	 * this cpu we still do mtrr_if->set_all(). During boot/resume, this
-	 * is unnecessary if at this point we are still on the cpu that started
-	 * the boot/resume sequence. But there is no guarantee that we are still
-	 * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be
-	 * sure that we are in sync with everyone else.
-	 */
-	if (reg != ~0U)
-		mtrr_if->set(reg, base, size, type);
-	else
-		mtrr_if->set_all();
+	stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
+}
 
-	/* Wait for the others */
-	while (atomic_read(&data.count))
-		cpu_relax();
-
-	atomic_set(&data.count, num_booting_cpus() - 1);
-	smp_wmb();
-	atomic_set(&data.gate, 1);
-
-	/*
-	 * Wait here for everyone to have seen the gate change
-	 * So we're the last ones to touch 'data'
-	 */
-	while (atomic_read(&data.count))
-		cpu_relax();
+static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
+				       unsigned long size, mtrr_type type)
+{
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
+
+	stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
+				       cpu_callout_mask);
 
-	local_irq_restore(flags);
-	preempt_enable();
 }
 
 /**
@@ -783,7 +691,7 @@ void mtrr_ap_init(void)
	 * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
	 *      lock to prevent mtrr entry changes
	 */
-	set_mtrr(~0U, 0, 0, 0);
+	set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
 }
 
 /**
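
The open-coded count/gate rendezvous is replaced by stop_machine(), which already runs the handler on every CPU in the given mask with interrupts disabled and in lockstep (and stop_machine_from_inactive_cpu() does the same for a CPU that is not yet online, as used by mtrr_ap_init()). A minimal sketch of the calling pattern, with illustrative names:

#include <linux/stop_machine.h>

static int rendezvous_fn(void *info)
{
	/* executed on every CPU in the mask, with IRQs off */
	return 0;
}

static void update_everywhere(void *cfg)
{
	/* returns only after rendezvous_fn() has run on all online CPUs */
	stop_machine(rendezvous_fn, cfg, cpu_online_mask);
}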
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3a0338b4b17..cfa62ec090e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/highmem.h> | ||
26 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
27 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
28 | 27 | ||
@@ -45,38 +44,27 @@ do { \ | |||
45 | #endif | 44 | #endif |
46 | 45 | ||
47 | /* | 46 | /* |
48 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | 47 | * | NHM/WSM | SNB | |
48 | * register ------------------------------- | ||
49 | * | HT | no HT | HT | no HT | | ||
50 | *----------------------------------------- | ||
51 | * offcore | core | core | cpu | core | | ||
52 | * lbr_sel | core | core | cpu | core | | ||
53 | * ld_lat | cpu | core | cpu | core | | ||
54 | *----------------------------------------- | ||
55 | * | ||
56 | * Given that there is a small number of shared regs, | ||
57 | * we can pre-allocate their slot in the per-cpu | ||
58 | * per-core reg tables. | ||
49 | */ | 59 | */ |
50 | static unsigned long | 60 | enum extra_reg_type { |
51 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | 61 | EXTRA_REG_NONE = -1, /* not used */ |
52 | { | ||
53 | unsigned long offset, addr = (unsigned long)from; | ||
54 | unsigned long size, len = 0; | ||
55 | struct page *page; | ||
56 | void *map; | ||
57 | int ret; | ||
58 | |||
59 | do { | ||
60 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
61 | if (!ret) | ||
62 | break; | ||
63 | |||
64 | offset = addr & (PAGE_SIZE - 1); | ||
65 | size = min(PAGE_SIZE - offset, n - len); | ||
66 | |||
67 | map = kmap_atomic(page); | ||
68 | memcpy(to, map+offset, size); | ||
69 | kunmap_atomic(map); | ||
70 | put_page(page); | ||
71 | 62 | ||
72 | len += size; | 63 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ |
73 | to += size; | 64 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ |
74 | addr += size; | ||
75 | 65 | ||
76 | } while (len < n); | 66 | EXTRA_REG_MAX /* number of entries needed */ |
77 | 67 | }; | |
78 | return len; | ||
79 | } | ||
80 | 68 | ||
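The comment table above is the key to the rest of this patch: on NHM/WSM the offcore response MSRs are shared per core, while on SNB with HT enabled they become per-cpu (which the ERF_NO_HT_SHARING flag below encodes). A tiny sketch of that mapping, with an assumed helper name, purely for illustration:

#include <stdbool.h>
#include <stdio.h>

enum scope { SCOPE_CORE, SCOPE_CPU };

/* sharing scope of the offcore_response MSRs per the table above */
static enum scope offcore_scope(bool is_snb, bool ht_on)
{
	/* NHM/WSM: always per-core; SNB: per-cpu once HT is on */
	return (is_snb && ht_on) ? SCOPE_CPU : SCOPE_CORE;
}

int main(void)
{
	printf("SNB, HT on : %s\n", offcore_scope(true, true) == SCOPE_CPU ? "cpu" : "core");
	printf("NHM, HT on : %s\n", offcore_scope(false, true) == SCOPE_CPU ? "cpu" : "core");
	printf("SNB, HT off: %s\n", offcore_scope(true, false) == SCOPE_CPU ? "cpu" : "core");
	return 0;
}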
81 | struct event_constraint { | 69 | struct event_constraint { |
82 | union { | 70 | union { |
@@ -132,11 +120,10 @@ struct cpu_hw_events { | |||
132 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | 120 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; |
133 | 121 | ||
134 | /* | 122 | /* |
135 | * Intel percore register state. | 123 | * manage shared (per-core, per-cpu) registers |
136 | * Coordinate shared resources between HT threads. | 124 | * used on Intel NHM/WSM/SNB |
137 | */ | 125 | */ |
138 | int percore_used; /* Used by this CPU? */ | 126 | struct intel_shared_regs *shared_regs; |
139 | struct intel_percore *per_core; | ||
140 | 127 | ||
141 | /* | 128 | /* |
142 | * AMD specific bits | 129 | * AMD specific bits |
@@ -187,26 +174,45 @@ struct cpu_hw_events { | |||
187 | for ((e) = (c); (e)->weight; (e)++) | 174 | for ((e) = (c); (e)->weight; (e)++) |
188 | 175 | ||
189 | /* | 176 | /* |
177 | * Per register state. | ||
178 | */ | ||
179 | struct er_account { | ||
180 | raw_spinlock_t lock; /* per-core: protect structure */ | ||
181 | u64 config; /* extra MSR config */ | ||
182 | u64 reg; /* extra MSR number */ | ||
183 | atomic_t ref; /* reference count */ | ||
184 | }; | ||
185 | |||
186 | /* | ||
190 | * Extra registers for specific events. | 187 | * Extra registers for specific events. |
188 | * | ||
191 | * Some events need large masks and require external MSRs. | 189 | * Some events need large masks and require external MSRs. |
192 | * Define a mapping to these extra registers. | 190 | * Those extra MSRs end up being shared for all events on |
191 | * a PMU and sometimes between the PMUs of sibling HT threads. | ||
192 | * In either case, the kernel needs to handle conflicting | ||
193 | * accesses to those extra, shared, regs. The data structure | ||
194 | * to manage those registers is stored in cpu_hw_events. | ||
193 | */ | 195 | */ |
194 | struct extra_reg { | 196 | struct extra_reg { |
195 | unsigned int event; | 197 | unsigned int event; |
196 | unsigned int msr; | 198 | unsigned int msr; |
197 | u64 config_mask; | 199 | u64 config_mask; |
198 | u64 valid_mask; | 200 | u64 valid_mask; |
201 | int idx; /* per_xxx->regs[] reg index */ | ||
199 | }; | 202 | }; |
200 | 203 | ||
201 | #define EVENT_EXTRA_REG(e, ms, m, vm) { \ | 204 | #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ |
202 | .event = (e), \ | 205 | .event = (e), \ |
203 | .msr = (ms), \ | 206 | .msr = (ms), \ |
204 | .config_mask = (m), \ | 207 | .config_mask = (m), \ |
205 | .valid_mask = (vm), \ | 208 | .valid_mask = (vm), \ |
209 | .idx = EXTRA_REG_##i \ | ||
206 | } | 210 | } |
207 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ | 211 | |
208 | EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) | 212 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ |
209 | #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) | 213 | EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) |
214 | |||
215 | #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) | ||
210 | 216 | ||
211 | union perf_capabilities { | 217 | union perf_capabilities { |
212 | struct { | 218 | struct { |
@@ -252,7 +258,6 @@ struct x86_pmu { | |||
252 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 258 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
253 | struct perf_event *event); | 259 | struct perf_event *event); |
254 | struct event_constraint *event_constraints; | 260 | struct event_constraint *event_constraints; |
255 | struct event_constraint *percore_constraints; | ||
256 | void (*quirks)(void); | 261 | void (*quirks)(void); |
257 | int perfctr_second_write; | 262 | int perfctr_second_write; |
258 | 263 | ||
@@ -286,8 +291,12 @@ struct x86_pmu { | |||
286 | * Extra registers for events | 291 | * Extra registers for events |
287 | */ | 292 | */ |
288 | struct extra_reg *extra_regs; | 293 | struct extra_reg *extra_regs; |
294 | unsigned int er_flags; | ||
289 | }; | 295 | }; |
290 | 296 | ||
297 | #define ERF_NO_HT_SHARING 1 | ||
298 | #define ERF_HAS_RSP_1 2 | ||
299 | |||
291 | static struct x86_pmu x86_pmu __read_mostly; | 300 | static struct x86_pmu x86_pmu __read_mostly; |
292 | 301 | ||
293 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 302 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
@@ -393,10 +402,10 @@ static inline unsigned int x86_pmu_event_addr(int index) | |||
393 | */ | 402 | */ |
394 | static int x86_pmu_extra_regs(u64 config, struct perf_event *event) | 403 | static int x86_pmu_extra_regs(u64 config, struct perf_event *event) |
395 | { | 404 | { |
405 | struct hw_perf_event_extra *reg; | ||
396 | struct extra_reg *er; | 406 | struct extra_reg *er; |
397 | 407 | ||
398 | event->hw.extra_reg = 0; | 408 | reg = &event->hw.extra_reg; |
399 | event->hw.extra_config = 0; | ||
400 | 409 | ||
401 | if (!x86_pmu.extra_regs) | 410 | if (!x86_pmu.extra_regs) |
402 | return 0; | 411 | return 0; |
@@ -406,8 +415,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) | |||
406 | continue; | 415 | continue; |
407 | if (event->attr.config1 & ~er->valid_mask) | 416 | if (event->attr.config1 & ~er->valid_mask) |
408 | return -EINVAL; | 417 | return -EINVAL; |
409 | event->hw.extra_reg = er->msr; | 418 | |
410 | event->hw.extra_config = event->attr.config1; | 419 | reg->idx = er->idx; |
420 | reg->config = event->attr.config1; | ||
421 | reg->reg = er->msr; | ||
411 | break; | 422 | break; |
412 | } | 423 | } |
413 | return 0; | 424 | return 0; |
@@ -706,6 +717,9 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
706 | event->hw.last_cpu = -1; | 717 | event->hw.last_cpu = -1; |
707 | event->hw.last_tag = ~0ULL; | 718 | event->hw.last_tag = ~0ULL; |
708 | 719 | ||
720 | /* mark unused */ | ||
721 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
722 | |||
709 | return x86_pmu.hw_config(event); | 723 | return x86_pmu.hw_config(event); |
710 | } | 724 | } |
711 | 725 | ||
@@ -747,8 +761,8 @@ static void x86_pmu_disable(struct pmu *pmu) | |||
747 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | 761 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
748 | u64 enable_mask) | 762 | u64 enable_mask) |
749 | { | 763 | { |
750 | if (hwc->extra_reg) | 764 | if (hwc->extra_reg.reg) |
751 | wrmsrl(hwc->extra_reg, hwc->extra_config); | 765 | wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); |
752 | wrmsrl(hwc->config_base, hwc->config | enable_mask); | 766 | wrmsrl(hwc->config_base, hwc->config | enable_mask); |
753 | } | 767 | } |
754 | 768 | ||
@@ -1332,7 +1346,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1332 | if (!x86_perf_event_set_period(event)) | 1346 | if (!x86_perf_event_set_period(event)) |
1333 | continue; | 1347 | continue; |
1334 | 1348 | ||
1335 | if (perf_event_overflow(event, 1, &data, regs)) | 1349 | if (perf_event_overflow(event, &data, regs)) |
1336 | x86_pmu_stop(event, 0); | 1350 | x86_pmu_stop(event, 0); |
1337 | } | 1351 | } |
1338 | 1352 | ||
@@ -1637,6 +1651,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu) | |||
1637 | perf_pmu_enable(pmu); | 1651 | perf_pmu_enable(pmu); |
1638 | return 0; | 1652 | return 0; |
1639 | } | 1653 | } |
1654 | /* | ||
1655 | * a fake_cpuc is used to validate event groups. Due to | ||
1656 | * the extra reg logic, we also need to allocate a fake | ||
1657 | * shared regs structure. Otherwise, group events | ||
1658 | * using an extra reg may conflict without the kernel being | ||
1659 | * able to catch this when the last event gets added to | ||
1660 | * the group. | ||
1661 | */ | ||
1662 | static void free_fake_cpuc(struct cpu_hw_events *cpuc) | ||
1663 | { | ||
1664 | kfree(cpuc->shared_regs); | ||
1665 | kfree(cpuc); | ||
1666 | } | ||
1667 | |||
1668 | static struct cpu_hw_events *allocate_fake_cpuc(void) | ||
1669 | { | ||
1670 | struct cpu_hw_events *cpuc; | ||
1671 | int cpu = raw_smp_processor_id(); | ||
1672 | |||
1673 | cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); | ||
1674 | if (!cpuc) | ||
1675 | return ERR_PTR(-ENOMEM); | ||
1676 | |||
1677 | /* only needed if we have extra_regs */ | ||
1678 | if (x86_pmu.extra_regs) { | ||
1679 | cpuc->shared_regs = allocate_shared_regs(cpu); | ||
1680 | if (!cpuc->shared_regs) | ||
1681 | goto error; | ||
1682 | } | ||
1683 | return cpuc; | ||
1684 | error: | ||
1685 | free_fake_cpuc(cpuc); | ||
1686 | return ERR_PTR(-ENOMEM); | ||
1687 | } | ||
1640 | 1688 | ||
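allocate_fake_cpuc() switches the validation paths from a bare NULL/-ENOMEM protocol to the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() convention, which encodes a small negative errno directly in the pointer value so callers can just propagate it. A self-contained userspace model of that convention (MAX_ERRNO taken to be 4095, as in the kernel headers):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *err_ptr(long error)      { return (void *)error; }
static int   is_err(const void *ptr)  { return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO; }
static long  ptr_err(const void *ptr) { return (long)ptr; }

int main(void)
{
	/* what allocate_fake_cpuc() hands back when kzalloc() fails */
	void *fake_cpuc = err_ptr(-ENOMEM);

	if (is_err(fake_cpuc)) {
		/* validate_event()/validate_group() just propagate the errno */
		printf("allocation failed: %ld\n", ptr_err(fake_cpuc));
		return 1;
	}
	return 0;
}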
1641 | /* | 1689 | /* |
1642 | * validate that we can schedule this event | 1690 | * validate that we can schedule this event |
@@ -1647,9 +1695,9 @@ static int validate_event(struct perf_event *event) | |||
1647 | struct event_constraint *c; | 1695 | struct event_constraint *c; |
1648 | int ret = 0; | 1696 | int ret = 0; |
1649 | 1697 | ||
1650 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | 1698 | fake_cpuc = allocate_fake_cpuc(); |
1651 | if (!fake_cpuc) | 1699 | if (IS_ERR(fake_cpuc)) |
1652 | return -ENOMEM; | 1700 | return PTR_ERR(fake_cpuc); |
1653 | 1701 | ||
1654 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | 1702 | c = x86_pmu.get_event_constraints(fake_cpuc, event); |
1655 | 1703 | ||
@@ -1659,7 +1707,7 @@ static int validate_event(struct perf_event *event) | |||
1659 | if (x86_pmu.put_event_constraints) | 1707 | if (x86_pmu.put_event_constraints) |
1660 | x86_pmu.put_event_constraints(fake_cpuc, event); | 1708 | x86_pmu.put_event_constraints(fake_cpuc, event); |
1661 | 1709 | ||
1662 | kfree(fake_cpuc); | 1710 | free_fake_cpuc(fake_cpuc); |
1663 | 1711 | ||
1664 | return ret; | 1712 | return ret; |
1665 | } | 1713 | } |
@@ -1679,36 +1727,32 @@ static int validate_group(struct perf_event *event) | |||
1679 | { | 1727 | { |
1680 | struct perf_event *leader = event->group_leader; | 1728 | struct perf_event *leader = event->group_leader; |
1681 | struct cpu_hw_events *fake_cpuc; | 1729 | struct cpu_hw_events *fake_cpuc; |
1682 | int ret, n; | 1730 | int ret = -ENOSPC, n; |
1683 | |||
1684 | ret = -ENOMEM; | ||
1685 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1686 | if (!fake_cpuc) | ||
1687 | goto out; | ||
1688 | 1731 | ||
1732 | fake_cpuc = allocate_fake_cpuc(); | ||
1733 | if (IS_ERR(fake_cpuc)) | ||
1734 | return PTR_ERR(fake_cpuc); | ||
1689 | /* | 1735 | /* |
1690 | * the event is not yet connected with its | 1736 | * the event is not yet connected with its |
1691 | * siblings therefore we must first collect | 1737 | * siblings therefore we must first collect |
1692 | * existing siblings, then add the new event | 1738 | * existing siblings, then add the new event |
1693 | * before we can simulate the scheduling | 1739 | * before we can simulate the scheduling |
1694 | */ | 1740 | */ |
1695 | ret = -ENOSPC; | ||
1696 | n = collect_events(fake_cpuc, leader, true); | 1741 | n = collect_events(fake_cpuc, leader, true); |
1697 | if (n < 0) | 1742 | if (n < 0) |
1698 | goto out_free; | 1743 | goto out; |
1699 | 1744 | ||
1700 | fake_cpuc->n_events = n; | 1745 | fake_cpuc->n_events = n; |
1701 | n = collect_events(fake_cpuc, event, false); | 1746 | n = collect_events(fake_cpuc, event, false); |
1702 | if (n < 0) | 1747 | if (n < 0) |
1703 | goto out_free; | 1748 | goto out; |
1704 | 1749 | ||
1705 | fake_cpuc->n_events = n; | 1750 | fake_cpuc->n_events = n; |
1706 | 1751 | ||
1707 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); | 1752 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
1708 | 1753 | ||
1709 | out_free: | ||
1710 | kfree(fake_cpuc); | ||
1711 | out: | 1754 | out: |
1755 | free_fake_cpuc(fake_cpuc); | ||
1712 | return ret; | 1756 | return ret; |
1713 | } | 1757 | } |
1714 | 1758 | ||
@@ -1856,6 +1900,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1856 | 1900 | ||
1857 | perf_callchain_store(entry, regs->ip); | 1901 | perf_callchain_store(entry, regs->ip); |
1858 | 1902 | ||
1903 | if (!current->mm) | ||
1904 | return; | ||
1905 | |||
1859 | if (perf_callchain_user32(regs, entry)) | 1906 | if (perf_callchain_user32(regs, entry)) |
1860 | return; | 1907 | return; |
1861 | 1908 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index fe29c1d2219..941caa2e449 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -89,6 +89,20 @@ static __initconst const u64 amd_hw_cache_event_ids | |||
89 | [ C(RESULT_MISS) ] = -1, | 89 | [ C(RESULT_MISS) ] = -1, |
90 | }, | 90 | }, |
91 | }, | 91 | }, |
92 | [ C(NODE) ] = { | ||
93 | [ C(OP_READ) ] = { | ||
94 | [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */ | ||
95 | [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */ | ||
96 | }, | ||
97 | [ C(OP_WRITE) ] = { | ||
98 | [ C(RESULT_ACCESS) ] = -1, | ||
99 | [ C(RESULT_MISS) ] = -1, | ||
100 | }, | ||
101 | [ C(OP_PREFETCH) ] = { | ||
102 | [ C(RESULT_ACCESS) ] = -1, | ||
103 | [ C(RESULT_MISS) ] = -1, | ||
104 | }, | ||
105 | }, | ||
92 | }; | 106 | }; |
93 | 107 | ||
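The C(NODE) rows added throughout this patch plug into the generic cache-event encoding, where the config packs cache id, operation and result into one value (cache | (op << 8) | (result << 16)); NODE is the new cache id introduced by this series. A small sketch of how a node-read-miss config would be composed (the enum values reflect the perf ABI as of this series and are shown only for illustration):

#include <stdint.h>
#include <stdio.h>

enum { C_NODE = 6 };			/* PERF_COUNT_HW_CACHE_NODE */
enum { OP_READ = 0 };			/* PERF_COUNT_HW_CACHE_OP_READ */
enum { RESULT_ACCESS = 0, RESULT_MISS = 1 };

/* config layout used by PERF_TYPE_HW_CACHE events */
static uint64_t hw_cache_config(unsigned int cache, unsigned int op,
				unsigned int result)
{
	return cache | (op << 8) | (result << 16);
}

int main(void)
{
	/* resolved through tables like the one above, e.g. 0x98e9 on AMD */
	printf("node-read-miss config = %#llx\n",
	       (unsigned long long)hw_cache_config(C_NODE, OP_READ, RESULT_MISS));
	return 0;
}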
94 | /* | 108 | /* |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 41178c826c4..f88af2c2a56 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1,25 +1,15 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | 1 | #ifdef CONFIG_CPU_SUP_INTEL |
2 | 2 | ||
3 | #define MAX_EXTRA_REGS 2 | ||
4 | |||
5 | /* | ||
6 | * Per register state. | ||
7 | */ | ||
8 | struct er_account { | ||
9 | int ref; /* reference count */ | ||
10 | unsigned int extra_reg; /* extra MSR number */ | ||
11 | u64 extra_config; /* extra MSR config */ | ||
12 | }; | ||
13 | |||
14 | /* | 3 | /* |
15 | * Per core state | 4 | * Per core/cpu state |
16 | * This used to coordinate shared registers for HT threads. | 5 | * |
6 | * Used to coordinate shared registers between HT threads or | ||
7 | * among events on a single PMU. | ||
17 | */ | 8 | */ |
18 | struct intel_percore { | 9 | struct intel_shared_regs { |
19 | raw_spinlock_t lock; /* protect structure */ | 10 | struct er_account regs[EXTRA_REG_MAX]; |
20 | struct er_account regs[MAX_EXTRA_REGS]; | 11 | int refcnt; /* per-core: #HT threads */ |
21 | int refcnt; /* number of threads */ | 12 | unsigned core_id; /* per-core: core id */ |
22 | unsigned core_id; | ||
23 | }; | 13 | }; |
24 | 14 | ||
25 | /* | 15 | /* |
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = | |||
88 | 78 | ||
89 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = | 79 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = |
90 | { | 80 | { |
91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 81 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
92 | EVENT_EXTRA_END | 82 | EVENT_EXTRA_END |
93 | }; | 83 | }; |
94 | 84 | ||
95 | static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = | ||
96 | { | ||
97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
98 | EVENT_CONSTRAINT_END | ||
99 | }; | ||
100 | |||
101 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = | 85 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = |
102 | { | 86 | { |
103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 87 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -116,8 +100,6 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = | |||
116 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 100 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
117 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 101 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ |
118 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ | 102 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ |
119 | INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */ | ||
120 | INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */ | ||
121 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | 103 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
122 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ | 104 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
123 | EVENT_CONSTRAINT_END | 105 | EVENT_CONSTRAINT_END |
@@ -125,15 +107,13 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = | |||
125 | 107 | ||
126 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = | 108 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = |
127 | { | 109 | { |
128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 110 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), | 111 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), |
130 | EVENT_EXTRA_END | 112 | EVENT_EXTRA_END |
131 | }; | 113 | }; |
132 | 114 | ||
133 | static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = | 115 | static struct event_constraint intel_v1_event_constraints[] __read_mostly = |
134 | { | 116 | { |
135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), | ||
137 | EVENT_CONSTRAINT_END | 117 | EVENT_CONSTRAINT_END |
138 | }; | 118 | }; |
139 | 119 | ||
@@ -145,6 +125,12 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = | |||
145 | EVENT_CONSTRAINT_END | 125 | EVENT_CONSTRAINT_END |
146 | }; | 126 | }; |
147 | 127 | ||
128 | static struct extra_reg intel_snb_extra_regs[] __read_mostly = { | ||
129 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), | ||
130 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), | ||
131 | EVENT_EXTRA_END | ||
132 | }; | ||
133 | |||
148 | static u64 intel_pmu_event_map(int hw_event) | 134 | static u64 intel_pmu_event_map(int hw_event) |
149 | { | 135 | { |
150 | return intel_perfmon_event_map[hw_event]; | 136 | return intel_perfmon_event_map[hw_event]; |
@@ -245,6 +231,21 @@ static __initconst const u64 snb_hw_cache_event_ids | |||
245 | [ C(RESULT_MISS) ] = -1, | 231 | [ C(RESULT_MISS) ] = -1, |
246 | }, | 232 | }, |
247 | }, | 233 | }, |
234 | [ C(NODE) ] = { | ||
235 | [ C(OP_READ) ] = { | ||
236 | [ C(RESULT_ACCESS) ] = -1, | ||
237 | [ C(RESULT_MISS) ] = -1, | ||
238 | }, | ||
239 | [ C(OP_WRITE) ] = { | ||
240 | [ C(RESULT_ACCESS) ] = -1, | ||
241 | [ C(RESULT_MISS) ] = -1, | ||
242 | }, | ||
243 | [ C(OP_PREFETCH) ] = { | ||
244 | [ C(RESULT_ACCESS) ] = -1, | ||
245 | [ C(RESULT_MISS) ] = -1, | ||
246 | }, | ||
247 | }, | ||
248 | |||
248 | }; | 249 | }; |
249 | 250 | ||
250 | static __initconst const u64 westmere_hw_cache_event_ids | 251 | static __initconst const u64 westmere_hw_cache_event_ids |
@@ -346,6 +347,20 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
346 | [ C(RESULT_MISS) ] = -1, | 347 | [ C(RESULT_MISS) ] = -1, |
347 | }, | 348 | }, |
348 | }, | 349 | }, |
350 | [ C(NODE) ] = { | ||
351 | [ C(OP_READ) ] = { | ||
352 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
353 | [ C(RESULT_MISS) ] = 0x01b7, | ||
354 | }, | ||
355 | [ C(OP_WRITE) ] = { | ||
356 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
357 | [ C(RESULT_MISS) ] = 0x01b7, | ||
358 | }, | ||
359 | [ C(OP_PREFETCH) ] = { | ||
360 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
361 | [ C(RESULT_MISS) ] = 0x01b7, | ||
362 | }, | ||
363 | }, | ||
349 | }; | 364 | }; |
350 | 365 | ||
351 | /* | 366 | /* |
@@ -398,7 +413,21 @@ static __initconst const u64 nehalem_hw_cache_extra_regs | |||
398 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, | 413 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, |
399 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, | 414 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, |
400 | }, | 415 | }, |
401 | } | 416 | }, |
417 | [ C(NODE) ] = { | ||
418 | [ C(OP_READ) ] = { | ||
419 | [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM, | ||
420 | [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM, | ||
421 | }, | ||
422 | [ C(OP_WRITE) ] = { | ||
423 | [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM, | ||
424 | [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM, | ||
425 | }, | ||
426 | [ C(OP_PREFETCH) ] = { | ||
427 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM, | ||
428 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM, | ||
429 | }, | ||
430 | }, | ||
402 | }; | 431 | }; |
403 | 432 | ||
404 | static __initconst const u64 nehalem_hw_cache_event_ids | 433 | static __initconst const u64 nehalem_hw_cache_event_ids |
@@ -500,6 +529,20 @@ static __initconst const u64 nehalem_hw_cache_event_ids | |||
500 | [ C(RESULT_MISS) ] = -1, | 529 | [ C(RESULT_MISS) ] = -1, |
501 | }, | 530 | }, |
502 | }, | 531 | }, |
532 | [ C(NODE) ] = { | ||
533 | [ C(OP_READ) ] = { | ||
534 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
535 | [ C(RESULT_MISS) ] = 0x01b7, | ||
536 | }, | ||
537 | [ C(OP_WRITE) ] = { | ||
538 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
539 | [ C(RESULT_MISS) ] = 0x01b7, | ||
540 | }, | ||
541 | [ C(OP_PREFETCH) ] = { | ||
542 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
543 | [ C(RESULT_MISS) ] = 0x01b7, | ||
544 | }, | ||
545 | }, | ||
503 | }; | 546 | }; |
504 | 547 | ||
505 | static __initconst const u64 core2_hw_cache_event_ids | 548 | static __initconst const u64 core2_hw_cache_event_ids |
@@ -1003,7 +1046,7 @@ again: | |||
1003 | 1046 | ||
1004 | data.period = event->hw.last_period; | 1047 | data.period = event->hw.last_period; |
1005 | 1048 | ||
1006 | if (perf_event_overflow(event, 1, &data, regs)) | 1049 | if (perf_event_overflow(event, &data, regs)) |
1007 | x86_pmu_stop(event, 0); | 1050 | x86_pmu_stop(event, 0); |
1008 | } | 1051 | } |
1009 | 1052 | ||
@@ -1037,65 +1080,121 @@ intel_bts_constraints(struct perf_event *event) | |||
1037 | return NULL; | 1080 | return NULL; |
1038 | } | 1081 | } |
1039 | 1082 | ||
1083 | static bool intel_try_alt_er(struct perf_event *event, int orig_idx) | ||
1084 | { | ||
1085 | if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) | ||
1086 | return false; | ||
1087 | |||
1088 | if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { | ||
1089 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | ||
1090 | event->hw.config |= 0x01bb; | ||
1091 | event->hw.extra_reg.idx = EXTRA_REG_RSP_1; | ||
1092 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; | ||
1093 | } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { | ||
1094 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | ||
1095 | event->hw.config |= 0x01b7; | ||
1096 | event->hw.extra_reg.idx = EXTRA_REG_RSP_0; | ||
1097 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; | ||
1098 | } | ||
1099 | |||
1100 | if (event->hw.extra_reg.idx == orig_idx) | ||
1101 | return false; | ||
1102 | |||
1103 | return true; | ||
1104 | } | ||
1105 | |||
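intel_try_alt_er() above exploits the fact that the offcore response events 0x01b7 (OFFCORE_RESPONSE_0) and 0x01bb (OFFCORE_RESPONSE_1) are interchangeable on parts with ERF_HAS_RSP_1: when one shared MSR slot is taken, the event is simply retargeted at the other. A stripped-down model of the swap; the struct is a simplified stand-in and the raw MSR numbers are shown only for illustration.

#include <stdint.h>
#include <stdio.h>

enum { RSP_0, RSP_1 };

struct fake_event {
	uint64_t config;	/* event code + umask in the low 16 bits */
	int      idx;
	uint32_t msr;
};

static int try_alt_er(struct fake_event *e, int orig_idx)
{
	if (e->idx == RSP_0) {
		e->config = (e->config & ~0xffffULL) | 0x01bb;
		e->idx = RSP_1;
		e->msr = 0x1a7;			/* MSR_OFFCORE_RSP_1 */
	} else if (e->idx == RSP_1) {
		e->config = (e->config & ~0xffffULL) | 0x01b7;
		e->idx = RSP_0;
		e->msr = 0x1a6;			/* MSR_OFFCORE_RSP_0 */
	}
	return e->idx != orig_idx;		/* 0: nothing left to try */
}

int main(void)
{
	struct fake_event ev = { .config = 0x01b7, .idx = RSP_0, .msr = 0x1a6 };

	if (try_alt_er(&ev, ev.idx))
		printf("retargeted to config=%#llx msr=%#x\n",
		       (unsigned long long)ev.config, ev.msr);
	return 0;
}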
1106 | /* | ||
1107 | * manage allocation of shared extra msr for certain events | ||
1108 | * | ||
1109 | * sharing can be: | ||
1110 | * per-cpu: to be shared between the various events on a single PMU | ||
1111 | * per-core: per-cpu + shared by HT threads | ||
1112 | */ | ||
1040 | static struct event_constraint * | 1113 | static struct event_constraint * |
1041 | intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1114 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, |
1115 | struct perf_event *event) | ||
1042 | { | 1116 | { |
1043 | struct hw_perf_event *hwc = &event->hw; | 1117 | struct event_constraint *c = &emptyconstraint; |
1044 | unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; | 1118 | struct hw_perf_event_extra *reg = &event->hw.extra_reg; |
1045 | struct event_constraint *c; | ||
1046 | struct intel_percore *pc; | ||
1047 | struct er_account *era; | 1119 | struct er_account *era; |
1048 | int i; | 1120 | unsigned long flags; |
1049 | int free_slot; | 1121 | int orig_idx = reg->idx; |
1050 | int found; | ||
1051 | 1122 | ||
1052 | if (!x86_pmu.percore_constraints || hwc->extra_alloc) | 1123 | /* already allocated shared msr */ |
1053 | return NULL; | 1124 | if (reg->alloc) |
1125 | return &unconstrained; | ||
1054 | 1126 | ||
1055 | for (c = x86_pmu.percore_constraints; c->cmask; c++) { | 1127 | again: |
1056 | if (e != c->code) | 1128 | era = &cpuc->shared_regs->regs[reg->idx]; |
1057 | continue; | 1129 | /* |
1130 | * we use spin_lock_irqsave() to avoid lockdep issues when | ||
1131 | * passing a fake cpuc | ||
1132 | */ | ||
1133 | raw_spin_lock_irqsave(&era->lock, flags); | ||
1134 | |||
1135 | if (!atomic_read(&era->ref) || era->config == reg->config) { | ||
1136 | |||
1137 | /* lock in msr value */ | ||
1138 | era->config = reg->config; | ||
1139 | era->reg = reg->reg; | ||
1140 | |||
1141 | /* one more user */ | ||
1142 | atomic_inc(&era->ref); | ||
1143 | |||
1144 | /* no need to reallocate during incremental event scheduling */ | ||
1145 | reg->alloc = 1; | ||
1058 | 1146 | ||
1059 | /* | 1147 | /* |
1060 | * Allocate resource per core. | 1148 | * All events using extra_reg are unconstrained. |
1149 | * Avoids calling x86_get_event_constraints() | ||
1150 | * | ||
1151 | * Must revisit if extra_reg controlling events | ||
1152 | * ever have constraints. Worst case we go through | ||
1153 | * the regular event constraint table. | ||
1061 | */ | 1154 | */ |
1062 | pc = cpuc->per_core; | 1155 | c = &unconstrained; |
1063 | if (!pc) | 1156 | } else if (intel_try_alt_er(event, orig_idx)) { |
1064 | break; | 1157 | raw_spin_unlock(&era->lock); |
1065 | c = &emptyconstraint; | 1158 | goto again; |
1066 | raw_spin_lock(&pc->lock); | ||
1067 | free_slot = -1; | ||
1068 | found = 0; | ||
1069 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | ||
1070 | era = &pc->regs[i]; | ||
1071 | if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { | ||
1072 | /* Allow sharing same config */ | ||
1073 | if (hwc->extra_config == era->extra_config) { | ||
1074 | era->ref++; | ||
1075 | cpuc->percore_used = 1; | ||
1076 | hwc->extra_alloc = 1; | ||
1077 | c = NULL; | ||
1078 | } | ||
1079 | /* else conflict */ | ||
1080 | found = 1; | ||
1081 | break; | ||
1082 | } else if (era->ref == 0 && free_slot == -1) | ||
1083 | free_slot = i; | ||
1084 | } | ||
1085 | if (!found && free_slot != -1) { | ||
1086 | era = &pc->regs[free_slot]; | ||
1087 | era->ref = 1; | ||
1088 | era->extra_reg = hwc->extra_reg; | ||
1089 | era->extra_config = hwc->extra_config; | ||
1090 | cpuc->percore_used = 1; | ||
1091 | hwc->extra_alloc = 1; | ||
1092 | c = NULL; | ||
1093 | } | ||
1094 | raw_spin_unlock(&pc->lock); | ||
1095 | return c; | ||
1096 | } | 1159 | } |
1160 | raw_spin_unlock_irqrestore(&era->lock, flags); | ||
1097 | 1161 | ||
1098 | return NULL; | 1162 | return c; |
1163 | } | ||
1164 | |||
1165 | static void | ||
1166 | __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, | ||
1167 | struct hw_perf_event_extra *reg) | ||
1168 | { | ||
1169 | struct er_account *era; | ||
1170 | |||
1171 | /* | ||
1172 | * Only put the constraint if the extra reg was actually | ||
1173 | * allocated. This also takes care of events which do | ||
1174 | * not use an extra shared reg. | ||
1175 | */ | ||
1176 | if (!reg->alloc) | ||
1177 | return; | ||
1178 | |||
1179 | era = &cpuc->shared_regs->regs[reg->idx]; | ||
1180 | |||
1181 | /* one fewer user */ | ||
1182 | atomic_dec(&era->ref); | ||
1183 | |||
1184 | /* allocate again next time */ | ||
1185 | reg->alloc = 0; | ||
1186 | } | ||
1187 | |||
1188 | static struct event_constraint * | ||
1189 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, | ||
1190 | struct perf_event *event) | ||
1191 | { | ||
1192 | struct event_constraint *c = NULL; | ||
1193 | |||
1194 | if (event->hw.extra_reg.idx != EXTRA_REG_NONE) | ||
1195 | c = __intel_shared_reg_get_constraints(cpuc, event); | ||
1196 | |||
1197 | return c; | ||
1099 | } | 1198 | } |
1100 | 1199 | ||
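The get/put pair above boils down to a per-slot reference count keyed by the programmed MSR value: a slot can be taken if nobody uses it, or if the existing users programmed the same config; otherwise the event gets the empty constraint (or retries via the alternate register). A minimal lock-free model of that rule (the real code additionally holds era->lock and uses atomic_t for the reference count):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct er_slot {
	uint64_t config;	/* value the shared MSR is programmed to */
	int      ref;		/* number of scheduled events relying on it */
};

static bool er_get(struct er_slot *s, uint64_t config)
{
	if (s->ref == 0 || s->config == config) {	/* free, or same config */
		s->config = config;
		s->ref++;
		return true;				/* -> &unconstrained */
	}
	return false;					/* -> &emptyconstraint */
}

static void er_put(struct er_slot *s)
{
	s->ref--;					/* allocate again next time */
}

int main(void)
{
	struct er_slot rsp0 = { 0, 0 };

	printf("%d\n", er_get(&rsp0, 0x10));	/* 1: first user grabs the slot */
	printf("%d\n", er_get(&rsp0, 0x10));	/* 1: identical config may share */
	printf("%d\n", er_get(&rsp0, 0x20));	/* 0: conflicting config */
	er_put(&rsp0);
	er_put(&rsp0);
	return 0;
}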
1101 | static struct event_constraint * | 1200 | static struct event_constraint * |
@@ -1111,49 +1210,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
1111 | if (c) | 1210 | if (c) |
1112 | return c; | 1211 | return c; |
1113 | 1212 | ||
1114 | c = intel_percore_constraints(cpuc, event); | 1213 | c = intel_shared_regs_constraints(cpuc, event); |
1115 | if (c) | 1214 | if (c) |
1116 | return c; | 1215 | return c; |
1117 | 1216 | ||
1118 | return x86_get_event_constraints(cpuc, event); | 1217 | return x86_get_event_constraints(cpuc, event); |
1119 | } | 1218 | } |
1120 | 1219 | ||
1121 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | 1220 | static void |
1221 | intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, | ||
1122 | struct perf_event *event) | 1222 | struct perf_event *event) |
1123 | { | 1223 | { |
1124 | struct extra_reg *er; | 1224 | struct hw_perf_event_extra *reg; |
1125 | struct intel_percore *pc; | ||
1126 | struct er_account *era; | ||
1127 | struct hw_perf_event *hwc = &event->hw; | ||
1128 | int i, allref; | ||
1129 | 1225 | ||
1130 | if (!cpuc->percore_used) | 1226 | reg = &event->hw.extra_reg; |
1131 | return; | 1227 | if (reg->idx != EXTRA_REG_NONE) |
1132 | 1228 | __intel_shared_reg_put_constraints(cpuc, reg); | |
1133 | for (er = x86_pmu.extra_regs; er->msr; er++) { | 1229 | } |
1134 | if (er->event != (hwc->config & er->config_mask)) | ||
1135 | continue; | ||
1136 | 1230 | ||
1137 | pc = cpuc->per_core; | 1231 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
1138 | raw_spin_lock(&pc->lock); | 1232 | struct perf_event *event) |
1139 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | 1233 | { |
1140 | era = &pc->regs[i]; | 1234 | intel_put_shared_regs_event_constraints(cpuc, event); |
1141 | if (era->ref > 0 && | ||
1142 | era->extra_config == hwc->extra_config && | ||
1143 | era->extra_reg == er->msr) { | ||
1144 | era->ref--; | ||
1145 | hwc->extra_alloc = 0; | ||
1146 | break; | ||
1147 | } | ||
1148 | } | ||
1149 | allref = 0; | ||
1150 | for (i = 0; i < MAX_EXTRA_REGS; i++) | ||
1151 | allref += pc->regs[i].ref; | ||
1152 | if (allref == 0) | ||
1153 | cpuc->percore_used = 0; | ||
1154 | raw_spin_unlock(&pc->lock); | ||
1155 | break; | ||
1156 | } | ||
1157 | } | 1235 | } |
1158 | 1236 | ||
1159 | static int intel_pmu_hw_config(struct perf_event *event) | 1237 | static int intel_pmu_hw_config(struct perf_event *event) |
@@ -1231,20 +1309,36 @@ static __initconst const struct x86_pmu core_pmu = { | |||
1231 | .event_constraints = intel_core_event_constraints, | 1309 | .event_constraints = intel_core_event_constraints, |
1232 | }; | 1310 | }; |
1233 | 1311 | ||
1312 | static struct intel_shared_regs *allocate_shared_regs(int cpu) | ||
1313 | { | ||
1314 | struct intel_shared_regs *regs; | ||
1315 | int i; | ||
1316 | |||
1317 | regs = kzalloc_node(sizeof(struct intel_shared_regs), | ||
1318 | GFP_KERNEL, cpu_to_node(cpu)); | ||
1319 | if (regs) { | ||
1320 | /* | ||
1321 | * initialize the locks to keep lockdep happy | ||
1322 | */ | ||
1323 | for (i = 0; i < EXTRA_REG_MAX; i++) | ||
1324 | raw_spin_lock_init(®s->regs[i].lock); | ||
1325 | |||
1326 | regs->core_id = -1; | ||
1327 | } | ||
1328 | return regs; | ||
1329 | } | ||
1330 | |||
1234 | static int intel_pmu_cpu_prepare(int cpu) | 1331 | static int intel_pmu_cpu_prepare(int cpu) |
1235 | { | 1332 | { |
1236 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1333 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
1237 | 1334 | ||
1238 | if (!cpu_has_ht_siblings()) | 1335 | if (!x86_pmu.extra_regs) |
1239 | return NOTIFY_OK; | 1336 | return NOTIFY_OK; |
1240 | 1337 | ||
1241 | cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), | 1338 | cpuc->shared_regs = allocate_shared_regs(cpu); |
1242 | GFP_KERNEL, cpu_to_node(cpu)); | 1339 | if (!cpuc->shared_regs) |
1243 | if (!cpuc->per_core) | ||
1244 | return NOTIFY_BAD; | 1340 | return NOTIFY_BAD; |
1245 | 1341 | ||
1246 | raw_spin_lock_init(&cpuc->per_core->lock); | ||
1247 | cpuc->per_core->core_id = -1; | ||
1248 | return NOTIFY_OK; | 1342 | return NOTIFY_OK; |
1249 | } | 1343 | } |
1250 | 1344 | ||
@@ -1260,32 +1354,34 @@ static void intel_pmu_cpu_starting(int cpu) | |||
1260 | */ | 1354 | */ |
1261 | intel_pmu_lbr_reset(); | 1355 | intel_pmu_lbr_reset(); |
1262 | 1356 | ||
1263 | if (!cpu_has_ht_siblings()) | 1357 | if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) |
1264 | return; | 1358 | return; |
1265 | 1359 | ||
1266 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | 1360 | for_each_cpu(i, topology_thread_cpumask(cpu)) { |
1267 | struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; | 1361 | struct intel_shared_regs *pc; |
1268 | 1362 | ||
1363 | pc = per_cpu(cpu_hw_events, i).shared_regs; | ||
1269 | if (pc && pc->core_id == core_id) { | 1364 | if (pc && pc->core_id == core_id) { |
1270 | kfree(cpuc->per_core); | 1365 | kfree(cpuc->shared_regs); |
1271 | cpuc->per_core = pc; | 1366 | cpuc->shared_regs = pc; |
1272 | break; | 1367 | break; |
1273 | } | 1368 | } |
1274 | } | 1369 | } |
1275 | 1370 | ||
1276 | cpuc->per_core->core_id = core_id; | 1371 | cpuc->shared_regs->core_id = core_id; |
1277 | cpuc->per_core->refcnt++; | 1372 | cpuc->shared_regs->refcnt++; |
1278 | } | 1373 | } |
1279 | 1374 | ||
1280 | static void intel_pmu_cpu_dying(int cpu) | 1375 | static void intel_pmu_cpu_dying(int cpu) |
1281 | { | 1376 | { |
1282 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1377 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
1283 | struct intel_percore *pc = cpuc->per_core; | 1378 | struct intel_shared_regs *pc; |
1284 | 1379 | ||
1380 | pc = cpuc->shared_regs; | ||
1285 | if (pc) { | 1381 | if (pc) { |
1286 | if (pc->core_id == -1 || --pc->refcnt == 0) | 1382 | if (pc->core_id == -1 || --pc->refcnt == 0) |
1287 | kfree(pc); | 1383 | kfree(pc); |
1288 | cpuc->per_core = NULL; | 1384 | cpuc->shared_regs = NULL; |
1289 | } | 1385 | } |
1290 | 1386 | ||
1291 | fini_debug_store_on_cpu(cpu); | 1387 | fini_debug_store_on_cpu(cpu); |
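intel_pmu_cpu_starting() wires up the sharing: each HT sibling first gets its own intel_shared_regs from allocate_shared_regs() (core_id preset to -1), and the second sibling to come up frees its copy and adopts the one already tagged with its core id, bumping refcnt so intel_pmu_cpu_dying() only frees the structure when the last sibling goes away. A compact userspace model of that adoption, using a hypothetical 2-core/2-thread topology:

#include <stdio.h>
#include <stdlib.h>

struct shared_regs { int core_id; int refcnt; };

static struct shared_regs *cpu_regs[4];       /* per-cpu pointer */
static const int core_of[4] = { 0, 1, 0, 1 }; /* hypothetical topology */

static void cpu_starting(int cpu)
{
	int core = core_of[cpu];

	for (int i = 0; i < 4; i++) {
		struct shared_regs *pc = cpu_regs[i];

		if (i != cpu && pc && pc->core_id == core) {
			free(cpu_regs[cpu]);	/* drop our own copy ... */
			cpu_regs[cpu] = pc;	/* ... and share the sibling's */
			break;
		}
	}
	cpu_regs[cpu]->core_id = core;
	cpu_regs[cpu]->refcnt++;
}

int main(void)
{
	for (int cpu = 0; cpu < 4; cpu++) {
		cpu_regs[cpu] = calloc(1, sizeof(*cpu_regs[cpu]));
		cpu_regs[cpu]->core_id = -1;	/* as allocate_shared_regs() does */
		cpu_starting(cpu);
	}
	printf("cpu0 and cpu2 share: %d\n", cpu_regs[0] == cpu_regs[2]);  /* 1 */
	printf("refcnt on core 0:   %d\n", cpu_regs[0]->refcnt);          /* 2 */
	return 0;
}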
@@ -1436,7 +1532,6 @@ static __init int intel_pmu_init(void) | |||
1436 | 1532 | ||
1437 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 1533 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
1438 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; | 1534 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; |
1439 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; | ||
1440 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1535 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1441 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1536 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
1442 | 1537 | ||
@@ -1481,10 +1576,10 @@ static __init int intel_pmu_init(void) | |||
1481 | intel_pmu_lbr_init_nhm(); | 1576 | intel_pmu_lbr_init_nhm(); |
1482 | 1577 | ||
1483 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 1578 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
1484 | x86_pmu.percore_constraints = intel_westmere_percore_constraints; | ||
1485 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1579 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1486 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; | 1580 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; |
1487 | x86_pmu.extra_regs = intel_westmere_extra_regs; | 1581 | x86_pmu.extra_regs = intel_westmere_extra_regs; |
1582 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | ||
1488 | 1583 | ||
1489 | /* UOPS_ISSUED.STALLED_CYCLES */ | 1584 | /* UOPS_ISSUED.STALLED_CYCLES */ |
1490 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1585 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; |
@@ -1495,6 +1590,7 @@ static __init int intel_pmu_init(void) | |||
1495 | break; | 1590 | break; |
1496 | 1591 | ||
1497 | case 42: /* SandyBridge */ | 1592 | case 42: /* SandyBridge */ |
1593 | case 45: /* SandyBridge, "Romley-EP" */ | ||
1498 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1594 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
1499 | sizeof(hw_cache_event_ids)); | 1595 | sizeof(hw_cache_event_ids)); |
1500 | 1596 | ||
@@ -1502,6 +1598,10 @@ static __init int intel_pmu_init(void) | |||
1502 | 1598 | ||
1503 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1599 | x86_pmu.event_constraints = intel_snb_event_constraints; |
1504 | x86_pmu.pebs_constraints = intel_snb_pebs_events; | 1600 | x86_pmu.pebs_constraints = intel_snb_pebs_events; |
1601 | x86_pmu.extra_regs = intel_snb_extra_regs; | ||
1602 | /* all extra regs are per-cpu when HT is on */ | ||
1603 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | ||
1604 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; | ||
1505 | 1605 | ||
1506 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ | 1606 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ |
1507 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1607 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; |
@@ -1512,11 +1612,19 @@ static __init int intel_pmu_init(void) | |||
1512 | break; | 1612 | break; |
1513 | 1613 | ||
1514 | default: | 1614 | default: |
1515 | /* | 1615 | switch (x86_pmu.version) { |
1516 | * default constraints for v2 and up | 1616 | case 1: |
1517 | */ | 1617 | x86_pmu.event_constraints = intel_v1_event_constraints; |
1518 | x86_pmu.event_constraints = intel_gen_event_constraints; | 1618 | pr_cont("generic architected perfmon v1, "); |
1519 | pr_cont("generic architected perfmon, "); | 1619 | break; |
1620 | default: | ||
1621 | /* | ||
1622 | * default constraints for v2 and up | ||
1623 | */ | ||
1624 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
1625 | pr_cont("generic architected perfmon, "); | ||
1626 | break; | ||
1627 | } | ||
1520 | } | 1628 | } |
1521 | return 0; | 1629 | return 0; |
1522 | } | 1630 | } |
@@ -1528,4 +1636,8 @@ static int intel_pmu_init(void) | |||
1528 | return 0; | 1636 | return 0; |
1529 | } | 1637 | } |
1530 | 1638 | ||
1639 | static struct intel_shared_regs *allocate_shared_regs(int cpu) | ||
1640 | { | ||
1641 | return NULL; | ||
1642 | } | ||
1531 | #endif /* CONFIG_CPU_SUP_INTEL */ | 1643 | #endif /* CONFIG_CPU_SUP_INTEL */ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index bab491b8ee2..3213c52db76 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -340,7 +340,7 @@ static int intel_pmu_drain_bts_buffer(void) | |||
340 | */ | 340 | */ |
341 | perf_prepare_sample(&header, &data, event, ®s); | 341 | perf_prepare_sample(&header, &data, event, ®s); |
342 | 342 | ||
343 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | 343 | if (perf_output_begin(&handle, event, header.size * (top - at))) |
344 | return 1; | 344 | return 1; |
345 | 345 | ||
346 | for (; at < top; at++) { | 346 | for (; at < top; at++) { |
@@ -508,6 +508,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
508 | unsigned long from = cpuc->lbr_entries[0].from; | 508 | unsigned long from = cpuc->lbr_entries[0].from; |
509 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | 509 | unsigned long old_to, to = cpuc->lbr_entries[0].to; |
510 | unsigned long ip = regs->ip; | 510 | unsigned long ip = regs->ip; |
511 | int is_64bit = 0; | ||
511 | 512 | ||
512 | /* | 513 | /* |
513 | * We don't need to fixup if the PEBS assist is fault like | 514 | * We don't need to fixup if the PEBS assist is fault like |
@@ -559,7 +560,10 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
559 | } else | 560 | } else |
560 | kaddr = (void *)to; | 561 | kaddr = (void *)to; |
561 | 562 | ||
562 | kernel_insn_init(&insn, kaddr); | 563 | #ifdef CONFIG_X86_64 |
564 | is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); | ||
565 | #endif | ||
566 | insn_init(&insn, kaddr, is_64bit); | ||
563 | insn_get_length(&insn); | 567 | insn_get_length(&insn); |
564 | to += insn.length; | 568 | to += insn.length; |
565 | } while (to < ip); | 569 | } while (to < ip); |
@@ -616,7 +620,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
616 | else | 620 | else |
617 | regs.flags &= ~PERF_EFLAGS_EXACT; | 621 | regs.flags &= ~PERF_EFLAGS_EXACT; |
618 | 622 | ||
619 | if (perf_event_overflow(event, 1, &data, ®s)) | 623 | if (perf_event_overflow(event, &data, ®s)) |
620 | x86_pmu_stop(event, 0); | 624 | x86_pmu_stop(event, 0); |
621 | } | 625 | } |
622 | 626 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ead584fb6a7..7809d2bcb20 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -554,13 +554,102 @@ static __initconst const u64 p4_hw_cache_event_ids | |||
554 | [ C(RESULT_MISS) ] = -1, | 554 | [ C(RESULT_MISS) ] = -1, |
555 | }, | 555 | }, |
556 | }, | 556 | }, |
557 | [ C(NODE) ] = { | ||
558 | [ C(OP_READ) ] = { | ||
559 | [ C(RESULT_ACCESS) ] = -1, | ||
560 | [ C(RESULT_MISS) ] = -1, | ||
561 | }, | ||
562 | [ C(OP_WRITE) ] = { | ||
563 | [ C(RESULT_ACCESS) ] = -1, | ||
564 | [ C(RESULT_MISS) ] = -1, | ||
565 | }, | ||
566 | [ C(OP_PREFETCH) ] = { | ||
567 | [ C(RESULT_ACCESS) ] = -1, | ||
568 | [ C(RESULT_MISS) ] = -1, | ||
569 | }, | ||
570 | }, | ||
557 | }; | 571 | }; |
558 | 572 | ||
573 | /* | ||
574 | * Because of Netburst being quite restricted in how many | ||
575 | * identical events may run simultaneously, we introduce event aliases, | ||
576 | * i.e. different events which have the same functionality but | ||
577 | * use non-intersecting resources (ESCR/CCCR/counter registers). | ||
578 | * | ||
579 | * This allows us to relax the restrictions a bit and run two or more | ||
580 | * identical events together. | ||
581 | * | ||
582 | * Never set any custom internal bits such as P4_CONFIG_HT, | ||
583 | * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are | ||
584 | * either up to date automatically or not applicable at all. | ||
585 | */ | ||
586 | struct p4_event_alias { | ||
587 | u64 original; | ||
588 | u64 alternative; | ||
589 | } p4_event_aliases[] = { | ||
590 | { | ||
591 | /* | ||
592 | * Non-halted cycles can be substituted with non-sleeping cycles (see | ||
593 | * Intel SDM Vol3b for details). We need this alias to be able | ||
594 | * to run nmi-watchdog and 'perf top' (or any other user space tool | ||
595 | * which is interested in running PERF_COUNT_HW_CPU_CYCLES) | ||
596 | * simultaneously. | ||
597 | */ | ||
598 | .original = | ||
599 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | ||
600 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | ||
601 | .alternative = | ||
602 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) | | ||
603 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)| | ||
604 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)| | ||
605 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)| | ||
606 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)| | ||
607 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | | ||
608 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | | ||
609 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | | ||
610 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))| | ||
611 | p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT | | ||
612 | P4_CCCR_COMPARE), | ||
613 | }, | ||
614 | }; | ||
615 | |||
616 | static u64 p4_get_alias_event(u64 config) | ||
617 | { | ||
618 | u64 config_match; | ||
619 | int i; | ||
620 | |||
621 | /* | ||
622 | * Only events carrying the special mark are allowed; | ||
623 | * this makes sure the config didn't come in as a | ||
624 | * malformed RAW event. | ||
625 | */ | ||
626 | if (!(config & P4_CONFIG_ALIASABLE)) | ||
627 | return 0; | ||
628 | |||
629 | config_match = config & P4_CONFIG_EVENT_ALIAS_MASK; | ||
630 | |||
631 | for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) { | ||
632 | if (config_match == p4_event_aliases[i].original) { | ||
633 | config_match = p4_event_aliases[i].alternative; | ||
634 | break; | ||
635 | } else if (config_match == p4_event_aliases[i].alternative) { | ||
636 | config_match = p4_event_aliases[i].original; | ||
637 | break; | ||
638 | } | ||
639 | } | ||
640 | |||
641 | if (i >= ARRAY_SIZE(p4_event_aliases)) | ||
642 | return 0; | ||
643 | |||
644 | return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS); | ||
645 | } | ||
646 | |||
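p4_pmu_schedule_events() below uses this table as follows: when no counter/ESCR is free for a config, it swaps in the alias and retries, and a pass counter caps the ping-pong when both variants are already scheduled. A compact model of that retry loop; the config values are hypothetical placeholders for the packed ESCR/CCCR words.

#include <stdint.h>
#include <stdio.h>

struct alias { uint64_t original, alternative; };

static const struct alias aliases[] = { { 0x1111, 0x2222 } };

static uint64_t get_alias(uint64_t cfg)
{
	for (unsigned int i = 0; i < sizeof(aliases) / sizeof(aliases[0]); i++) {
		if (cfg == aliases[i].original)
			return aliases[i].alternative;
		if (cfg == aliases[i].alternative)
			return aliases[i].original;
	}
	return 0;				/* event is not aliasable */
}

/* pretend every suitable counter is already taken */
static int counter_available(uint64_t cfg) { (void)cfg; return 0; }

int main(void)
{
	uint64_t cfg = 0x1111;
	int pass = 0;

	for (;;) {
		if (pass > 2)			/* both variants already scheduled */
			break;
		if (counter_available(cfg)) {
			printf("scheduled %#llx\n", (unsigned long long)cfg);
			return 0;
		}
		cfg = get_alias(cfg);
		if (!cfg)			/* no alias to fall back to */
			break;
		pass++;
	}
	printf("no counter free for the event or its alias\n");
	return 1;
}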
559 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { | 647 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { |
560 | /* non-halted CPU clocks */ | 648 | /* non-halted CPU clocks */ |
561 | [PERF_COUNT_HW_CPU_CYCLES] = | 649 | [PERF_COUNT_HW_CPU_CYCLES] = |
562 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | 650 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | |
563 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | 651 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) | |
652 | P4_CONFIG_ALIASABLE, | ||
564 | 653 | ||
565 | /* | 654 | /* |
566 | * retired instructions | 655 | * retired instructions |
@@ -945,7 +1034,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
945 | 1034 | ||
946 | if (!x86_perf_event_set_period(event)) | 1035 | if (!x86_perf_event_set_period(event)) |
947 | continue; | 1036 | continue; |
948 | if (perf_event_overflow(event, 1, &data, regs)) | 1037 | if (perf_event_overflow(event, &data, regs)) |
949 | x86_pmu_stop(event, 0); | 1038 | x86_pmu_stop(event, 0); |
950 | } | 1039 | } |
951 | 1040 | ||
@@ -1120,6 +1209,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign | |||
1120 | struct p4_event_bind *bind; | 1209 | struct p4_event_bind *bind; |
1121 | unsigned int i, thread, num; | 1210 | unsigned int i, thread, num; |
1122 | int cntr_idx, escr_idx; | 1211 | int cntr_idx, escr_idx; |
1212 | u64 config_alias; | ||
1213 | int pass; | ||
1123 | 1214 | ||
1124 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | 1215 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
1125 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); | 1216 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); |
@@ -1128,6 +1219,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign | |||
1128 | 1219 | ||
1129 | hwc = &cpuc->event_list[i]->hw; | 1220 | hwc = &cpuc->event_list[i]->hw; |
1130 | thread = p4_ht_thread(cpu); | 1221 | thread = p4_ht_thread(cpu); |
1222 | pass = 0; | ||
1223 | |||
1224 | again: | ||
1225 | /* | ||
1226 | * It's possible to ping-pong between the original | ||
1227 | * and the alternative event if both are already | ||
1228 | * scheduled, so bound the number of retries. | ||
1229 | */ | ||
1230 | if (pass > 2) | ||
1231 | goto done; | ||
1232 | |||
1131 | bind = p4_config_get_bind(hwc->config); | 1233 | bind = p4_config_get_bind(hwc->config); |
1132 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | 1234 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); |
1133 | if (unlikely(escr_idx == -1)) | 1235 | if (unlikely(escr_idx == -1)) |
@@ -1141,8 +1243,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign | |||
1141 | } | 1243 | } |
1142 | 1244 | ||
1143 | cntr_idx = p4_next_cntr(thread, used_mask, bind); | 1245 | cntr_idx = p4_next_cntr(thread, used_mask, bind); |
1144 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) | 1246 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) { |
1145 | goto done; | 1247 | /* |
1248 | * Check whether an event alias is still available. | ||
1249 | */ | ||
1250 | config_alias = p4_get_alias_event(hwc->config); | ||
1251 | if (!config_alias) | ||
1252 | goto done; | ||
1253 | hwc->config = config_alias; | ||
1254 | pass++; | ||
1255 | goto again; | ||
1256 | } | ||
1146 | 1257 | ||
1147 | p4_pmu_swap_config_ts(hwc, cpu); | 1258 | p4_pmu_swap_config_ts(hwc, cpu); |
1148 | if (assign) | 1259 | if (assign) |