aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
authorAndi Kleen <andi@firstfloor.org>2009-02-12 07:49:36 -0500
committerH. Peter Anvin <hpa@zytor.com>2009-02-24 16:41:00 -0500
commit88ccbedd9ca85d1aca6a6f99df48dce87b7c02d4 (patch)
tree9951e6f3554789523006f187e69286f5ed541b50 /arch/x86/kernel/cpu
parent03195c6b40f2b4db92545921daa7c3a19b4e4c32 (diff)
x86, mce, cmci: add CMCI support
Impact: Major new feature Intel CMCI (Corrected Machine Check Interrupt) is a new feature on Nehalem CPUs. It allows the CPU to trigger interrupts on corrected events, which allows faster reaction to them than with the traditional polling timer. Also use CMCI to discover shared banks. Machine check banks can be shared by CPU threads or even cores. Using the CMCI enable bit it is possible to detect the fact that another CPU already saw a specific bank. Use this to assign shared banks only to one CPU to avoid reporting duplicated events. On CPU hot unplug, bank sharing is rediscovered. This is done using a thread that cycles through all the CPUs. To avoid races between the poller and CMCI we only poll for banks that are not CMCI capable and only check CMCI owned banks on an interrupt. The shared banks ownership information is currently only used for CMCI interrupts, not polled banks. The sharing discovery code follows the algorithm recommended in the IA32 SDM Vol3a 14.5.2.1. The CMCI interrupt handler just calls the machine check poller to pick up the machine check event that caused the interrupt. I decided not to implement a separate threshold event like the AMD version has, because the threshold is always one currently and adding another event didn't seem to add any value. Some code inspired by Yunhong Jiang's Xen implementation, which was in turn inspired by an earlier CMCI implementation by me. Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c16
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c205
2 files changed, 218 insertions, 3 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index a8ff38bfa6e..bfbd5323a63 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -166,7 +166,7 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
166 panic(msg); 166 panic(msg);
167} 167}
168 168
169static int mce_available(struct cpuinfo_x86 *c) 169int mce_available(struct cpuinfo_x86 *c)
170{ 170{
171 if (mce_dont_init) 171 if (mce_dont_init)
172 return 0; 172 return 0;
@@ -1060,9 +1060,12 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
1060static void mce_disable_cpu(void *h) 1060static void mce_disable_cpu(void *h)
1061{ 1061{
1062 int i; 1062 int i;
1063 unsigned long action = *(unsigned long *)h;
1063 1064
1064 if (!mce_available(&current_cpu_data)) 1065 if (!mce_available(&current_cpu_data))
1065 return; 1066 return;
1067 if (!(action & CPU_TASKS_FROZEN))
1068 cmci_clear();
1066 for (i = 0; i < banks; i++) 1069 for (i = 0; i < banks; i++)
1067 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 1070 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
1068} 1071}
@@ -1070,9 +1073,12 @@ static void mce_disable_cpu(void *h)
1070static void mce_reenable_cpu(void *h) 1073static void mce_reenable_cpu(void *h)
1071{ 1074{
1072 int i; 1075 int i;
1076 unsigned long action = *(unsigned long *)h;
1073 1077
1074 if (!mce_available(&current_cpu_data)) 1078 if (!mce_available(&current_cpu_data))
1075 return; 1079 return;
1080 if (!(action & CPU_TASKS_FROZEN))
1081 cmci_reenable();
1076 for (i = 0; i < banks; i++) 1082 for (i = 0; i < banks; i++)
1077 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); 1083 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
1078} 1084}
@@ -1100,13 +1106,17 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
1100 case CPU_DOWN_PREPARE: 1106 case CPU_DOWN_PREPARE:
1101 case CPU_DOWN_PREPARE_FROZEN: 1107 case CPU_DOWN_PREPARE_FROZEN:
1102 del_timer_sync(t); 1108 del_timer_sync(t);
1103 smp_call_function_single(cpu, mce_disable_cpu, NULL, 1); 1109 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
1104 break; 1110 break;
1105 case CPU_DOWN_FAILED: 1111 case CPU_DOWN_FAILED:
1106 case CPU_DOWN_FAILED_FROZEN: 1112 case CPU_DOWN_FAILED_FROZEN:
1107 t->expires = round_jiffies_relative(jiffies + next_interval); 1113 t->expires = round_jiffies_relative(jiffies + next_interval);
1108 add_timer_on(t, cpu); 1114 add_timer_on(t, cpu);
1109 smp_call_function_single(cpu, mce_reenable_cpu, NULL, 1); 1115 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1116 break;
1117 case CPU_POST_DEAD:
1118 /* intentionally ignoring frozen here */
1119 cmci_rediscover(cpu);
1110 break; 1120 break;
1111 } 1121 }
1112 return NOTIFY_OK; 1122 return NOTIFY_OK;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 1b1491a76b5..a518ec8c6f8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -1,6 +1,8 @@
1/* 1/*
2 * Intel specific MCE features. 2 * Intel specific MCE features.
3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> 3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
4 * Copyright (C) 2008, 2009 Intel Corporation
5 * Author: Andi Kleen
4 */ 6 */
5 7
6#include <linux/init.h> 8#include <linux/init.h>
@@ -12,6 +14,7 @@
12#include <asm/hw_irq.h> 14#include <asm/hw_irq.h>
13#include <asm/idle.h> 15#include <asm/idle.h>
14#include <asm/therm_throt.h> 16#include <asm/therm_throt.h>
17#include <asm/apic.h>
15 18
16asmlinkage void smp_thermal_interrupt(void) 19asmlinkage void smp_thermal_interrupt(void)
17{ 20{
@@ -84,7 +87,209 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
84 return; 87 return;
85} 88}
86 89
90/*
91 * Support for Intel Correct Machine Check Interrupts. This allows
92 * the CPU to raise an interrupt when a corrected machine check happened.
93 * Normally we pick those up using a regular polling timer.
94 * Also supports reliable discovery of shared banks.
95 */
96
97static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
98
99/*
100 * cmci_discover_lock protects against parallel discovery attempts
101 * which could race against each other.
102 */
103static DEFINE_SPINLOCK(cmci_discover_lock);
104
105#define CMCI_THRESHOLD 1
106
107static __cpuinit int cmci_supported(int *banks)
108{
109 u64 cap;
110
111 /*
112 * Vendor check is not strictly needed, but the initial
113 * initialization is vendor keyed and this
114 * makes sure none of the backdoors are entered otherwise.
115 */
116 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
117 return 0;
118 if (!cpu_has_apic || lapic_get_maxlvt() < 6)
119 return 0;
120 rdmsrl(MSR_IA32_MCG_CAP, cap);
121 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
122 return !!(cap & MCG_CMCI_P);
123}
124
125/*
126 * The interrupt handler. This is called on every event.
127 * Just call the poller directly to log any events.
128 * This could in theory increase the threshold under high load,
129 * but doesn't for now.
130 */
131static void intel_threshold_interrupt(void)
132{
133 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
134 mce_notify_user();
135}
136
137static void print_update(char *type, int *hdr, int num)
138{
139 if (*hdr == 0)
140 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
141 *hdr = 1;
142 printk(KERN_CONT " %s:%d", type, num);
143}
144
145/*
146 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
147 * on this CPU. Use the algorithm recommended in the SDM to discover shared
148 * banks.
149 */
150static __cpuinit void cmci_discover(int banks, int boot)
151{
152 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
153 int hdr = 0;
154 int i;
155
156 spin_lock(&cmci_discover_lock);
157 for (i = 0; i < banks; i++) {
158 u64 val;
159
160 if (test_bit(i, owned))
161 continue;
162
163 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
164
165 /* Already owned by someone else? */
166 if (val & CMCI_EN) {
167 if (test_and_clear_bit(i, owned) || boot)
168 print_update("SHD", &hdr, i);
169 __clear_bit(i, __get_cpu_var(mce_poll_banks));
170 continue;
171 }
172
173 val |= CMCI_EN | CMCI_THRESHOLD;
174 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
175 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
176
177 /* Did the enable bit stick? -- the bank supports CMCI */
178 if (val & CMCI_EN) {
179 if (!test_and_set_bit(i, owned) || boot)
180 print_update("CMCI", &hdr, i);
181 __clear_bit(i, __get_cpu_var(mce_poll_banks));
182 } else {
183 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
184 }
185 }
186 spin_unlock(&cmci_discover_lock);
187 if (hdr)
188 printk(KERN_CONT "\n");
189}
190
191/*
192 * Just in case we missed an event during initialization check
193 * all the CMCI owned banks.
194 */
195__cpuinit void cmci_recheck(void)
196{
197 unsigned long flags;
198 int banks;
199
200 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
201 return;
202 local_irq_save(flags);
203 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
204 local_irq_restore(flags);
205}
206
207/*
208 * Disable CMCI on this CPU for all banks it owns when it goes down.
209 * This allows other CPUs to claim the banks on rediscovery.
210 */
211void __cpuexit cmci_clear(void)
212{
213 int i;
214 int banks;
215 u64 val;
216
217 if (!cmci_supported(&banks))
218 return;
219 spin_lock(&cmci_discover_lock);
220 for (i = 0; i < banks; i++) {
221 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
222 continue;
223 /* Disable CMCI */
224 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
225 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
226 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
227 __clear_bit(i, __get_cpu_var(mce_banks_owned));
228 }
229 spin_unlock(&cmci_discover_lock);
230}
231
232/*
233 * After a CPU went down cycle through all the others and rediscover
234 * Must run in process context.
235 */
236void __cpuexit cmci_rediscover(int dying)
237{
238 int banks;
239 int cpu;
240 cpumask_var_t old;
241
242 if (!cmci_supported(&banks))
243 return;
244 if (!alloc_cpumask_var(&old, GFP_KERNEL))
245 return;
246 cpumask_copy(old, &current->cpus_allowed);
247
248 for_each_online_cpu (cpu) {
249 if (cpu == dying)
250 continue;
251 if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
252 continue;
253 /* Recheck banks in case CPUs don't all have the same */
254 if (cmci_supported(&banks))
255 cmci_discover(banks, 0);
256 }
257
258 set_cpus_allowed_ptr(current, old);
259 free_cpumask_var(old);
260}
261
262/*
263 * Reenable CMCI on this CPU in case a CPU down failed.
264 */
265void cmci_reenable(void)
266{
267 int banks;
268 if (cmci_supported(&banks))
269 cmci_discover(banks, 0);
270}
271
272static __cpuinit void intel_init_cmci(void)
273{
274 int banks;
275
276 if (!cmci_supported(&banks))
277 return;
278
279 mce_threshold_vector = intel_threshold_interrupt;
280 cmci_discover(banks, 1);
281 /*
282 * For CPU #0 this runs with still disabled APIC, but that's
283 * ok because only the vector is set up. We still do another
284 * check for the banks later for CPU #0 just to make sure
285 * to not miss any events.
286 */
287 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
288 cmci_recheck();
289}
290
87void mce_intel_feature_init(struct cpuinfo_x86 *c) 291void mce_intel_feature_init(struct cpuinfo_x86 *c)
88{ 292{
89 intel_init_thermal(c); 293 intel_init_thermal(c);
294 intel_init_cmci();
90} 295}