about summary refs log tree commit diff stats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r-- arch/x86_64/kernel/mce.c 101
1 files changed, 67 insertions, 34 deletions
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 968613572b9a..7c8ab423abe3 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -37,8 +37,13 @@ atomic_t mce_entry;
37 37
38static int mce_dont_init; 38static int mce_dont_init;
39 39
40/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, 40/*
41 3: never panic or exit (for testing only) */ 41 * Tolerant levels:
42 * 0: always panic on uncorrected errors, log corrected errors
43 * 1: panic or SIGBUS on uncorrected errors, log corrected errors
44 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
45 * 3: never panic or SIGBUS, log all errors (for testing only)
46 */
42static int tolerant = 1; 47static int tolerant = 1;
43static int banks; 48static int banks;
44static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; 49static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
@@ -132,9 +137,6 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
132{ 137{
133 int i; 138 int i;
134 139
135 if (tolerant >= 3)
136 return;
137
138 oops_begin(); 140 oops_begin();
139 for (i = 0; i < MCE_LOG_LEN; i++) { 141 for (i = 0; i < MCE_LOG_LEN; i++) {
140 unsigned long tsc = mcelog.entry[i].tsc; 142 unsigned long tsc = mcelog.entry[i].tsc;
@@ -178,11 +180,19 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
178void do_machine_check(struct pt_regs * regs, long error_code) 180void do_machine_check(struct pt_regs * regs, long error_code)
179{ 181{
180 struct mce m, panicm; 182 struct mce m, panicm;
181 int nowayout = (tolerant < 1);
182 int kill_it = 0;
183 u64 mcestart = 0; 183 u64 mcestart = 0;
184 int i; 184 int i;
185 int panicm_found = 0; 185 int panicm_found = 0;
186 /*
187 * If no_way_out gets set, there is no safe way to recover from this
188 * MCE. If tolerant is cranked up, we'll try anyway.
189 */
190 int no_way_out = 0;
191 /*
192 * If kill_it gets set, there might be a way to recover from this
193 * error.
194 */
195 int kill_it = 0;
186 196
187 atomic_inc(&mce_entry); 197 atomic_inc(&mce_entry);
188 198
@@ -194,8 +204,9 @@ void do_machine_check(struct pt_regs * regs, long error_code)
194 memset(&m, 0, sizeof(struct mce)); 204 memset(&m, 0, sizeof(struct mce));
195 m.cpu = smp_processor_id(); 205 m.cpu = smp_processor_id();
196 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); 206 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
207 /* if the restart IP is not valid, we're done for */
197 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 208 if (!(m.mcgstatus & MCG_STATUS_RIPV))
198 kill_it = 1; 209 no_way_out = 1;
199 210
200 rdtscll(mcestart); 211 rdtscll(mcestart);
201 barrier(); 212 barrier();
@@ -214,10 +225,18 @@ void do_machine_check(struct pt_regs * regs, long error_code)
214 continue; 225 continue;
215 226
216 if (m.status & MCI_STATUS_EN) { 227 if (m.status & MCI_STATUS_EN) {
217 /* In theory _OVER could be a nowayout too, but 228 /* if PCC was set, there's no way out */
218 assume any overflowed errors were no fatal. */ 229 no_way_out |= !!(m.status & MCI_STATUS_PCC);
219 nowayout |= !!(m.status & MCI_STATUS_PCC); 230 /*
220 kill_it |= !!(m.status & MCI_STATUS_UC); 231 * If this error was uncorrectable and there was
232 * an overflow, we're in trouble. If no overflow,
233 * we might get away with just killing a task.
234 */
235 if (m.status & MCI_STATUS_UC) {
236 if (tolerant < 1 || m.status & MCI_STATUS_OVER)
237 no_way_out = 1;
238 kill_it = 1;
239 }
221 } 240 }
222 241
223 if (m.status & MCI_STATUS_MISCV) 242 if (m.status & MCI_STATUS_MISCV)
@@ -228,7 +247,6 @@ void do_machine_check(struct pt_regs * regs, long error_code)
228 mce_get_rip(&m, regs); 247 mce_get_rip(&m, regs);
229 if (error_code >= 0) 248 if (error_code >= 0)
230 rdtscll(m.tsc); 249 rdtscll(m.tsc);
231 wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
232 if (error_code != -2) 250 if (error_code != -2)
233 mce_log(&m); 251 mce_log(&m);
234 252
@@ -251,37 +269,52 @@ void do_machine_check(struct pt_regs * regs, long error_code)
251 the last one (shouldn't happen, just being safe). */ 269 the last one (shouldn't happen, just being safe). */
252 if (!panicm_found) 270 if (!panicm_found)
253 panicm = m; 271 panicm = m;
254 if (nowayout) 272
273 /*
274 * If we have decided that we just CAN'T continue, and the user
275 * has not set tolerant to an insane level, give up and die.
276 */
277 if (no_way_out && tolerant < 3)
255 mce_panic("Machine check", &panicm, mcestart); 278 mce_panic("Machine check", &panicm, mcestart);
256 if (kill_it) { 279
280 /*
281 * If the error seems to be unrecoverable, something should be
282 * done. Try to kill as little as possible. If we can kill just
283 * one task, do that. If the user has set the tolerance very
284 * high, don't try to do anything at all.
285 */
286 if (kill_it && tolerant < 3) {
257 int user_space = 0; 287 int user_space = 0;
258 288
259 if (m.mcgstatus & MCG_STATUS_RIPV) 289 /*
290 * If the EIPV bit is set, it means the saved IP is the
291 * instruction which caused the MCE.
292 */
293 if (m.mcgstatus & MCG_STATUS_EIPV)
260 user_space = panicm.rip && (panicm.cs & 3); 294 user_space = panicm.rip && (panicm.cs & 3);
261 295
262 /* When the machine was in user space and the CPU didn't get 296 /*
263 confused it's normally not necessary to panic, unless you 297 * If we know that the error was in user space, send a
264 are paranoid (tolerant == 0) 298 * SIGBUS. Otherwise, panic if tolerance is low.
265 299 *
266 RED-PEN could be more tolerant for MCEs in idle, 300 * do_exit() takes an awful lot of locks and has a slight
267 but most likely they occur at boot anyways, where 301 * risk of deadlocking.
268 it is best to just halt the machine. */ 302 */
269 if ((!user_space && (panic_on_oops || tolerant < 2)) || 303 if (user_space) {
270 (unsigned)current->pid <= 1)
271 mce_panic("Uncorrected machine check", &panicm, mcestart);
272
273 /* do_exit takes an awful lot of locks and has as
274 slight risk of deadlocking. If you don't want that
275 don't set tolerant >= 2 */
276 if (tolerant < 3)
277 do_exit(SIGBUS); 304 do_exit(SIGBUS);
305 } else if (panic_on_oops || tolerant < 2) {
306 mce_panic("Uncorrected machine check",
307 &panicm, mcestart);
308 }
278 } 309 }
279 310
280 /* notify userspace ASAP */ 311 /* notify userspace ASAP */
281 set_thread_flag(TIF_MCE_NOTIFY); 312 set_thread_flag(TIF_MCE_NOTIFY);
282 313
283 out: 314 out:
284 /* Last thing done in the machine check exception to clear state. */ 315 /* the last thing we do is clear state */
316 for (i = 0; i < banks; i++)
317 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
285 wrmsrl(MSR_IA32_MCG_STATUS, 0); 318 wrmsrl(MSR_IA32_MCG_STATUS, 0);
286 out2: 319 out2:
287 atomic_dec(&mce_entry); 320 atomic_dec(&mce_entry);
@@ -506,7 +539,7 @@ static int mce_open(struct inode *inode, struct file *file)
506 539
507 spin_unlock(&mce_state_lock); 540 spin_unlock(&mce_state_lock);
508 541
509 return 0; 542 return nonseekable_open(inode, file);
510} 543}
511 544
512static int mce_release(struct inode *inode, struct file *file) 545static int mce_release(struct inode *inode, struct file *file)