aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-06-03 18:47:40 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-06-03 18:47:40 -0400
commit: 06b77b97338e906e8af73a9b5f97b9162aac239d (patch)
tree: fc4fc5c5153dc5a1979e7d46b08dea20054a1d9e
parent: 4aef77b2fe373cdba461925589b9d1d4468ee016 (diff)
parent: 716079f66eacd31d040db9cd0627ca0d625d6126 (diff)
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next
Pull x86 RAS changes from Ingo Molnar:
 "Improve mcheck device initialization and bootstrap robustness"

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  mce: Panic when a core has reached a timeout
  x86/mce: Improve mcheck_init_device() error handling
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 50
1 file changed, 41 insertions, 9 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 68317c80de7f..6cc800381d14 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -704,8 +704,7 @@ static int mce_timed_out(u64 *t)
704 if (!mca_cfg.monarch_timeout) 704 if (!mca_cfg.monarch_timeout)
705 goto out; 705 goto out;
706 if ((s64)*t < SPINUNIT) { 706 if ((s64)*t < SPINUNIT) {
707 /* CHECKME: Make panic default for 1 too? */ 707 if (mca_cfg.tolerant <= 1)
708 if (mca_cfg.tolerant < 1)
709 mce_panic("Timeout synchronizing machine check over CPUs", 708 mce_panic("Timeout synchronizing machine check over CPUs",
710 NULL, NULL); 709 NULL, NULL);
711 cpu_missing = 1; 710 cpu_missing = 1;
@@ -2437,32 +2436,65 @@ static __init int mcheck_init_device(void)
2437 int err; 2436 int err;
2438 int i = 0; 2437 int i = 0;
2439 2438
2440 if (!mce_available(&boot_cpu_data)) 2439 if (!mce_available(&boot_cpu_data)) {
2441 return -EIO; 2440 err = -EIO;
2441 goto err_out;
2442 }
2442 2443
2443 zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); 2444 if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
2445 err = -ENOMEM;
2446 goto err_out;
2447 }
2444 2448
2445 mce_init_banks(); 2449 mce_init_banks();
2446 2450
2447 err = subsys_system_register(&mce_subsys, NULL); 2451 err = subsys_system_register(&mce_subsys, NULL);
2448 if (err) 2452 if (err)
2449 return err; 2453 goto err_out_mem;
2450 2454
2451 cpu_notifier_register_begin(); 2455 cpu_notifier_register_begin();
2452 for_each_online_cpu(i) { 2456 for_each_online_cpu(i) {
2453 err = mce_device_create(i); 2457 err = mce_device_create(i);
2454 if (err) { 2458 if (err) {
2455 cpu_notifier_register_done(); 2459 cpu_notifier_register_done();
2456 return err; 2460 goto err_device_create;
2457 } 2461 }
2458 } 2462 }
2459 2463
2460 register_syscore_ops(&mce_syscore_ops);
2461 __register_hotcpu_notifier(&mce_cpu_notifier); 2464 __register_hotcpu_notifier(&mce_cpu_notifier);
2462 cpu_notifier_register_done(); 2465 cpu_notifier_register_done();
2463 2466
2467 register_syscore_ops(&mce_syscore_ops);
2468
2464 /* register character device /dev/mcelog */ 2469 /* register character device /dev/mcelog */
2465 misc_register(&mce_chrdev_device); 2470 err = misc_register(&mce_chrdev_device);
2471 if (err)
2472 goto err_register;
2473
2474 return 0;
2475
2476err_register:
2477 unregister_syscore_ops(&mce_syscore_ops);
2478
2479 cpu_notifier_register_begin();
2480 __unregister_hotcpu_notifier(&mce_cpu_notifier);
2481 cpu_notifier_register_done();
2482
2483err_device_create:
2484 /*
2485 * We didn't keep track of which devices were created above, but
2486 * even if we had, the set of online cpus might have changed.
2487 * Play safe and remove for every possible cpu, since
2488 * mce_device_remove() will do the right thing.
2489 */
2490 for_each_possible_cpu(i)
2491 mce_device_remove(i);
2492
2493err_out_mem:
2494 free_cpumask_var(mce_device_initialized);
2495
2496err_out:
2497 pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
2466 2498
2467 return err; 2499 return err;
2468} 2500}