diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-03 18:47:40 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-03 18:47:40 -0400 |
commit | 06b77b97338e906e8af73a9b5f97b9162aac239d (patch) | |
tree | fc4fc5c5153dc5a1979e7d46b08dea20054a1d9e | |
parent | 4aef77b2fe373cdba461925589b9d1d4468ee016 (diff) | |
parent | 716079f66eacd31d040db9cd0627ca0d625d6126 (diff) |
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next
Pull x86 RAS changes from Ingo Molnar:
"Improve mcheck device initialization and bootstrap robustness"
* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
mce: Panic when a core has reached a timeout
x86/mce: Improve mcheck_init_device() error handling
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 50 |
1 file changed, 41 insertions, 9 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 68317c80de7f..6cc800381d14 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -704,8 +704,7 @@ static int mce_timed_out(u64 *t) | |||
704 | if (!mca_cfg.monarch_timeout) | 704 | if (!mca_cfg.monarch_timeout) |
705 | goto out; | 705 | goto out; |
706 | if ((s64)*t < SPINUNIT) { | 706 | if ((s64)*t < SPINUNIT) { |
707 | /* CHECKME: Make panic default for 1 too? */ | 707 | if (mca_cfg.tolerant <= 1) |
708 | if (mca_cfg.tolerant < 1) | ||
709 | mce_panic("Timeout synchronizing machine check over CPUs", | 708 | mce_panic("Timeout synchronizing machine check over CPUs", |
710 | NULL, NULL); | 709 | NULL, NULL); |
711 | cpu_missing = 1; | 710 | cpu_missing = 1; |
@@ -2437,32 +2436,65 @@ static __init int mcheck_init_device(void) | |||
2437 | int err; | 2436 | int err; |
2438 | int i = 0; | 2437 | int i = 0; |
2439 | 2438 | ||
2440 | if (!mce_available(&boot_cpu_data)) | 2439 | if (!mce_available(&boot_cpu_data)) { |
2441 | return -EIO; | 2440 | err = -EIO; |
2441 | goto err_out; | ||
2442 | } | ||
2442 | 2443 | ||
2443 | zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); | 2444 | if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { |
2445 | err = -ENOMEM; | ||
2446 | goto err_out; | ||
2447 | } | ||
2444 | 2448 | ||
2445 | mce_init_banks(); | 2449 | mce_init_banks(); |
2446 | 2450 | ||
2447 | err = subsys_system_register(&mce_subsys, NULL); | 2451 | err = subsys_system_register(&mce_subsys, NULL); |
2448 | if (err) | 2452 | if (err) |
2449 | return err; | 2453 | goto err_out_mem; |
2450 | 2454 | ||
2451 | cpu_notifier_register_begin(); | 2455 | cpu_notifier_register_begin(); |
2452 | for_each_online_cpu(i) { | 2456 | for_each_online_cpu(i) { |
2453 | err = mce_device_create(i); | 2457 | err = mce_device_create(i); |
2454 | if (err) { | 2458 | if (err) { |
2455 | cpu_notifier_register_done(); | 2459 | cpu_notifier_register_done(); |
2456 | return err; | 2460 | goto err_device_create; |
2457 | } | 2461 | } |
2458 | } | 2462 | } |
2459 | 2463 | ||
2460 | register_syscore_ops(&mce_syscore_ops); | ||
2461 | __register_hotcpu_notifier(&mce_cpu_notifier); | 2464 | __register_hotcpu_notifier(&mce_cpu_notifier); |
2462 | cpu_notifier_register_done(); | 2465 | cpu_notifier_register_done(); |
2463 | 2466 | ||
2467 | register_syscore_ops(&mce_syscore_ops); | ||
2468 | |||
2464 | /* register character device /dev/mcelog */ | 2469 | /* register character device /dev/mcelog */ |
2465 | misc_register(&mce_chrdev_device); | 2470 | err = misc_register(&mce_chrdev_device); |
2471 | if (err) | ||
2472 | goto err_register; | ||
2473 | |||
2474 | return 0; | ||
2475 | |||
2476 | err_register: | ||
2477 | unregister_syscore_ops(&mce_syscore_ops); | ||
2478 | |||
2479 | cpu_notifier_register_begin(); | ||
2480 | __unregister_hotcpu_notifier(&mce_cpu_notifier); | ||
2481 | cpu_notifier_register_done(); | ||
2482 | |||
2483 | err_device_create: | ||
2484 | /* | ||
2485 | * We didn't keep track of which devices were created above, but | ||
2486 | * even if we had, the set of online cpus might have changed. | ||
2487 | * Play safe and remove for every possible cpu, since | ||
2488 | * mce_device_remove() will do the right thing. | ||
2489 | */ | ||
2490 | for_each_possible_cpu(i) | ||
2491 | mce_device_remove(i); | ||
2492 | |||
2493 | err_out_mem: | ||
2494 | free_cpumask_var(mce_device_initialized); | ||
2495 | |||
2496 | err_out: | ||
2497 | pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err); | ||
2466 | 2498 | ||
2467 | return err; | 2499 | return err; |
2468 | } | 2500 | } |