diff options
author | Chen Gong <gong.chen@linux.intel.com> | 2012-05-08 19:40:12 -0400 |
---|---|---|
committer | Mauro Carvalho Chehab <mchehab@redhat.com> | 2012-06-11 10:49:51 -0400 |
commit | e35fca4791fcdd43dc1fd769797df40c562ab491 (patch) | |
tree | 929b8537dabe15d32179430e82305493269979dd /drivers/edac | |
parent | cfaf025112d3856637ff34a767ef785ef5cf2ca9 (diff) |
edac: avoid mce decoding crash after edac driver unloaded
Some edac drivers register themselves as mce decoders via
notifier_chain. But in current notifier_chain implementation logic,
it doesn't accept same notifier registered twice. If so, it will be
wrong when adding/removing the element from the list. For example,
on one SandyBridge platform, remove module sb_edac and then trigger
one error, it will hit oops because it has no mce decoder registered
but related notifier_chain still points to an invalid callback
function. Here is an example:
Call Trace:
[<ffffffff8150ef6a>] atomic_notifier_call_chain+0x1a/0x20
[<ffffffff8102b936>] mce_log+0x46/0x180
[<ffffffff8102eaea>] apei_mce_report_mem_error+0x4a/0x60
[<ffffffff812e19d2>] ghes_do_proc+0x192/0x210
[<ffffffff812e2066>] ghes_proc+0x46/0x70
[<ffffffff812e20d8>] ghes_notify_sci+0x48/0x80
[<ffffffff8150ef05>] notifier_call_chain+0x55/0x80
[<ffffffff81076f1a>] __blocking_notifier_call_chain+0x5a/0x80
[<ffffffff812aea11>] ? acpi_os_wait_events_complete+0x23/0x23
[<ffffffff81076f56>] blocking_notifier_call_chain+0x16/0x20
[<ffffffff812ddc4d>] acpi_hed_notify+0x19/0x1b
[<ffffffff812b16bd>] acpi_device_notify+0x19/0x1b
[<ffffffff812beb38>] acpi_ev_notify_dispatch+0x67/0x7f
[<ffffffff812aea3a>] acpi_os_execute_deferred+0x29/0x36
[<ffffffff81069dc2>] process_one_work+0x132/0x450
[<ffffffff8106bbcb>] worker_thread+0x17b/0x3c0
[<ffffffff8106ba50>] ? manage_workers+0x120/0x120
[<ffffffff81070aee>] kthread+0x9e/0xb0
[<ffffffff81514724>] kernel_thread_helper+0x4/0x10
[<ffffffff81070a50>] ? kthread_freezable_should_stop+0x70/0x70
[<ffffffff81514720>] ? gs_change+0x13/0x13
Code: f3 49 89 d4 45 85 ed 4d 89 c6 48 8b 0f 74 48 48 85 c9 75 17 eb 41
0f 1f 80 00 00 00 00 41 83 ed 01 4c 89 f9 74 22 4d 85 ff 74 1d <4c> 8b
79 08 4c 89 e2 48 89 de 48 89 cf ff 11 4d 85 f6 74 04 41
RIP [<ffffffff8150eef6>] notifier_call_chain+0x46/0x80
RSP <ffff88042868fb20>
CR2: ffffffffa01af838
---[ end trace 0100930068e73e6f ]---
BUG: unable to handle kernel paging request at fffffffffffffff8
IP: [<ffffffff810705b0>] kthread_data+0x10/0x20
PGD 1a0d067 PUD 1a0e067 PMD 0
Oops: 0000 [#2] SMP
Only i7core_edac and sb_edac have such issues because they have more
than one memory controller which means they have to register mce
decoder many times.
Cc: <stable@vger.kernel.org> # 3.2 and upper
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/i7core_edac.c | 15 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 8 |
2 files changed, 8 insertions, 15 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index d27778f65a5d..a499c7ed820a 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c | |||
@@ -1814,12 +1814,6 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, | |||
1814 | if (mce->bank != 8) | 1814 | if (mce->bank != 8) |
1815 | return NOTIFY_DONE; | 1815 | return NOTIFY_DONE; |
1816 | 1816 | ||
1817 | #ifdef CONFIG_SMP | ||
1818 | /* Only handle if it is the right mc controller */ | ||
1819 | if (mce->socketid != pvt->i7core_dev->socket) | ||
1820 | return NOTIFY_DONE; | ||
1821 | #endif | ||
1822 | |||
1823 | smp_rmb(); | 1817 | smp_rmb(); |
1824 | if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { | 1818 | if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { |
1825 | smp_wmb(); | 1819 | smp_wmb(); |
@@ -2116,8 +2110,6 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) | |||
2116 | if (pvt->enable_scrub) | 2110 | if (pvt->enable_scrub) |
2117 | disable_sdram_scrub_setting(mci); | 2111 | disable_sdram_scrub_setting(mci); |
2118 | 2112 | ||
2119 | mce_unregister_decode_chain(&i7_mce_dec); | ||
2120 | |||
2121 | /* Disable EDAC polling */ | 2113 | /* Disable EDAC polling */ |
2122 | i7core_pci_ctl_release(pvt); | 2114 | i7core_pci_ctl_release(pvt); |
2123 | 2115 | ||
@@ -2222,8 +2214,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) | |||
2222 | /* DCLK for scrub rate setting */ | 2214 | /* DCLK for scrub rate setting */ |
2223 | pvt->dclk_freq = get_dclk_freq(); | 2215 | pvt->dclk_freq = get_dclk_freq(); |
2224 | 2216 | ||
2225 | mce_register_decode_chain(&i7_mce_dec); | ||
2226 | |||
2227 | return 0; | 2217 | return 0; |
2228 | 2218 | ||
2229 | fail0: | 2219 | fail0: |
@@ -2367,8 +2357,10 @@ static int __init i7core_init(void) | |||
2367 | 2357 | ||
2368 | pci_rc = pci_register_driver(&i7core_driver); | 2358 | pci_rc = pci_register_driver(&i7core_driver); |
2369 | 2359 | ||
2370 | if (pci_rc >= 0) | 2360 | if (pci_rc >= 0) { |
2361 | mce_register_decode_chain(&i7_mce_dec); | ||
2371 | return 0; | 2362 | return 0; |
2363 | } | ||
2372 | 2364 | ||
2373 | i7core_printk(KERN_ERR, "Failed to register device with error %d.\n", | 2365 | i7core_printk(KERN_ERR, "Failed to register device with error %d.\n", |
2374 | pci_rc); | 2366 | pci_rc); |
@@ -2384,6 +2376,7 @@ static void __exit i7core_exit(void) | |||
2384 | { | 2376 | { |
2385 | debugf2("MC: " __FILE__ ": %s()\n", __func__); | 2377 | debugf2("MC: " __FILE__ ": %s()\n", __func__); |
2386 | pci_unregister_driver(&i7core_driver); | 2378 | pci_unregister_driver(&i7core_driver); |
2379 | mce_unregister_decode_chain(&i7_mce_dec); | ||
2387 | } | 2380 | } |
2388 | 2381 | ||
2389 | module_init(i7core_init); | 2382 | module_init(i7core_init); |
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 4adaf4b7da99..a21ace0b709d 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c | |||
@@ -1604,8 +1604,6 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) | |||
1604 | debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", | 1604 | debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", |
1605 | __func__, mci, &sbridge_dev->pdev[0]->dev); | 1605 | __func__, mci, &sbridge_dev->pdev[0]->dev); |
1606 | 1606 | ||
1607 | mce_unregister_decode_chain(&sbridge_mce_dec); | ||
1608 | |||
1609 | /* Remove MC sysfs nodes */ | 1607 | /* Remove MC sysfs nodes */ |
1610 | edac_mc_del_mc(mci->dev); | 1608 | edac_mc_del_mc(mci->dev); |
1611 | 1609 | ||
@@ -1682,7 +1680,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) | |||
1682 | goto fail0; | 1680 | goto fail0; |
1683 | } | 1681 | } |
1684 | 1682 | ||
1685 | mce_register_decode_chain(&sbridge_mce_dec); | ||
1686 | return 0; | 1683 | return 0; |
1687 | 1684 | ||
1688 | fail0: | 1685 | fail0: |
@@ -1811,8 +1808,10 @@ static int __init sbridge_init(void) | |||
1811 | 1808 | ||
1812 | pci_rc = pci_register_driver(&sbridge_driver); | 1809 | pci_rc = pci_register_driver(&sbridge_driver); |
1813 | 1810 | ||
1814 | if (pci_rc >= 0) | 1811 | if (pci_rc >= 0) { |
1812 | mce_register_decode_chain(&sbridge_mce_dec); | ||
1815 | return 0; | 1813 | return 0; |
1814 | } | ||
1816 | 1815 | ||
1817 | sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", | 1816 | sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", |
1818 | pci_rc); | 1817 | pci_rc); |
@@ -1828,6 +1827,7 @@ static void __exit sbridge_exit(void) | |||
1828 | { | 1827 | { |
1829 | debugf2("MC: " __FILE__ ": %s()\n", __func__); | 1828 | debugf2("MC: " __FILE__ ": %s()\n", __func__); |
1830 | pci_unregister_driver(&sbridge_driver); | 1829 | pci_unregister_driver(&sbridge_driver); |
1830 | mce_unregister_decode_chain(&sbridge_mce_dec); | ||
1831 | } | 1831 | } |
1832 | 1832 | ||
1833 | module_init(sbridge_init); | 1833 | module_init(sbridge_init); |