aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVishal Verma <vishal.l.verma@intel.com>2016-07-24 00:51:21 -0400
committerDan Williams <dan.j.williams@intel.com>2016-07-24 11:04:04 -0400
commit6839a6d96f4ea0254266d60208c1fbbd53ade546 (patch)
treeb1be166d2e7028ce5185e7ad642b5f1598a0cb07
parentbdf97013ced5f263da0dc9d559f5c09e922d8423 (diff)
nfit: do an ARS scrub on hitting a latent media error
When a latent (unknown to 'badblocks') error is encountered, it will trigger a machine check exception. On a system with machine check recovery, this will only SIGBUS the process(es) which had the bad page mapped (as opposed to a kernel panic on platforms without machine check recovery features). In the former case, we want to trigger a full rescan of that nvdimm bus. This will allow any additional, new errors to be captured in the block devices' badblocks lists, and offending operations on them can be trapped early, avoiding machine checks. This is done by registering a callback function with the x86_mce_decoder_chain and calling the new ars_rescan functionality with the address in the mce notification. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: Tony Luck <tony.luck@intel.com> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r--drivers/acpi/nfit/Makefile1
-rw-r--r--drivers/acpi/nfit/core.c26
-rw-r--r--drivers/acpi/nfit/mce.c89
-rw-r--r--drivers/acpi/nfit/nfit.h20
-rw-r--r--tools/testing/nvdimm/Kbuild1
5 files changed, 133 insertions, 4 deletions
diff --git a/drivers/acpi/nfit/Makefile b/drivers/acpi/nfit/Makefile
index eb95c5aff83b..a407e769f103 100644
--- a/drivers/acpi/nfit/Makefile
+++ b/drivers/acpi/nfit/Makefile
@@ -1,2 +1,3 @@
1obj-$(CONFIG_ACPI_NFIT) := nfit.o 1obj-$(CONFIG_ACPI_NFIT) := nfit.o
2nfit-y := core.o 2nfit-y := core.o
3nfit-$(CONFIG_X86_MCE) += mce.o
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 19d0dfdf9633..69b35b7f97a1 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -51,6 +51,9 @@ module_param(disable_vendor_specific, bool, S_IRUGO);
51MODULE_PARM_DESC(disable_vendor_specific, 51MODULE_PARM_DESC(disable_vendor_specific,
52 "Limit commands to the publicly specified set\n"); 52 "Limit commands to the publicly specified set\n");
53 53
54LIST_HEAD(acpi_descs);
55DEFINE_MUTEX(acpi_desc_lock);
56
54static struct workqueue_struct *nfit_wq; 57static struct workqueue_struct *nfit_wq;
55 58
56struct nfit_table_prev { 59struct nfit_table_prev {
@@ -361,7 +364,7 @@ static const char *spa_type_name(u16 type)
361 return to_name[type]; 364 return to_name[type];
362} 365}
363 366
364static int nfit_spa_type(struct acpi_nfit_system_address *spa) 367int nfit_spa_type(struct acpi_nfit_system_address *spa)
365{ 368{
366 int i; 369 int i;
367 370
@@ -898,8 +901,6 @@ static ssize_t scrub_show(struct device *dev,
898 return rc; 901 return rc;
899} 902}
900 903
901static int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
902
903static ssize_t scrub_store(struct device *dev, 904static ssize_t scrub_store(struct device *dev,
904 struct device_attribute *attr, const char *buf, size_t size) 905 struct device_attribute *attr, const char *buf, size_t size)
905{ 906{
@@ -2400,6 +2401,11 @@ static void acpi_nfit_destruct(void *data)
2400 struct acpi_nfit_desc *acpi_desc = data; 2401 struct acpi_nfit_desc *acpi_desc = data;
2401 struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus); 2402 struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
2402 2403
2404 /*
2405 * Destruct under acpi_desc_lock so that nfit_handle_mce does not
2406 * race teardown
2407 */
2408 mutex_lock(&acpi_desc_lock);
2403 acpi_desc->cancel = 1; 2409 acpi_desc->cancel = 1;
2404 /* 2410 /*
2405 * Bounce the nvdimm bus lock to make sure any in-flight 2411 * Bounce the nvdimm bus lock to make sure any in-flight
@@ -2414,6 +2420,8 @@ static void acpi_nfit_destruct(void *data)
2414 sysfs_put(acpi_desc->scrub_count_state); 2420 sysfs_put(acpi_desc->scrub_count_state);
2415 nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 2421 nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
2416 acpi_desc->nvdimm_bus = NULL; 2422 acpi_desc->nvdimm_bus = NULL;
2423 list_del(&acpi_desc->list);
2424 mutex_unlock(&acpi_desc_lock);
2417} 2425}
2418 2426
2419int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz) 2427int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
@@ -2439,6 +2447,11 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
2439 rc = acpi_nfit_desc_init_scrub_attr(acpi_desc); 2447 rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
2440 if (rc) 2448 if (rc)
2441 return rc; 2449 return rc;
2450
2451 /* register this acpi_desc for mce notifications */
2452 mutex_lock(&acpi_desc_lock);
2453 list_add_tail(&acpi_desc->list, &acpi_descs);
2454 mutex_unlock(&acpi_desc_lock);
2442 } 2455 }
2443 2456
2444 mutex_lock(&acpi_desc->init_mutex); 2457 mutex_lock(&acpi_desc->init_mutex);
@@ -2549,7 +2562,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
2549 return 0; 2562 return 0;
2550} 2563}
2551 2564
2552static int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc) 2565int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
2553{ 2566{
2554 struct device *dev = acpi_desc->dev; 2567 struct device *dev = acpi_desc->dev;
2555 struct nfit_spa *nfit_spa; 2568 struct nfit_spa *nfit_spa;
@@ -2598,6 +2611,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
2598 INIT_LIST_HEAD(&acpi_desc->flushes); 2611 INIT_LIST_HEAD(&acpi_desc->flushes);
2599 INIT_LIST_HEAD(&acpi_desc->memdevs); 2612 INIT_LIST_HEAD(&acpi_desc->memdevs);
2600 INIT_LIST_HEAD(&acpi_desc->dimms); 2613 INIT_LIST_HEAD(&acpi_desc->dimms);
2614 INIT_LIST_HEAD(&acpi_desc->list);
2601 mutex_init(&acpi_desc->init_mutex); 2615 mutex_init(&acpi_desc->init_mutex);
2602 INIT_WORK(&acpi_desc->work, acpi_nfit_scrub); 2616 INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
2603} 2617}
@@ -2750,13 +2764,17 @@ static __init int nfit_init(void)
2750 if (!nfit_wq) 2764 if (!nfit_wq)
2751 return -ENOMEM; 2765 return -ENOMEM;
2752 2766
2767 nfit_mce_register();
2768
2753 return acpi_bus_register_driver(&acpi_nfit_driver); 2769 return acpi_bus_register_driver(&acpi_nfit_driver);
2754} 2770}
2755 2771
2756static __exit void nfit_exit(void) 2772static __exit void nfit_exit(void)
2757{ 2773{
2774 nfit_mce_unregister();
2758 acpi_bus_unregister_driver(&acpi_nfit_driver); 2775 acpi_bus_unregister_driver(&acpi_nfit_driver);
2759 destroy_workqueue(nfit_wq); 2776 destroy_workqueue(nfit_wq);
2777 WARN_ON(!list_empty(&acpi_descs));
2760} 2778}
2761 2779
2762module_init(nfit_init); 2780module_init(nfit_init);
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
new file mode 100644
index 000000000000..4c745bf389fe
--- /dev/null
+++ b/drivers/acpi/nfit/mce.c
@@ -0,0 +1,89 @@
1/*
2 * NFIT - Machine Check Handler
3 *
4 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of version 2 of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 */
15#include <linux/notifier.h>
16#include <linux/acpi.h>
17#include <asm/mce.h>
18#include "nfit.h"
19
20static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
21 void *data)
22{
23 struct mce *mce = (struct mce *)data;
24 struct acpi_nfit_desc *acpi_desc;
25 struct nfit_spa *nfit_spa;
26
27 /* We only care about memory errors */
28 if (!(mce->status & MCACOD))
29 return NOTIFY_DONE;
30
31 /*
32 * mce->addr contains the physical addr accessed that caused the
33 * machine check. We need to walk through the list of NFITs, and see
34 * if any of them matches that address, and only then start a scrub.
35 */
36 mutex_lock(&acpi_desc_lock);
37 list_for_each_entry(acpi_desc, &acpi_descs, list) {
38 struct device *dev = acpi_desc->dev;
39 int found_match = 0;
40
41 mutex_lock(&acpi_desc->init_mutex);
42 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
43 struct acpi_nfit_system_address *spa = nfit_spa->spa;
44
45 if (nfit_spa_type(spa) == NFIT_SPA_PM)
46 continue;
47 /* find the spa that covers the mce addr */
48 if (spa->address > mce->addr)
49 continue;
50 if ((spa->address + spa->length - 1) < mce->addr)
51 continue;
52 found_match = 1;
53 dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
54 __func__, spa->range_index, spa->address,
55 spa->length);
56 /*
57 * We can break at the first match because we're going
58 * to rescan all the SPA ranges. There shouldn't be any
59 * aliasing anyway.
60 */
61 break;
62 }
63 mutex_unlock(&acpi_desc->init_mutex);
64
65 /*
66 * We can ignore an -EBUSY here because if an ARS is already
67 * in progress, just let that be the last authoritative one
68 */
69 if (found_match)
70 acpi_nfit_ars_rescan(acpi_desc);
71 }
72
73 mutex_unlock(&acpi_desc_lock);
74 return NOTIFY_DONE;
75}
76
77static struct notifier_block nfit_mce_dec = {
78 .notifier_call = nfit_handle_mce,
79};
80
81void nfit_mce_register(void)
82{
83 mce_register_decode_chain(&nfit_mce_dec);
84}
85
86void nfit_mce_unregister(void)
87{
88 mce_unregister_decode_chain(&nfit_mce_dec);
89}
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 6ecf337c97aa..ba6074a06958 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -16,6 +16,7 @@
16#define __NFIT_H__ 16#define __NFIT_H__
17#include <linux/workqueue.h> 17#include <linux/workqueue.h>
18#include <linux/libnvdimm.h> 18#include <linux/libnvdimm.h>
19#include <linux/ndctl.h>
19#include <linux/types.h> 20#include <linux/types.h>
20#include <linux/uuid.h> 21#include <linux/uuid.h>
21#include <linux/acpi.h> 22#include <linux/acpi.h>
@@ -148,6 +149,7 @@ struct acpi_nfit_desc {
148 struct nd_cmd_ars_status *ars_status; 149 struct nd_cmd_ars_status *ars_status;
149 size_t ars_status_size; 150 size_t ars_status_size;
150 struct work_struct work; 151 struct work_struct work;
152 struct list_head list;
151 struct kernfs_node *scrub_count_state; 153 struct kernfs_node *scrub_count_state;
152 unsigned int scrub_count; 154 unsigned int scrub_count;
153 unsigned int cancel:1; 155 unsigned int cancel:1;
@@ -187,6 +189,24 @@ struct nfit_blk {
187 u32 dimm_flags; 189 u32 dimm_flags;
188}; 190};
189 191
/* List of acpi_nfit_desc instances registered for mce notifications. */
extern struct list_head acpi_descs;
/* Held while acpi_descs is modified or walked. */
extern struct mutex acpi_desc_lock;
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);

/*
 * MCE hook registration: real implementations are built only when
 * CONFIG_X86_MCE is set; otherwise both calls are no-ops.
 */
#ifdef CONFIG_X86_MCE
void nfit_mce_register(void);
void nfit_mce_unregister(void);
#else
static inline void nfit_mce_register(void) { }
static inline void nfit_mce_unregister(void) { }
#endif

int nfit_spa_type(struct acpi_nfit_system_address *spa);

209
190static inline struct acpi_nfit_memory_map *__to_nfit_memdev( 210static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
191 struct nfit_mem *nfit_mem) 211 struct nfit_mem *nfit_mem)
192{ 212{
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 0dca8ff7557b..ad6dd0543019 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -30,6 +30,7 @@ obj-$(CONFIG_DEV_DAX) += dax.o
30obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o 30obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
31 31
32nfit-y := $(ACPI_SRC)/core.o 32nfit-y := $(ACPI_SRC)/core.o
33nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
33nfit-y += config_check.o 34nfit-y += config_check.o
34 35
35nd_pmem-y := $(NVDIMM_SRC)/pmem.o 36nd_pmem-y := $(NVDIMM_SRC)/pmem.o