diff options
author | Qiuxu Zhuo <qiuxu.zhuo@intel.com> | 2018-10-21 03:18:56 -0400 |
---|---|---|
committer | Borislav Petkov <bp@suse.de> | 2018-10-25 10:59:18 -0400 |
commit | ad6e16059d8e00de0887885db11d87cba0bd1512 (patch) | |
tree | d31d3eb322c81a1b231c7e112f912926919a69b3 | |
parent | 36168d7123311d52e085c116f6c66e16f0b84615 (diff) |
EDAC, skx_edac: Add address translation for non-volatile DIMMs
Currently, this driver doesn't support address translation for
non-volatile DIMMs.
The ACPI ADXL DSM methods provide address translation for both volatile
and non-volatile DIMMs. Enable the driver to use these ACPI DSM methods if
they are supported and there are non-volatile DIMMs populated on the system.
Co-developed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
CC: Mauro Carvalho Chehab <mchehab@kernel.org>
CC: arozansk@redhat.com
CC: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1540106336-5212-1-git-send-email-qiuxu.zhuo@intel.com
-rw-r--r-- | drivers/edac/Kconfig | 1 | ||||
-rw-r--r-- | drivers/edac/skx_edac.c | 193 |
2 files changed, 181 insertions, 13 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 57304b2e989f..ffd349c12479 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig | |||
@@ -234,6 +234,7 @@ config EDAC_SKX | |||
234 | depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG | 234 | depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG |
235 | depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y | 235 | depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y |
236 | select DMI | 236 | select DMI |
237 | select ACPI_ADXL | ||
237 | help | 238 | help |
238 | Support for error detection and correction the Intel | 239 | Support for error detection and correction the Intel |
239 | Skylake server Integrated Memory Controllers. If your | 240 | Skylake server Integrated Memory Controllers. If your |
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index dd209e0dd9ab..a99ea61dad32 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/bitmap.h> | 26 | #include <linux/bitmap.h> |
27 | #include <linux/math64.h> | 27 | #include <linux/math64.h> |
28 | #include <linux/mod_devicetable.h> | 28 | #include <linux/mod_devicetable.h> |
29 | #include <linux/adxl.h> | ||
29 | #include <acpi/nfit.h> | 30 | #include <acpi/nfit.h> |
30 | #include <asm/cpu_device_id.h> | 31 | #include <asm/cpu_device_id.h> |
31 | #include <asm/intel-family.h> | 32 | #include <asm/intel-family.h> |
@@ -35,6 +36,7 @@ | |||
35 | #include "edac_module.h" | 36 | #include "edac_module.h" |
36 | 37 | ||
37 | #define EDAC_MOD_STR "skx_edac" | 38 | #define EDAC_MOD_STR "skx_edac" |
39 | #define MSG_SIZE 1024 | ||
38 | 40 | ||
39 | /* | 41 | /* |
40 | * Debug macros | 42 | * Debug macros |
@@ -54,6 +56,29 @@ | |||
54 | static LIST_HEAD(skx_edac_list); | 56 | static LIST_HEAD(skx_edac_list); |
55 | 57 | ||
56 | static u64 skx_tolm, skx_tohm; | 58 | static u64 skx_tolm, skx_tohm; |
59 | static char *skx_msg; | ||
60 | static unsigned int nvdimm_count; | ||
61 | |||
62 | enum { | ||
63 | INDEX_SOCKET, | ||
64 | INDEX_MEMCTRL, | ||
65 | INDEX_CHANNEL, | ||
66 | INDEX_DIMM, | ||
67 | INDEX_MAX | ||
68 | }; | ||
69 | |||
70 | static const char * const component_names[] = { | ||
71 | [INDEX_SOCKET] = "ProcessorSocketId", | ||
72 | [INDEX_MEMCTRL] = "MemoryControllerId", | ||
73 | [INDEX_CHANNEL] = "ChannelId", | ||
74 | [INDEX_DIMM] = "DimmSlotId", | ||
75 | }; | ||
76 | |||
77 | static int component_indices[ARRAY_SIZE(component_names)]; | ||
78 | static int adxl_component_count; | ||
79 | static const char * const *adxl_component_names; | ||
80 | static u64 *adxl_values; | ||
81 | static char *adxl_msg; | ||
57 | 82 | ||
58 | #define NUM_IMC 2 /* memory controllers per socket */ | 83 | #define NUM_IMC 2 /* memory controllers per socket */ |
59 | #define NUM_CHANNELS 3 /* channels per memory controller */ | 84 | #define NUM_CHANNELS 3 /* channels per memory controller */ |
@@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, | |||
393 | u16 flags; | 418 | u16 flags; |
394 | u64 size = 0; | 419 | u64 size = 0; |
395 | 420 | ||
421 | nvdimm_count++; | ||
422 | |||
396 | dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc, | 423 | dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc, |
397 | imc->src_id, 0); | 424 | imc->src_id, 0); |
398 | 425 | ||
@@ -941,12 +968,46 @@ static void teardown_skx_debug(void) | |||
941 | } | 968 | } |
942 | #endif /*CONFIG_EDAC_DEBUG*/ | 969 | #endif /*CONFIG_EDAC_DEBUG*/ |
943 | 970 | ||
971 | static bool skx_adxl_decode(struct decoded_addr *res) | ||
972 | |||
973 | { | ||
974 | int i, len = 0; | ||
975 | |||
976 | if (res->addr >= skx_tohm || (res->addr >= skx_tolm && | ||
977 | res->addr < BIT_ULL(32))) { | ||
978 | edac_dbg(0, "Address 0x%llx out of range\n", res->addr); | ||
979 | return false; | ||
980 | } | ||
981 | |||
982 | if (adxl_decode(res->addr, adxl_values)) { | ||
983 | edac_dbg(0, "Failed to decode 0x%llx\n", res->addr); | ||
984 | return false; | ||
985 | } | ||
986 | |||
987 | res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; | ||
988 | res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; | ||
989 | res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; | ||
990 | res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; | ||
991 | |||
992 | for (i = 0; i < adxl_component_count; i++) { | ||
993 | if (adxl_values[i] == ~0x0ull) | ||
994 | continue; | ||
995 | |||
996 | len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx", | ||
997 | adxl_component_names[i], adxl_values[i]); | ||
998 | if (MSG_SIZE - len <= 0) | ||
999 | break; | ||
1000 | } | ||
1001 | |||
1002 | return true; | ||
1003 | } | ||
1004 | |||
944 | static void skx_mce_output_error(struct mem_ctl_info *mci, | 1005 | static void skx_mce_output_error(struct mem_ctl_info *mci, |
945 | const struct mce *m, | 1006 | const struct mce *m, |
946 | struct decoded_addr *res) | 1007 | struct decoded_addr *res) |
947 | { | 1008 | { |
948 | enum hw_event_mc_err_type tp_event; | 1009 | enum hw_event_mc_err_type tp_event; |
949 | char *type, *optype, msg[256]; | 1010 | char *type, *optype; |
950 | bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); | 1011 | bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); |
951 | bool overflow = GET_BITFIELD(m->status, 62, 62); | 1012 | bool overflow = GET_BITFIELD(m->status, 62, 62); |
952 | bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); | 1013 | bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); |
@@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, | |||
1007 | break; | 1068 | break; |
1008 | } | 1069 | } |
1009 | } | 1070 | } |
1071 | if (adxl_component_count) { | ||
1072 | snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s", | ||
1073 | overflow ? " OVERFLOW" : "", | ||
1074 | (uncorrected_error && recoverable) ? " recoverable" : "", | ||
1075 | mscod, errcode, adxl_msg); | ||
1076 | } else { | ||
1077 | snprintf(skx_msg, MSG_SIZE, | ||
1078 | "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", | ||
1079 | overflow ? " OVERFLOW" : "", | ||
1080 | (uncorrected_error && recoverable) ? " recoverable" : "", | ||
1081 | mscod, errcode, | ||
1082 | res->socket, res->imc, res->rank, | ||
1083 | res->bank_group, res->bank_address, res->row, res->column); | ||
1084 | } | ||
1010 | 1085 | ||
1011 | snprintf(msg, sizeof(msg), | 1086 | edac_dbg(0, "%s\n", skx_msg); |
1012 | "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", | ||
1013 | overflow ? " OVERFLOW" : "", | ||
1014 | (uncorrected_error && recoverable) ? " recoverable" : "", | ||
1015 | mscod, errcode, | ||
1016 | res->socket, res->imc, res->rank, | ||
1017 | res->bank_group, res->bank_address, res->row, res->column); | ||
1018 | |||
1019 | edac_dbg(0, "%s\n", msg); | ||
1020 | 1087 | ||
1021 | /* Call the helper to output message */ | 1088 | /* Call the helper to output message */ |
1022 | edac_mc_handle_error(tp_event, mci, core_err_cnt, | 1089 | edac_mc_handle_error(tp_event, mci, core_err_cnt, |
1023 | m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, | 1090 | m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, |
1024 | res->channel, res->dimm, -1, | 1091 | res->channel, res->dimm, -1, |
1025 | optype, msg); | 1092 | optype, skx_msg); |
1093 | } | ||
1094 | |||
1095 | static struct mem_ctl_info *get_mci(int src_id, int lmc) | ||
1096 | { | ||
1097 | struct skx_dev *d; | ||
1098 | |||
1099 | if (lmc > NUM_IMC - 1) { | ||
1100 | skx_printk(KERN_ERR, "Bad lmc %d\n", lmc); | ||
1101 | return NULL; | ||
1102 | } | ||
1103 | |||
1104 | list_for_each_entry(d, &skx_edac_list, list) { | ||
1105 | if (d->imc[0].src_id == src_id) | ||
1106 | return d->imc[lmc].mci; | ||
1107 | } | ||
1108 | |||
1109 | skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc); | ||
1110 | |||
1111 | return NULL; | ||
1026 | } | 1112 | } |
1027 | 1113 | ||
1028 | static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, | 1114 | static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, |
@@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, | |||
1040 | if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) | 1126 | if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) |
1041 | return NOTIFY_DONE; | 1127 | return NOTIFY_DONE; |
1042 | 1128 | ||
1129 | memset(&res, 0, sizeof(res)); | ||
1043 | res.addr = mce->addr; | 1130 | res.addr = mce->addr; |
1044 | if (!skx_decode(&res)) | 1131 | |
1132 | if (adxl_component_count) { | ||
1133 | if (!skx_adxl_decode(&res)) | ||
1134 | return NOTIFY_DONE; | ||
1135 | |||
1136 | mci = get_mci(res.socket, res.imc); | ||
1137 | } else { | ||
1138 | if (!skx_decode(&res)) | ||
1139 | return NOTIFY_DONE; | ||
1140 | |||
1141 | mci = res.dev->imc[res.imc].mci; | ||
1142 | } | ||
1143 | |||
1144 | if (!mci) | ||
1045 | return NOTIFY_DONE; | 1145 | return NOTIFY_DONE; |
1046 | mci = res.dev->imc[res.imc].mci; | ||
1047 | 1146 | ||
1048 | if (mce->mcgstatus & MCG_STATUS_MCIP) | 1147 | if (mce->mcgstatus & MCG_STATUS_MCIP) |
1049 | type = "Exception"; | 1148 | type = "Exception"; |
@@ -1094,6 +1193,62 @@ static void skx_remove(void) | |||
1094 | } | 1193 | } |
1095 | } | 1194 | } |
1096 | 1195 | ||
1196 | static void __init skx_adxl_get(void) | ||
1197 | { | ||
1198 | const char * const *names; | ||
1199 | int i, j; | ||
1200 | |||
1201 | names = adxl_get_component_names(); | ||
1202 | if (!names) { | ||
1203 | skx_printk(KERN_NOTICE, "No firmware support for address translation."); | ||
1204 | skx_printk(KERN_CONT, " Only decoding DDR4 address!\n"); | ||
1205 | return; | ||
1206 | } | ||
1207 | |||
1208 | for (i = 0; i < INDEX_MAX; i++) { | ||
1209 | for (j = 0; names[j]; j++) { | ||
1210 | if (!strcmp(component_names[i], names[j])) { | ||
1211 | component_indices[i] = j; | ||
1212 | break; | ||
1213 | } | ||
1214 | } | ||
1215 | |||
1216 | if (!names[j]) | ||
1217 | goto err; | ||
1218 | } | ||
1219 | |||
1220 | adxl_component_names = names; | ||
1221 | while (*names++) | ||
1222 | adxl_component_count++; | ||
1223 | |||
1224 | adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values), | ||
1225 | GFP_KERNEL); | ||
1226 | if (!adxl_values) { | ||
1227 | adxl_component_count = 0; | ||
1228 | return; | ||
1229 | } | ||
1230 | |||
1231 | adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL); | ||
1232 | if (!adxl_msg) { | ||
1233 | adxl_component_count = 0; | ||
1234 | kfree(adxl_values); | ||
1235 | } | ||
1236 | |||
1237 | return; | ||
1238 | err: | ||
1239 | skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ", | ||
1240 | component_names[i]); | ||
1241 | for (j = 0; names[j]; j++) | ||
1242 | skx_printk(KERN_CONT, "%s ", names[j]); | ||
1243 | skx_printk(KERN_CONT, "\n"); | ||
1244 | } | ||
1245 | |||
1246 | static void __exit skx_adxl_put(void) | ||
1247 | { | ||
1248 | kfree(adxl_values); | ||
1249 | kfree(adxl_msg); | ||
1250 | } | ||
1251 | |||
1097 | /* | 1252 | /* |
1098 | * skx_init: | 1253 | * skx_init: |
1099 | * make sure we are running on the correct cpu model | 1254 | * make sure we are running on the correct cpu model |
@@ -1158,6 +1313,15 @@ static int __init skx_init(void) | |||
1158 | } | 1313 | } |
1159 | } | 1314 | } |
1160 | 1315 | ||
1316 | skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL); | ||
1317 | if (!skx_msg) { | ||
1318 | rc = -ENOMEM; | ||
1319 | goto fail; | ||
1320 | } | ||
1321 | |||
1322 | if (nvdimm_count) | ||
1323 | skx_adxl_get(); | ||
1324 | |||
1161 | /* Ensure that the OPSTATE is set correctly for POLL or NMI */ | 1325 | /* Ensure that the OPSTATE is set correctly for POLL or NMI */ |
1162 | opstate_init(); | 1326 | opstate_init(); |
1163 | 1327 | ||
@@ -1176,6 +1340,9 @@ static void __exit skx_exit(void) | |||
1176 | edac_dbg(2, "\n"); | 1340 | edac_dbg(2, "\n"); |
1177 | mce_unregister_decode_chain(&skx_mce_dec); | 1341 | mce_unregister_decode_chain(&skx_mce_dec); |
1178 | skx_remove(); | 1342 | skx_remove(); |
1343 | if (nvdimm_count) | ||
1344 | skx_adxl_put(); | ||
1345 | kfree(skx_msg); | ||
1179 | teardown_skx_debug(); | 1346 | teardown_skx_debug(); |
1180 | } | 1347 | } |
1181 | 1348 | ||