diff options
author | Borislav Petkov <borislav.petkov@amd.com> | 2010-09-01 08:45:20 -0400 |
---|---|---|
committer | Borislav Petkov <bp@amd64.org> | 2010-10-21 08:47:58 -0400 |
commit | 7cfd4a87441f5ca3018fdd1f7ad67e8a73a05dc2 (patch) | |
tree | a74a1bb40d2ef3b5a66551562cdf775b63a27c8f /drivers/edac | |
parent | 6337583d7dc0dced36ab98dd63de2389c95c22d9 (diff) |
EDAC, MCE: Pass complete MCE info to decoders
... instead of the MCi_STATUS info only for improved handling of certain
types of errors later.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/amd64_edac.c | 13 | ||||
-rw-r--r-- | drivers/edac/amd64_edac_dbg.c | 10 | ||||
-rw-r--r-- | drivers/edac/edac_mce_amd.c | 74 | ||||
-rw-r--r-- | drivers/edac/edac_mce_amd.h | 6 |
4 files changed, 56 insertions, 47 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index e7d5d6b5dcf6..76f7cc0ee149 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
@@ -2073,11 +2073,18 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, | |||
2073 | amd64_handle_ue(mci, info); | 2073 | amd64_handle_ue(mci, info); |
2074 | } | 2074 | } |
2075 | 2075 | ||
2076 | void amd64_decode_bus_error(int node_id, struct err_regs *regs) | 2076 | void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg) |
2077 | { | 2077 | { |
2078 | struct mem_ctl_info *mci = mci_lookup[node_id]; | 2078 | struct mem_ctl_info *mci = mci_lookup[node_id]; |
2079 | struct err_regs regs; | ||
2079 | 2080 | ||
2080 | __amd64_decode_bus_error(mci, regs); | 2081 | regs.nbsl = (u32) m->status; |
2082 | regs.nbsh = (u32)(m->status >> 32); | ||
2083 | regs.nbeal = (u32) m->addr; | ||
2084 | regs.nbeah = (u32)(m->addr >> 32); | ||
2085 | regs.nbcfg = nbcfg; | ||
2086 | |||
2087 | __amd64_decode_bus_error(mci, &regs); | ||
2081 | 2088 | ||
2082 | /* | 2089 | /* |
2083 | * Check the UE bit of the NB status high register, if set generate some | 2090 | * Check the UE bit of the NB status high register, if set generate some |
@@ -2086,7 +2093,7 @@ void amd64_decode_bus_error(int node_id, struct err_regs *regs) | |||
2086 | * | 2093 | * |
2087 | * FIXME: this should go somewhere else, if at all. | 2094 | * FIXME: this should go somewhere else, if at all. |
2088 | */ | 2095 | */ |
2089 | if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) | 2096 | if (regs.nbsh & K8_NBSH_UC_ERR && !report_gart_errors) |
2090 | edac_mc_handle_ue_no_info(mci, "UE bit is set"); | 2097 | edac_mc_handle_ue_no_info(mci, "UE bit is set"); |
2091 | 2098 | ||
2092 | } | 2099 | } |
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c index 22ef3fecf569..f6d5695de5b6 100644 --- a/drivers/edac/amd64_edac_dbg.c +++ b/drivers/edac/amd64_edac_dbg.c | |||
@@ -10,11 +10,14 @@ static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data, | |||
10 | size_t count) | 10 | size_t count) |
11 | { | 11 | { |
12 | struct amd64_pvt *pvt = mci->pvt_info; | 12 | struct amd64_pvt *pvt = mci->pvt_info; |
13 | unsigned long long value; | 13 | u64 value; |
14 | int ret = 0; | 14 | int ret = 0; |
15 | struct mce m; | ||
15 | 16 | ||
16 | ret = strict_strtoull(data, 16, &value); | 17 | ret = strict_strtoull(data, 16, &value); |
17 | if (ret != -EINVAL) { | 18 | if (ret != -EINVAL) { |
19 | struct err_regs *regs = &pvt->ctl_error_info; | ||
20 | |||
18 | debugf0("received NBEA= 0x%llx\n", value); | 21 | debugf0("received NBEA= 0x%llx\n", value); |
19 | 22 | ||
20 | /* place the value into the virtual error packet */ | 23 | /* place the value into the virtual error packet */ |
@@ -22,9 +25,12 @@ static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data, | |||
22 | value >>= 32; | 25 | value >>= 32; |
23 | pvt->ctl_error_info.nbeah = (u32) value; | 26 | pvt->ctl_error_info.nbeah = (u32) value; |
24 | 27 | ||
28 | m.addr = value; | ||
29 | m.status = regs->nbsl | ((u64)regs->nbsh << 32); | ||
30 | |||
25 | /* Process the Mapping request */ | 31 | /* Process the Mapping request */ |
26 | /* TODO: Add race prevention */ | 32 | /* TODO: Add race prevention */ |
27 | amd_decode_nb_mce(pvt->mc_node_id, &pvt->ctl_error_info); | 33 | amd_decode_nb_mce(pvt->mc_node_id, &m, regs->nbcfg); |
28 | 34 | ||
29 | return count; | 35 | return count; |
30 | } | 36 | } |
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index d0e850eea50a..6cfa881888bc 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c | |||
@@ -2,7 +2,7 @@ | |||
2 | #include "edac_mce_amd.h" | 2 | #include "edac_mce_amd.h" |
3 | 3 | ||
4 | static bool report_gart_errors; | 4 | static bool report_gart_errors; |
5 | static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); | 5 | static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg); |
6 | 6 | ||
7 | void amd_report_gart_errors(bool v) | 7 | void amd_report_gart_errors(bool v) |
8 | { | 8 | { |
@@ -10,13 +10,13 @@ void amd_report_gart_errors(bool v) | |||
10 | } | 10 | } |
11 | EXPORT_SYMBOL_GPL(amd_report_gart_errors); | 11 | EXPORT_SYMBOL_GPL(amd_report_gart_errors); |
12 | 12 | ||
13 | void amd_register_ecc_decoder(void (*f)(int, struct err_regs *)) | 13 | void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)) |
14 | { | 14 | { |
15 | nb_bus_decoder = f; | 15 | nb_bus_decoder = f; |
16 | } | 16 | } |
17 | EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); | 17 | EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); |
18 | 18 | ||
19 | void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *)) | 19 | void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)) |
20 | { | 20 | { |
21 | if (nb_bus_decoder) { | 21 | if (nb_bus_decoder) { |
22 | WARN_ON(nb_bus_decoder != f); | 22 | WARN_ON(nb_bus_decoder != f); |
@@ -97,17 +97,17 @@ const char *ext_msgs[] = { | |||
97 | }; | 97 | }; |
98 | EXPORT_SYMBOL_GPL(ext_msgs); | 98 | EXPORT_SYMBOL_GPL(ext_msgs); |
99 | 99 | ||
100 | static void amd_decode_dc_mce(u64 mc0_status) | 100 | static void amd_decode_dc_mce(struct mce *m) |
101 | { | 101 | { |
102 | u32 ec = mc0_status & 0xffff; | 102 | u32 ec = m->status & 0xffff; |
103 | u32 xec = (mc0_status >> 16) & 0xf; | 103 | u32 xec = (m->status >> 16) & 0xf; |
104 | 104 | ||
105 | pr_emerg(HW_ERR "Data Cache Error: "); | 105 | pr_emerg(HW_ERR "Data Cache Error: "); |
106 | 106 | ||
107 | if (xec == 1 && TLB_ERROR(ec)) | 107 | if (xec == 1 && TLB_ERROR(ec)) |
108 | pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); | 108 | pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); |
109 | else if (xec == 0) { | 109 | else if (xec == 0) { |
110 | if (mc0_status & (1ULL << 40)) | 110 | if (m->status & (1ULL << 40)) |
111 | pr_cont(" during Data Scrub.\n"); | 111 | pr_cont(" during Data Scrub.\n"); |
112 | else if (TLB_ERROR(ec)) | 112 | else if (TLB_ERROR(ec)) |
113 | pr_cont(": %s TLB parity error.\n", LL_MSG(ec)); | 113 | pr_cont(": %s TLB parity error.\n", LL_MSG(ec)); |
@@ -140,10 +140,10 @@ wrong_dc_mce: | |||
140 | pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); | 140 | pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); |
141 | } | 141 | } |
142 | 142 | ||
143 | static void amd_decode_ic_mce(u64 mc1_status) | 143 | static void amd_decode_ic_mce(struct mce *m) |
144 | { | 144 | { |
145 | u32 ec = mc1_status & 0xffff; | 145 | u32 ec = m->status & 0xffff; |
146 | u32 xec = (mc1_status >> 16) & 0xf; | 146 | u32 xec = (m->status >> 16) & 0xf; |
147 | 147 | ||
148 | pr_emerg(HW_ERR "Instruction Cache Error"); | 148 | pr_emerg(HW_ERR "Instruction Cache Error"); |
149 | 149 | ||
@@ -154,7 +154,7 @@ static void amd_decode_ic_mce(u64 mc1_status) | |||
154 | pr_cont(": %s TLB Parity error.\n", LL_MSG(ec)); | 154 | pr_cont(": %s TLB Parity error.\n", LL_MSG(ec)); |
155 | else if (BUS_ERROR(ec)) { | 155 | else if (BUS_ERROR(ec)) { |
156 | if (boot_cpu_data.x86 == 0xf && | 156 | if (boot_cpu_data.x86 == 0xf && |
157 | (mc1_status & (1ULL << 58))) | 157 | (m->status & BIT(58))) |
158 | pr_cont(" during system linefill.\n"); | 158 | pr_cont(" during system linefill.\n"); |
159 | else | 159 | else |
160 | pr_cont(" during attempted NB data read.\n"); | 160 | pr_cont(" during attempted NB data read.\n"); |
@@ -197,10 +197,10 @@ wrong_ic_mce: | |||
197 | pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); | 197 | pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); |
198 | } | 198 | } |
199 | 199 | ||
200 | static void amd_decode_bu_mce(u64 mc2_status) | 200 | static void amd_decode_bu_mce(struct mce *m) |
201 | { | 201 | { |
202 | u32 ec = mc2_status & 0xffff; | 202 | u32 ec = m->status & 0xffff; |
203 | u32 xec = (mc2_status >> 16) & 0xf; | 203 | u32 xec = (m->status >> 16) & 0xf; |
204 | 204 | ||
205 | pr_emerg(HW_ERR "Bus Unit Error"); | 205 | pr_emerg(HW_ERR "Bus Unit Error"); |
206 | 206 | ||
@@ -239,10 +239,10 @@ wrong_bu_mce: | |||
239 | pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); | 239 | pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); |
240 | } | 240 | } |
241 | 241 | ||
242 | static void amd_decode_ls_mce(u64 mc3_status) | 242 | static void amd_decode_ls_mce(struct mce *m) |
243 | { | 243 | { |
244 | u32 ec = mc3_status & 0xffff; | 244 | u32 ec = m->status & 0xffff; |
245 | u32 xec = (mc3_status >> 16) & 0xf; | 245 | u32 xec = (m->status >> 16) & 0xf; |
246 | 246 | ||
247 | pr_emerg(HW_ERR "Load Store Error"); | 247 | pr_emerg(HW_ERR "Load Store Error"); |
248 | 248 | ||
@@ -260,9 +260,11 @@ wrong_ls_mce: | |||
260 | pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); | 260 | pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); |
261 | } | 261 | } |
262 | 262 | ||
263 | void amd_decode_nb_mce(int node_id, struct err_regs *regs) | 263 | void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) |
264 | { | 264 | { |
265 | u32 ec = ERROR_CODE(regs->nbsl); | 265 | u32 ec = m->status & 0xffff; |
266 | u32 nbsh = (u32)(m->status >> 32); | ||
267 | u32 nbsl = (u32)m->status; | ||
266 | 268 | ||
267 | /* | 269 | /* |
268 | * GART TLB error reporting is disabled by default. Bail out early. | 270 | * GART TLB error reporting is disabled by default. Bail out early. |
@@ -278,10 +280,10 @@ void amd_decode_nb_mce(int node_id, struct err_regs *regs) | |||
278 | */ | 280 | */ |
279 | if ((boot_cpu_data.x86 == 0x10) && | 281 | if ((boot_cpu_data.x86 == 0x10) && |
280 | (boot_cpu_data.x86_model > 7)) { | 282 | (boot_cpu_data.x86_model > 7)) { |
281 | if (regs->nbsh & K8_NBSH_ERR_CPU_VAL) | 283 | if (nbsh & K8_NBSH_ERR_CPU_VAL) |
282 | pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf)); | 284 | pr_cont(", core: %u\n", (u8)(nbsh & 0xf)); |
283 | } else { | 285 | } else { |
284 | u8 assoc_cpus = regs->nbsh & 0xf; | 286 | u8 assoc_cpus = nbsh & 0xf; |
285 | 287 | ||
286 | if (assoc_cpus > 0) | 288 | if (assoc_cpus > 0) |
287 | pr_cont(", core: %d", fls(assoc_cpus) - 1); | 289 | pr_cont(", core: %d", fls(assoc_cpus) - 1); |
@@ -289,17 +291,17 @@ void amd_decode_nb_mce(int node_id, struct err_regs *regs) | |||
289 | pr_cont("\n"); | 291 | pr_cont("\n"); |
290 | } | 292 | } |
291 | 293 | ||
292 | pr_emerg(HW_ERR "%s.\n", EXT_ERR_MSG(regs->nbsl)); | 294 | pr_emerg(HW_ERR "%s.\n", EXT_ERR_MSG(nbsl)); |
293 | 295 | ||
294 | if (BUS_ERROR(ec) && nb_bus_decoder) | 296 | if (BUS_ERROR(ec) && nb_bus_decoder) |
295 | nb_bus_decoder(node_id, regs); | 297 | nb_bus_decoder(node_id, m, nbcfg); |
296 | } | 298 | } |
297 | EXPORT_SYMBOL_GPL(amd_decode_nb_mce); | 299 | EXPORT_SYMBOL_GPL(amd_decode_nb_mce); |
298 | 300 | ||
299 | static void amd_decode_fr_mce(u64 mc5_status) | 301 | static void amd_decode_fr_mce(struct mce *m) |
300 | { | 302 | { |
301 | /* we have only one error signature so match all fields at once. */ | 303 | /* we have only one error signature so match all fields at once. */ |
302 | if ((mc5_status & 0xffff) == 0x0f0f) | 304 | if ((m->status & 0xffff) == 0x0f0f) |
303 | pr_emerg(HW_ERR " FR Error: CPU Watchdog timer expire.\n"); | 305 | pr_emerg(HW_ERR " FR Error: CPU Watchdog timer expire.\n"); |
304 | else | 306 | else |
305 | pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); | 307 | pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); |
@@ -326,7 +328,6 @@ static int amd_decode_mce(struct notifier_block *nb, unsigned long val, | |||
326 | void *data) | 328 | void *data) |
327 | { | 329 | { |
328 | struct mce *m = (struct mce *)data; | 330 | struct mce *m = (struct mce *)data; |
329 | struct err_regs regs; | ||
330 | int node, ecc; | 331 | int node, ecc; |
331 | 332 | ||
332 | pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank); | 333 | pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank); |
@@ -346,33 +347,28 @@ static int amd_decode_mce(struct notifier_block *nb, unsigned long val, | |||
346 | 347 | ||
347 | switch (m->bank) { | 348 | switch (m->bank) { |
348 | case 0: | 349 | case 0: |
349 | amd_decode_dc_mce(m->status); | 350 | amd_decode_dc_mce(m); |
350 | break; | 351 | break; |
351 | 352 | ||
352 | case 1: | 353 | case 1: |
353 | amd_decode_ic_mce(m->status); | 354 | amd_decode_ic_mce(m); |
354 | break; | 355 | break; |
355 | 356 | ||
356 | case 2: | 357 | case 2: |
357 | amd_decode_bu_mce(m->status); | 358 | amd_decode_bu_mce(m); |
358 | break; | 359 | break; |
359 | 360 | ||
360 | case 3: | 361 | case 3: |
361 | amd_decode_ls_mce(m->status); | 362 | amd_decode_ls_mce(m); |
362 | break; | 363 | break; |
363 | 364 | ||
364 | case 4: | 365 | case 4: |
365 | regs.nbsl = (u32) m->status; | 366 | node = amd_get_nb_id(m->extcpu); |
366 | regs.nbsh = (u32)(m->status >> 32); | 367 | amd_decode_nb_mce(node, m, 0); |
367 | regs.nbeal = (u32) m->addr; | ||
368 | regs.nbeah = (u32)(m->addr >> 32); | ||
369 | node = amd_get_nb_id(m->extcpu); | ||
370 | |||
371 | amd_decode_nb_mce(node, &regs); | ||
372 | break; | 368 | break; |
373 | 369 | ||
374 | case 5: | 370 | case 5: |
375 | amd_decode_fr_mce(m->status); | 371 | amd_decode_fr_mce(m); |
376 | break; | 372 | break; |
377 | 373 | ||
378 | default: | 374 | default: |
diff --git a/drivers/edac/edac_mce_amd.h b/drivers/edac/edac_mce_amd.h index 2ee499d7f898..0fba0e76c25f 100644 --- a/drivers/edac/edac_mce_amd.h +++ b/drivers/edac/edac_mce_amd.h | |||
@@ -63,8 +63,8 @@ struct err_regs { | |||
63 | 63 | ||
64 | 64 | ||
65 | void amd_report_gart_errors(bool); | 65 | void amd_report_gart_errors(bool); |
66 | void amd_register_ecc_decoder(void (*f)(int, struct err_regs *)); | 66 | void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)); |
67 | void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *)); | 67 | void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)); |
68 | void amd_decode_nb_mce(int, struct err_regs *); | 68 | void amd_decode_nb_mce(int, struct mce *, u32); |
69 | 69 | ||
70 | #endif /* _EDAC_MCE_AMD_H */ | 70 | #endif /* _EDAC_MCE_AMD_H */ |