diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /drivers/edac/mce_amd.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'drivers/edac/mce_amd.c')
-rw-r--r-- | drivers/edac/mce_amd.c | 920 |
1 files changed, 920 insertions, 0 deletions
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c new file mode 100644 index 000000000000..795cfbc0bf50 --- /dev/null +++ b/drivers/edac/mce_amd.c | |||
@@ -0,0 +1,920 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/slab.h> | ||
3 | |||
4 | #include "mce_amd.h" | ||
5 | |||
6 | static struct amd_decoder_ops *fam_ops; | ||
7 | |||
8 | static u8 xec_mask = 0xf; | ||
9 | static u8 nb_err_cpumask = 0xf; | ||
10 | |||
11 | static bool report_gart_errors; | ||
12 | static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg); | ||
13 | |||
14 | void amd_report_gart_errors(bool v) | ||
15 | { | ||
16 | report_gart_errors = v; | ||
17 | } | ||
18 | EXPORT_SYMBOL_GPL(amd_report_gart_errors); | ||
19 | |||
20 | void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)) | ||
21 | { | ||
22 | nb_bus_decoder = f; | ||
23 | } | ||
24 | EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); | ||
25 | |||
26 | void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)) | ||
27 | { | ||
28 | if (nb_bus_decoder) { | ||
29 | WARN_ON(nb_bus_decoder != f); | ||
30 | |||
31 | nb_bus_decoder = NULL; | ||
32 | } | ||
33 | } | ||
34 | EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); | ||
35 | |||
/*
 * String representations of the fields of an MCA error code; see F3x48 or
 * MSR0000_0411 for the register layout.
 */

/* transaction type */
const char *tt_msgs[] = {
	"INSN",
	"DATA",
	"GEN",
	"RESV"
};
EXPORT_SYMBOL_GPL(tt_msgs);

/* cache level */
const char *ll_msgs[] = {
	"RESV",
	"L1",
	"L2",
	"L3/GEN"
};
EXPORT_SYMBOL_GPL(ll_msgs);

/* memory transaction type */
const char *rrrr_msgs[] = {
	"GEN",
	"RD",
	"WR",
	"DRD",
	"DWR",
	"IRD",
	"PRF",
	"EV",
	"SNP"
};
EXPORT_SYMBOL_GPL(rrrr_msgs);

/* participating processor */
const char *pp_msgs[] = {
	"SRC",
	"RES",
	"OBS",
	"GEN"
};
EXPORT_SYMBOL_GPL(pp_msgs);

/* request timeout */
const char *to_msgs[] = {
	"no timeout",
	"timed out"
};
EXPORT_SYMBOL_GPL(to_msgs);

/* memory or i/o */
const char *ii_msgs[] = {
	"MEM",
	"RESV",
	"IO",
	"GEN"
};
EXPORT_SYMBOL_GPL(ii_msgs);
66 | |||
/*
 * Northbridge error descriptions indexed by f10h_nb_mce():
 * xec 0xa-0xc map to entries 0-2, xec 0xe to entry 3 and
 * xec 0x1c-0x1f to entries 4-7.
 *
 * The pointer array itself is const as well, matching the other static
 * description tables in this file.
 */
static const char * const f10h_nb_mce_desc[] = {
	"HT link data error",
	"Protocol error (link, L3, probe filter, etc.)",
	"Parity error in NB-internal arrays",
	"Link Retry due to IO link transmission error",
	"L3 ECC data cache error",
	"ECC error in L3 cache tag",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};
77 | |||
/*
 * Family 15h instruction cache error descriptions, indexed by f15h_ic_mce():
 * xec 0x0-0xa use the entry of the same index, xec 0xd uses entry 11 and
 * xec 0x10-0x14 use entries 12-16.
 */
static const char * const f15h_ic_mce_desc[] = {
	/* xec = 0x0 ... 0xa */
	"UC during a demand linefill from L2",
	"Parity error during data load from IC",
	"Parity error for IC valid bit",
	"Main tag parity error",
	"Parity error in prediction queue",
	"PFB data/address parity error",
	"Parity error in the branch status reg",
	"PFB promotion address error",
	"Tag error during probe/victimization",
	"Parity error for IC probe tag valid bit",
	"PFB non-cacheable bit parity error",
	/* xec = 0xd */
	"PFB valid bit parity error",
	/* xec = 0x10 ... 0x14 */
	"patch RAM",
	"uop queue",
	"insn buffer",
	"predecode buffer",
	"fetch address FIFO"
};
97 | |||
/*
 * Family 15h combined unit error descriptions, indexed by amd_decode_cu_mce():
 * xec 0x4-0xc map to entries 0-8 and xec 0x10-0x14 to entries 9-13.
 */
static const char * const f15h_cu_mce_desc[] = {
	/* xec = 0x4 ... 0xc */
	"Fill ECC error on data fills",
	"Fill parity error on insn fills",
	"Prefetcher request FIFO parity error",
	"PRQ address parity error",
	"PRQ data parity error",
	"WCC Tag ECC error",
	"WCC Data ECC error",
	"WCB Data parity error",
	"VB Data/ECC error",
	/* xec = 0x10 ... 0x14 */
	"L2 Tag ECC error",
	"Hard L2 Tag ECC error",
	"Multiple hits on L2 tag",
	"XAB parity error",
	"PRB address parity error"
};
114 | |||
/*
 * Bank 5 (FIROB / execution unit) error descriptions, indexed directly by
 * the extended error code (0x0-0xc) in amd_decode_fr_mce().
 */
static const char * const fr_ex_mce_desc[] = {
	"CPU Watchdog timer expire",			/* xec = 0x0 */
	"Wakeup array dest tag",
	"AG payload array",
	"EX payload array",
	"IDRF array",
	"Retire dispatch queue",
	"Mapper checkpoint array",
	"Physical register file EX0 port",
	"Physical register file EX1 port",
	"Physical register file AG0 port",
	"Physical register file AG1 port",
	"Flag register file",
	"DE correctable error could not be corrected"	/* xec = 0xc */
};
130 | |||
131 | static bool f12h_dc_mce(u16 ec, u8 xec) | ||
132 | { | ||
133 | bool ret = false; | ||
134 | |||
135 | if (MEM_ERROR(ec)) { | ||
136 | u8 ll = LL(ec); | ||
137 | ret = true; | ||
138 | |||
139 | if (ll == LL_L2) | ||
140 | pr_cont("during L1 linefill from L2.\n"); | ||
141 | else if (ll == LL_L1) | ||
142 | pr_cont("Data/Tag %s error.\n", R4_MSG(ec)); | ||
143 | else | ||
144 | ret = false; | ||
145 | } | ||
146 | return ret; | ||
147 | } | ||
148 | |||
149 | static bool f10h_dc_mce(u16 ec, u8 xec) | ||
150 | { | ||
151 | if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { | ||
152 | pr_cont("during data scrub.\n"); | ||
153 | return true; | ||
154 | } | ||
155 | return f12h_dc_mce(ec, xec); | ||
156 | } | ||
157 | |||
158 | static bool k8_dc_mce(u16 ec, u8 xec) | ||
159 | { | ||
160 | if (BUS_ERROR(ec)) { | ||
161 | pr_cont("during system linefill.\n"); | ||
162 | return true; | ||
163 | } | ||
164 | |||
165 | return f10h_dc_mce(ec, xec); | ||
166 | } | ||
167 | |||
168 | static bool f14h_dc_mce(u16 ec, u8 xec) | ||
169 | { | ||
170 | u8 r4 = R4(ec); | ||
171 | bool ret = true; | ||
172 | |||
173 | if (MEM_ERROR(ec)) { | ||
174 | |||
175 | if (TT(ec) != TT_DATA || LL(ec) != LL_L1) | ||
176 | return false; | ||
177 | |||
178 | switch (r4) { | ||
179 | case R4_DRD: | ||
180 | case R4_DWR: | ||
181 | pr_cont("Data/Tag parity error due to %s.\n", | ||
182 | (r4 == R4_DRD ? "load/hw prf" : "store")); | ||
183 | break; | ||
184 | case R4_EVICT: | ||
185 | pr_cont("Copyback parity error on a tag miss.\n"); | ||
186 | break; | ||
187 | case R4_SNOOP: | ||
188 | pr_cont("Tag parity error during snoop.\n"); | ||
189 | break; | ||
190 | default: | ||
191 | ret = false; | ||
192 | } | ||
193 | } else if (BUS_ERROR(ec)) { | ||
194 | |||
195 | if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG) | ||
196 | return false; | ||
197 | |||
198 | pr_cont("System read data error on a "); | ||
199 | |||
200 | switch (r4) { | ||
201 | case R4_RD: | ||
202 | pr_cont("TLB reload.\n"); | ||
203 | break; | ||
204 | case R4_DWR: | ||
205 | pr_cont("store.\n"); | ||
206 | break; | ||
207 | case R4_DRD: | ||
208 | pr_cont("load.\n"); | ||
209 | break; | ||
210 | default: | ||
211 | ret = false; | ||
212 | } | ||
213 | } else { | ||
214 | ret = false; | ||
215 | } | ||
216 | |||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | static bool f15h_dc_mce(u16 ec, u8 xec) | ||
221 | { | ||
222 | bool ret = true; | ||
223 | |||
224 | if (MEM_ERROR(ec)) { | ||
225 | |||
226 | switch (xec) { | ||
227 | case 0x0: | ||
228 | pr_cont("Data Array access error.\n"); | ||
229 | break; | ||
230 | |||
231 | case 0x1: | ||
232 | pr_cont("UC error during a linefill from L2/NB.\n"); | ||
233 | break; | ||
234 | |||
235 | case 0x2: | ||
236 | case 0x11: | ||
237 | pr_cont("STQ access error.\n"); | ||
238 | break; | ||
239 | |||
240 | case 0x3: | ||
241 | pr_cont("SCB access error.\n"); | ||
242 | break; | ||
243 | |||
244 | case 0x10: | ||
245 | pr_cont("Tag error.\n"); | ||
246 | break; | ||
247 | |||
248 | case 0x12: | ||
249 | pr_cont("LDQ access error.\n"); | ||
250 | break; | ||
251 | |||
252 | default: | ||
253 | ret = false; | ||
254 | } | ||
255 | } else if (BUS_ERROR(ec)) { | ||
256 | |||
257 | if (!xec) | ||
258 | pr_cont("during system linefill.\n"); | ||
259 | else | ||
260 | pr_cont(" Internal %s condition.\n", | ||
261 | ((xec == 1) ? "livelock" : "deadlock")); | ||
262 | } else | ||
263 | ret = false; | ||
264 | |||
265 | return ret; | ||
266 | } | ||
267 | |||
268 | static void amd_decode_dc_mce(struct mce *m) | ||
269 | { | ||
270 | u16 ec = EC(m->status); | ||
271 | u8 xec = XEC(m->status, xec_mask); | ||
272 | |||
273 | pr_emerg(HW_ERR "Data Cache Error: "); | ||
274 | |||
275 | /* TLB error signatures are the same across families */ | ||
276 | if (TLB_ERROR(ec)) { | ||
277 | if (TT(ec) == TT_DATA) { | ||
278 | pr_cont("%s TLB %s.\n", LL_MSG(ec), | ||
279 | ((xec == 2) ? "locked miss" | ||
280 | : (xec ? "multimatch" : "parity"))); | ||
281 | return; | ||
282 | } | ||
283 | } else if (fam_ops->dc_mce(ec, xec)) | ||
284 | ; | ||
285 | else | ||
286 | pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); | ||
287 | } | ||
288 | |||
289 | static bool k8_ic_mce(u16 ec, u8 xec) | ||
290 | { | ||
291 | u8 ll = LL(ec); | ||
292 | bool ret = true; | ||
293 | |||
294 | if (!MEM_ERROR(ec)) | ||
295 | return false; | ||
296 | |||
297 | if (ll == 0x2) | ||
298 | pr_cont("during a linefill from L2.\n"); | ||
299 | else if (ll == 0x1) { | ||
300 | switch (R4(ec)) { | ||
301 | case R4_IRD: | ||
302 | pr_cont("Parity error during data load.\n"); | ||
303 | break; | ||
304 | |||
305 | case R4_EVICT: | ||
306 | pr_cont("Copyback Parity/Victim error.\n"); | ||
307 | break; | ||
308 | |||
309 | case R4_SNOOP: | ||
310 | pr_cont("Tag Snoop error.\n"); | ||
311 | break; | ||
312 | |||
313 | default: | ||
314 | ret = false; | ||
315 | break; | ||
316 | } | ||
317 | } else | ||
318 | ret = false; | ||
319 | |||
320 | return ret; | ||
321 | } | ||
322 | |||
323 | static bool f14h_ic_mce(u16 ec, u8 xec) | ||
324 | { | ||
325 | u8 r4 = R4(ec); | ||
326 | bool ret = true; | ||
327 | |||
328 | if (MEM_ERROR(ec)) { | ||
329 | if (TT(ec) != 0 || LL(ec) != 1) | ||
330 | ret = false; | ||
331 | |||
332 | if (r4 == R4_IRD) | ||
333 | pr_cont("Data/tag array parity error for a tag hit.\n"); | ||
334 | else if (r4 == R4_SNOOP) | ||
335 | pr_cont("Tag error during snoop/victimization.\n"); | ||
336 | else | ||
337 | ret = false; | ||
338 | } | ||
339 | return ret; | ||
340 | } | ||
341 | |||
342 | static bool f15h_ic_mce(u16 ec, u8 xec) | ||
343 | { | ||
344 | bool ret = true; | ||
345 | |||
346 | if (!MEM_ERROR(ec)) | ||
347 | return false; | ||
348 | |||
349 | switch (xec) { | ||
350 | case 0x0 ... 0xa: | ||
351 | pr_cont("%s.\n", f15h_ic_mce_desc[xec]); | ||
352 | break; | ||
353 | |||
354 | case 0xd: | ||
355 | pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); | ||
356 | break; | ||
357 | |||
358 | case 0x10 ... 0x14: | ||
359 | pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); | ||
360 | break; | ||
361 | |||
362 | default: | ||
363 | ret = false; | ||
364 | } | ||
365 | return ret; | ||
366 | } | ||
367 | |||
368 | static void amd_decode_ic_mce(struct mce *m) | ||
369 | { | ||
370 | u16 ec = EC(m->status); | ||
371 | u8 xec = XEC(m->status, xec_mask); | ||
372 | |||
373 | pr_emerg(HW_ERR "Instruction Cache Error: "); | ||
374 | |||
375 | if (TLB_ERROR(ec)) | ||
376 | pr_cont("%s TLB %s.\n", LL_MSG(ec), | ||
377 | (xec ? "multimatch" : "parity error")); | ||
378 | else if (BUS_ERROR(ec)) { | ||
379 | bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); | ||
380 | |||
381 | pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); | ||
382 | } else if (fam_ops->ic_mce(ec, xec)) | ||
383 | ; | ||
384 | else | ||
385 | pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); | ||
386 | } | ||
387 | |||
388 | static void amd_decode_bu_mce(struct mce *m) | ||
389 | { | ||
390 | u16 ec = EC(m->status); | ||
391 | u8 xec = XEC(m->status, xec_mask); | ||
392 | |||
393 | pr_emerg(HW_ERR "Bus Unit Error"); | ||
394 | |||
395 | if (xec == 0x1) | ||
396 | pr_cont(" in the write data buffers.\n"); | ||
397 | else if (xec == 0x3) | ||
398 | pr_cont(" in the victim data buffers.\n"); | ||
399 | else if (xec == 0x2 && MEM_ERROR(ec)) | ||
400 | pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec)); | ||
401 | else if (xec == 0x0) { | ||
402 | if (TLB_ERROR(ec)) | ||
403 | pr_cont(": %s error in a Page Descriptor Cache or " | ||
404 | "Guest TLB.\n", TT_MSG(ec)); | ||
405 | else if (BUS_ERROR(ec)) | ||
406 | pr_cont(": %s/ECC error in data read from NB: %s.\n", | ||
407 | R4_MSG(ec), PP_MSG(ec)); | ||
408 | else if (MEM_ERROR(ec)) { | ||
409 | u8 r4 = R4(ec); | ||
410 | |||
411 | if (r4 >= 0x7) | ||
412 | pr_cont(": %s error during data copyback.\n", | ||
413 | R4_MSG(ec)); | ||
414 | else if (r4 <= 0x1) | ||
415 | pr_cont(": %s parity/ECC error during data " | ||
416 | "access from L2.\n", R4_MSG(ec)); | ||
417 | else | ||
418 | goto wrong_bu_mce; | ||
419 | } else | ||
420 | goto wrong_bu_mce; | ||
421 | } else | ||
422 | goto wrong_bu_mce; | ||
423 | |||
424 | return; | ||
425 | |||
426 | wrong_bu_mce: | ||
427 | pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); | ||
428 | } | ||
429 | |||
430 | static void amd_decode_cu_mce(struct mce *m) | ||
431 | { | ||
432 | u16 ec = EC(m->status); | ||
433 | u8 xec = XEC(m->status, xec_mask); | ||
434 | |||
435 | pr_emerg(HW_ERR "Combined Unit Error: "); | ||
436 | |||
437 | if (TLB_ERROR(ec)) { | ||
438 | if (xec == 0x0) | ||
439 | pr_cont("Data parity TLB read error.\n"); | ||
440 | else if (xec == 0x1) | ||
441 | pr_cont("Poison data provided for TLB fill.\n"); | ||
442 | else | ||
443 | goto wrong_cu_mce; | ||
444 | } else if (BUS_ERROR(ec)) { | ||
445 | if (xec > 2) | ||
446 | goto wrong_cu_mce; | ||
447 | |||
448 | pr_cont("Error during attempted NB data read.\n"); | ||
449 | } else if (MEM_ERROR(ec)) { | ||
450 | switch (xec) { | ||
451 | case 0x4 ... 0xc: | ||
452 | pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]); | ||
453 | break; | ||
454 | |||
455 | case 0x10 ... 0x14: | ||
456 | pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]); | ||
457 | break; | ||
458 | |||
459 | default: | ||
460 | goto wrong_cu_mce; | ||
461 | } | ||
462 | } | ||
463 | |||
464 | return; | ||
465 | |||
466 | wrong_cu_mce: | ||
467 | pr_emerg(HW_ERR "Corrupted CU MCE info?\n"); | ||
468 | } | ||
469 | |||
470 | static void amd_decode_ls_mce(struct mce *m) | ||
471 | { | ||
472 | u16 ec = EC(m->status); | ||
473 | u8 xec = XEC(m->status, xec_mask); | ||
474 | |||
475 | if (boot_cpu_data.x86 >= 0x14) { | ||
476 | pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," | ||
477 | " please report on LKML.\n"); | ||
478 | return; | ||
479 | } | ||
480 | |||
481 | pr_emerg(HW_ERR "Load Store Error"); | ||
482 | |||
483 | if (xec == 0x0) { | ||
484 | u8 r4 = R4(ec); | ||
485 | |||
486 | if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) | ||
487 | goto wrong_ls_mce; | ||
488 | |||
489 | pr_cont(" during %s.\n", R4_MSG(ec)); | ||
490 | } else | ||
491 | goto wrong_ls_mce; | ||
492 | |||
493 | return; | ||
494 | |||
495 | wrong_ls_mce: | ||
496 | pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); | ||
497 | } | ||
498 | |||
499 | static bool k8_nb_mce(u16 ec, u8 xec) | ||
500 | { | ||
501 | bool ret = true; | ||
502 | |||
503 | switch (xec) { | ||
504 | case 0x1: | ||
505 | pr_cont("CRC error detected on HT link.\n"); | ||
506 | break; | ||
507 | |||
508 | case 0x5: | ||
509 | pr_cont("Invalid GART PTE entry during GART table walk.\n"); | ||
510 | break; | ||
511 | |||
512 | case 0x6: | ||
513 | pr_cont("Unsupported atomic RMW received from an IO link.\n"); | ||
514 | break; | ||
515 | |||
516 | case 0x0: | ||
517 | case 0x8: | ||
518 | if (boot_cpu_data.x86 == 0x11) | ||
519 | return false; | ||
520 | |||
521 | pr_cont("DRAM ECC error detected on the NB.\n"); | ||
522 | break; | ||
523 | |||
524 | case 0xd: | ||
525 | pr_cont("Parity error on the DRAM addr/ctl signals.\n"); | ||
526 | break; | ||
527 | |||
528 | default: | ||
529 | ret = false; | ||
530 | break; | ||
531 | } | ||
532 | |||
533 | return ret; | ||
534 | } | ||
535 | |||
536 | static bool f10h_nb_mce(u16 ec, u8 xec) | ||
537 | { | ||
538 | bool ret = true; | ||
539 | u8 offset = 0; | ||
540 | |||
541 | if (k8_nb_mce(ec, xec)) | ||
542 | return true; | ||
543 | |||
544 | switch(xec) { | ||
545 | case 0xa ... 0xc: | ||
546 | offset = 10; | ||
547 | break; | ||
548 | |||
549 | case 0xe: | ||
550 | offset = 11; | ||
551 | break; | ||
552 | |||
553 | case 0xf: | ||
554 | if (TLB_ERROR(ec)) | ||
555 | pr_cont("GART Table Walk data error.\n"); | ||
556 | else if (BUS_ERROR(ec)) | ||
557 | pr_cont("DMA Exclusion Vector Table Walk error.\n"); | ||
558 | else | ||
559 | ret = false; | ||
560 | |||
561 | goto out; | ||
562 | break; | ||
563 | |||
564 | case 0x19: | ||
565 | if (boot_cpu_data.x86 == 0x15) | ||
566 | pr_cont("Compute Unit Data Error.\n"); | ||
567 | else | ||
568 | ret = false; | ||
569 | |||
570 | goto out; | ||
571 | break; | ||
572 | |||
573 | case 0x1c ... 0x1f: | ||
574 | offset = 24; | ||
575 | break; | ||
576 | |||
577 | default: | ||
578 | ret = false; | ||
579 | |||
580 | goto out; | ||
581 | break; | ||
582 | } | ||
583 | |||
584 | pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]); | ||
585 | |||
586 | out: | ||
587 | return ret; | ||
588 | } | ||
589 | |||
590 | static bool nb_noop_mce(u16 ec, u8 xec) | ||
591 | { | ||
592 | return false; | ||
593 | } | ||
594 | |||
595 | void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) | ||
596 | { | ||
597 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
598 | u16 ec = EC(m->status); | ||
599 | u8 xec = XEC(m->status, 0x1f); | ||
600 | u32 nbsh = (u32)(m->status >> 32); | ||
601 | int core = -1; | ||
602 | |||
603 | pr_emerg(HW_ERR "Northbridge Error (node %d", node_id); | ||
604 | |||
605 | /* F10h, revD can disable ErrCpu[3:0] through ErrCpuVal */ | ||
606 | if (c->x86 == 0x10 && c->x86_model > 7) { | ||
607 | if (nbsh & NBSH_ERR_CPU_VAL) | ||
608 | core = nbsh & nb_err_cpumask; | ||
609 | } else { | ||
610 | u8 assoc_cpus = nbsh & nb_err_cpumask; | ||
611 | |||
612 | if (assoc_cpus > 0) | ||
613 | core = fls(assoc_cpus) - 1; | ||
614 | } | ||
615 | |||
616 | if (core >= 0) | ||
617 | pr_cont(", core %d): ", core); | ||
618 | else | ||
619 | pr_cont("): "); | ||
620 | |||
621 | switch (xec) { | ||
622 | case 0x2: | ||
623 | pr_cont("Sync error (sync packets on HT link detected).\n"); | ||
624 | return; | ||
625 | |||
626 | case 0x3: | ||
627 | pr_cont("HT Master abort.\n"); | ||
628 | return; | ||
629 | |||
630 | case 0x4: | ||
631 | pr_cont("HT Target abort.\n"); | ||
632 | return; | ||
633 | |||
634 | case 0x7: | ||
635 | pr_cont("NB Watchdog timeout.\n"); | ||
636 | return; | ||
637 | |||
638 | case 0x9: | ||
639 | pr_cont("SVM DMA Exclusion Vector error.\n"); | ||
640 | return; | ||
641 | |||
642 | default: | ||
643 | break; | ||
644 | } | ||
645 | |||
646 | if (!fam_ops->nb_mce(ec, xec)) | ||
647 | goto wrong_nb_mce; | ||
648 | |||
649 | if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x15) | ||
650 | if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder) | ||
651 | nb_bus_decoder(node_id, m, nbcfg); | ||
652 | |||
653 | return; | ||
654 | |||
655 | wrong_nb_mce: | ||
656 | pr_emerg(HW_ERR "Corrupted NB MCE info?\n"); | ||
657 | } | ||
658 | EXPORT_SYMBOL_GPL(amd_decode_nb_mce); | ||
659 | |||
660 | static void amd_decode_fr_mce(struct mce *m) | ||
661 | { | ||
662 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
663 | u8 xec = XEC(m->status, xec_mask); | ||
664 | |||
665 | if (c->x86 == 0xf || c->x86 == 0x11) | ||
666 | goto wrong_fr_mce; | ||
667 | |||
668 | if (c->x86 != 0x15 && xec != 0x0) | ||
669 | goto wrong_fr_mce; | ||
670 | |||
671 | pr_emerg(HW_ERR "%s Error: ", | ||
672 | (c->x86 == 0x15 ? "Execution Unit" : "FIROB")); | ||
673 | |||
674 | if (xec == 0x0 || xec == 0xc) | ||
675 | pr_cont("%s.\n", fr_ex_mce_desc[xec]); | ||
676 | else if (xec < 0xd) | ||
677 | pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]); | ||
678 | else | ||
679 | goto wrong_fr_mce; | ||
680 | |||
681 | return; | ||
682 | |||
683 | wrong_fr_mce: | ||
684 | pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); | ||
685 | } | ||
686 | |||
687 | static void amd_decode_fp_mce(struct mce *m) | ||
688 | { | ||
689 | u8 xec = XEC(m->status, xec_mask); | ||
690 | |||
691 | pr_emerg(HW_ERR "Floating Point Unit Error: "); | ||
692 | |||
693 | switch (xec) { | ||
694 | case 0x1: | ||
695 | pr_cont("Free List"); | ||
696 | break; | ||
697 | |||
698 | case 0x2: | ||
699 | pr_cont("Physical Register File"); | ||
700 | break; | ||
701 | |||
702 | case 0x3: | ||
703 | pr_cont("Retire Queue"); | ||
704 | break; | ||
705 | |||
706 | case 0x4: | ||
707 | pr_cont("Scheduler table"); | ||
708 | break; | ||
709 | |||
710 | case 0x5: | ||
711 | pr_cont("Status Register File"); | ||
712 | break; | ||
713 | |||
714 | default: | ||
715 | goto wrong_fp_mce; | ||
716 | break; | ||
717 | } | ||
718 | |||
719 | pr_cont(" parity error.\n"); | ||
720 | |||
721 | return; | ||
722 | |||
723 | wrong_fp_mce: | ||
724 | pr_emerg(HW_ERR "Corrupted FP MCE info?\n"); | ||
725 | } | ||
726 | |||
727 | static inline void amd_decode_err_code(u16 ec) | ||
728 | { | ||
729 | |||
730 | pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec)); | ||
731 | |||
732 | if (BUS_ERROR(ec)) | ||
733 | pr_cont(", mem/io: %s", II_MSG(ec)); | ||
734 | else | ||
735 | pr_cont(", tx: %s", TT_MSG(ec)); | ||
736 | |||
737 | if (MEM_ERROR(ec) || BUS_ERROR(ec)) { | ||
738 | pr_cont(", mem-tx: %s", R4_MSG(ec)); | ||
739 | |||
740 | if (BUS_ERROR(ec)) | ||
741 | pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec)); | ||
742 | } | ||
743 | |||
744 | pr_cont("\n"); | ||
745 | } | ||
746 | |||
747 | /* | ||
748 | * Filter out unwanted MCE signatures here. | ||
749 | */ | ||
750 | static bool amd_filter_mce(struct mce *m) | ||
751 | { | ||
752 | u8 xec = (m->status >> 16) & 0x1f; | ||
753 | |||
754 | /* | ||
755 | * NB GART TLB error reporting is disabled by default. | ||
756 | */ | ||
757 | if (m->bank == 4 && xec == 0x5 && !report_gart_errors) | ||
758 | return true; | ||
759 | |||
760 | return false; | ||
761 | } | ||
762 | |||
763 | int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) | ||
764 | { | ||
765 | struct mce *m = (struct mce *)data; | ||
766 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
767 | int node, ecc; | ||
768 | |||
769 | if (amd_filter_mce(m)) | ||
770 | return NOTIFY_STOP; | ||
771 | |||
772 | pr_emerg(HW_ERR "MC%d_STATUS[%s|%s|%s|%s|%s", | ||
773 | m->bank, | ||
774 | ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), | ||
775 | ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), | ||
776 | ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), | ||
777 | ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), | ||
778 | ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); | ||
779 | |||
780 | if (c->x86 == 0x15) | ||
781 | pr_cont("|%s|%s", | ||
782 | ((m->status & BIT_64(44)) ? "Deferred" : "-"), | ||
783 | ((m->status & BIT_64(43)) ? "Poison" : "-")); | ||
784 | |||
785 | /* do the two bits[14:13] together */ | ||
786 | ecc = (m->status >> 45) & 0x3; | ||
787 | if (ecc) | ||
788 | pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); | ||
789 | |||
790 | pr_cont("]: 0x%016llx\n", m->status); | ||
791 | |||
792 | |||
793 | switch (m->bank) { | ||
794 | case 0: | ||
795 | amd_decode_dc_mce(m); | ||
796 | break; | ||
797 | |||
798 | case 1: | ||
799 | amd_decode_ic_mce(m); | ||
800 | break; | ||
801 | |||
802 | case 2: | ||
803 | if (c->x86 == 0x15) | ||
804 | amd_decode_cu_mce(m); | ||
805 | else | ||
806 | amd_decode_bu_mce(m); | ||
807 | break; | ||
808 | |||
809 | case 3: | ||
810 | amd_decode_ls_mce(m); | ||
811 | break; | ||
812 | |||
813 | case 4: | ||
814 | node = amd_get_nb_id(m->extcpu); | ||
815 | amd_decode_nb_mce(node, m, 0); | ||
816 | break; | ||
817 | |||
818 | case 5: | ||
819 | amd_decode_fr_mce(m); | ||
820 | break; | ||
821 | |||
822 | case 6: | ||
823 | amd_decode_fp_mce(m); | ||
824 | break; | ||
825 | |||
826 | default: | ||
827 | break; | ||
828 | } | ||
829 | |||
830 | amd_decode_err_code(m->status & 0xffff); | ||
831 | |||
832 | return NOTIFY_STOP; | ||
833 | } | ||
834 | EXPORT_SYMBOL_GPL(amd_decode_mce); | ||
835 | |||
836 | static struct notifier_block amd_mce_dec_nb = { | ||
837 | .notifier_call = amd_decode_mce, | ||
838 | }; | ||
839 | |||
840 | static int __init mce_amd_init(void) | ||
841 | { | ||
842 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
843 | |||
844 | if (c->x86_vendor != X86_VENDOR_AMD) | ||
845 | return 0; | ||
846 | |||
847 | if ((c->x86 < 0xf || c->x86 > 0x12) && | ||
848 | (c->x86 != 0x14 || c->x86_model > 0xf) && | ||
849 | (c->x86 != 0x15 || c->x86_model > 0xf)) | ||
850 | return 0; | ||
851 | |||
852 | fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); | ||
853 | if (!fam_ops) | ||
854 | return -ENOMEM; | ||
855 | |||
856 | switch (c->x86) { | ||
857 | case 0xf: | ||
858 | fam_ops->dc_mce = k8_dc_mce; | ||
859 | fam_ops->ic_mce = k8_ic_mce; | ||
860 | fam_ops->nb_mce = k8_nb_mce; | ||
861 | break; | ||
862 | |||
863 | case 0x10: | ||
864 | fam_ops->dc_mce = f10h_dc_mce; | ||
865 | fam_ops->ic_mce = k8_ic_mce; | ||
866 | fam_ops->nb_mce = f10h_nb_mce; | ||
867 | break; | ||
868 | |||
869 | case 0x11: | ||
870 | fam_ops->dc_mce = k8_dc_mce; | ||
871 | fam_ops->ic_mce = k8_ic_mce; | ||
872 | fam_ops->nb_mce = f10h_nb_mce; | ||
873 | break; | ||
874 | |||
875 | case 0x12: | ||
876 | fam_ops->dc_mce = f12h_dc_mce; | ||
877 | fam_ops->ic_mce = k8_ic_mce; | ||
878 | fam_ops->nb_mce = nb_noop_mce; | ||
879 | break; | ||
880 | |||
881 | case 0x14: | ||
882 | nb_err_cpumask = 0x3; | ||
883 | fam_ops->dc_mce = f14h_dc_mce; | ||
884 | fam_ops->ic_mce = f14h_ic_mce; | ||
885 | fam_ops->nb_mce = nb_noop_mce; | ||
886 | break; | ||
887 | |||
888 | case 0x15: | ||
889 | xec_mask = 0x1f; | ||
890 | fam_ops->dc_mce = f15h_dc_mce; | ||
891 | fam_ops->ic_mce = f15h_ic_mce; | ||
892 | fam_ops->nb_mce = f10h_nb_mce; | ||
893 | break; | ||
894 | |||
895 | default: | ||
896 | printk(KERN_WARNING "Huh? What family is that: %d?!\n", c->x86); | ||
897 | kfree(fam_ops); | ||
898 | return -EINVAL; | ||
899 | } | ||
900 | |||
901 | pr_info("MCE: In-kernel MCE decoding enabled.\n"); | ||
902 | |||
903 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); | ||
904 | |||
905 | return 0; | ||
906 | } | ||
907 | early_initcall(mce_amd_init); | ||
908 | |||
#ifdef MODULE
/*
 * Module teardown: remove the decoder from x86_mce_decoder_chain, then
 * release the per-family callback table.
 */
static void __exit mce_amd_exit(void)
{
	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
	kfree(fam_ops);
}
module_exit(mce_amd_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
#endif