about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/edac
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2011-01-07 17:54:03 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-01-07 17:54:03 -0500
commit: 128283a47e7cc6754db3d2704004c1ed728d26db (patch)
tree: b3b0db4cd62600776f5e3dd5762798a8bfe793d2 /drivers/edac
parent: 442d1ba237c81304ccfa33887094e843183645f7 (diff)
parent: 6d5db4668796d903dc3bad2852c82073509c37d2 (diff)
Merge branch 'mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
* 'mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  EDAC, MCE: Fix NB error formatting
  EDAC, MCE: Use BIT_64() to eliminate warnings on 32-bit
  EDAC, MCE: Enable MCE decoding on F15h
  EDAC, MCE: Allow F15h bank 6 MCE injection
  EDAC, MCE: Shorten error report formatting
  EDAC, MCE: Overhaul error fields extraction macros
  EDAC, MCE: Add F15h FP MCE decoder
  EDAC, MCE: Add F15 EX MCE decoder
  EDAC, MCE: Add an F15h NB MCE decoder
  EDAC, MCE: No F15h LS MCE decoder
  EDAC, MCE: Add F15h CU MCE decoder
  EDAC, MCE: Add F15h IC MCE decoder
  EDAC, MCE: Add F15h DC MCE decoder
  EDAC, MCE: Select extended error code mask
Diffstat (limited to 'drivers/edac')
-rw-r--r-- drivers/edac/amd64_edac.c 4
-rw-r--r-- drivers/edac/mce_amd.c 450
-rw-r--r-- drivers/edac/mce_amd.h 14
-rw-r--r-- drivers/edac/mce_amd_inj.c 9
4 files changed, 359 insertions, 118 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 6bf7e248e75..4a5ecc58025 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1950,8 +1950,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
1950static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, 1950static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
1951 struct err_regs *info) 1951 struct err_regs *info)
1952{ 1952{
1953 u32 ec = ERROR_CODE(info->nbsl); 1953 u16 ec = EC(info->nbsl);
1954 u32 xec = EXT_ERROR_CODE(info->nbsl); 1954 u8 xec = XEC(info->nbsl, 0x1f);
1955 int ecc_type = (info->nbsh >> 13) & 0x3; 1955 int ecc_type = (info->nbsh >> 13) & 0x3;
1956 1956
1957 /* Bail early out if this was an 'observed' error */ 1957 /* Bail early out if this was an 'observed' error */
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index c0181093b49..f6cf73d9335 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -5,6 +5,7 @@
5 5
6static struct amd_decoder_ops *fam_ops; 6static struct amd_decoder_ops *fam_ops;
7 7
8static u8 xec_mask = 0xf;
8static u8 nb_err_cpumask = 0xf; 9static u8 nb_err_cpumask = 0xf;
9 10
10static bool report_gart_errors; 11static bool report_gart_errors;
@@ -74,57 +75,104 @@ static const char *f10h_nb_mce_desc[] = {
74 "ECC Error in the Probe Filter directory" 75 "ECC Error in the Probe Filter directory"
75}; 76};
76 77
77static bool f12h_dc_mce(u16 ec) 78static const char * const f15h_ic_mce_desc[] = {
79 "UC during a demand linefill from L2",
80 "Parity error during data load from IC",
81 "Parity error for IC valid bit",
82 "Main tag parity error",
83 "Parity error in prediction queue",
84 "PFB data/address parity error",
85 "Parity error in the branch status reg",
86 "PFB promotion address error",
87 "Tag error during probe/victimization",
88 "Parity error for IC probe tag valid bit",
89 "PFB non-cacheable bit parity error",
90 "PFB valid bit parity error", /* xec = 0xd */
91 "patch RAM", /* xec = 0x10 */
92 "uop queue",
93 "insn buffer",
94 "predecode buffer",
95 "fetch address FIFO"
96};
97
98static const char * const f15h_cu_mce_desc[] = {
99 "Fill ECC error on data fills", /* xec = 0x4 */
100 "Fill parity error on insn fills",
101 "Prefetcher request FIFO parity error",
102 "PRQ address parity error",
103 "PRQ data parity error",
104 "WCC Tag ECC error",
105 "WCC Data ECC error",
106 "WCB Data parity error",
107 "VB Data/ECC error",
108 "L2 Tag ECC error", /* xec = 0x10 */
109 "Hard L2 Tag ECC error",
110 "Multiple hits on L2 tag",
111 "XAB parity error",
112 "PRB address parity error"
113};
114
115static const char * const fr_ex_mce_desc[] = {
116 "CPU Watchdog timer expire",
117 "Wakeup array dest tag",
118 "AG payload array",
119 "EX payload array",
120 "IDRF array",
121 "Retire dispatch queue",
122 "Mapper checkpoint array",
123 "Physical register file EX0 port",
124 "Physical register file EX1 port",
125 "Physical register file AG0 port",
126 "Physical register file AG1 port",
127 "Flag register file",
128 "DE correctable error could not be corrected"
129};
130
131static bool f12h_dc_mce(u16 ec, u8 xec)
78{ 132{
79 bool ret = false; 133 bool ret = false;
80 134
81 if (MEM_ERROR(ec)) { 135 if (MEM_ERROR(ec)) {
82 u8 ll = ec & 0x3; 136 u8 ll = LL(ec);
83 ret = true; 137 ret = true;
84 138
85 if (ll == LL_L2) 139 if (ll == LL_L2)
86 pr_cont("during L1 linefill from L2.\n"); 140 pr_cont("during L1 linefill from L2.\n");
87 else if (ll == LL_L1) 141 else if (ll == LL_L1)
88 pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec)); 142 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
89 else 143 else
90 ret = false; 144 ret = false;
91 } 145 }
92 return ret; 146 return ret;
93} 147}
94 148
95static bool f10h_dc_mce(u16 ec) 149static bool f10h_dc_mce(u16 ec, u8 xec)
96{ 150{
97 u8 r4 = (ec >> 4) & 0xf; 151 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
98 u8 ll = ec & 0x3;
99
100 if (r4 == R4_GEN && ll == LL_L1) {
101 pr_cont("during data scrub.\n"); 152 pr_cont("during data scrub.\n");
102 return true; 153 return true;
103 } 154 }
104 return f12h_dc_mce(ec); 155 return f12h_dc_mce(ec, xec);
105} 156}
106 157
107static bool k8_dc_mce(u16 ec) 158static bool k8_dc_mce(u16 ec, u8 xec)
108{ 159{
109 if (BUS_ERROR(ec)) { 160 if (BUS_ERROR(ec)) {
110 pr_cont("during system linefill.\n"); 161 pr_cont("during system linefill.\n");
111 return true; 162 return true;
112 } 163 }
113 164
114 return f10h_dc_mce(ec); 165 return f10h_dc_mce(ec, xec);
115} 166}
116 167
117static bool f14h_dc_mce(u16 ec) 168static bool f14h_dc_mce(u16 ec, u8 xec)
118{ 169{
119 u8 r4 = (ec >> 4) & 0xf; 170 u8 r4 = R4(ec);
120 u8 ll = ec & 0x3;
121 u8 tt = (ec >> 2) & 0x3;
122 u8 ii = tt;
123 bool ret = true; 171 bool ret = true;
124 172
125 if (MEM_ERROR(ec)) { 173 if (MEM_ERROR(ec)) {
126 174
127 if (tt != TT_DATA || ll != LL_L1) 175 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
128 return false; 176 return false;
129 177
130 switch (r4) { 178 switch (r4) {
@@ -144,7 +192,7 @@ static bool f14h_dc_mce(u16 ec)
144 } 192 }
145 } else if (BUS_ERROR(ec)) { 193 } else if (BUS_ERROR(ec)) {
146 194
147 if ((ii != II_MEM && ii != II_IO) || ll != LL_LG) 195 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
148 return false; 196 return false;
149 197
150 pr_cont("System read data error on a "); 198 pr_cont("System read data error on a ");
@@ -169,39 +217,78 @@ static bool f14h_dc_mce(u16 ec)
169 return ret; 217 return ret;
170} 218}
171 219
220static bool f15h_dc_mce(u16 ec, u8 xec)
221{
222 bool ret = true;
223
224 if (MEM_ERROR(ec)) {
225
226 switch (xec) {
227 case 0x0:
228 pr_cont("Data Array access error.\n");
229 break;
230
231 case 0x1:
232 pr_cont("UC error during a linefill from L2/NB.\n");
233 break;
234
235 case 0x2:
236 case 0x11:
237 pr_cont("STQ access error.\n");
238 break;
239
240 case 0x3:
241 pr_cont("SCB access error.\n");
242 break;
243
244 case 0x10:
245 pr_cont("Tag error.\n");
246 break;
247
248 case 0x12:
249 pr_cont("LDQ access error.\n");
250 break;
251
252 default:
253 ret = false;
254 }
255 } else if (BUS_ERROR(ec)) {
256
257 if (!xec)
258 pr_cont("during system linefill.\n");
259 else
260 pr_cont(" Internal %s condition.\n",
261 ((xec == 1) ? "livelock" : "deadlock"));
262 } else
263 ret = false;
264
265 return ret;
266}
267
172static void amd_decode_dc_mce(struct mce *m) 268static void amd_decode_dc_mce(struct mce *m)
173{ 269{
174 u16 ec = m->status & 0xffff; 270 u16 ec = EC(m->status);
175 u8 xec = (m->status >> 16) & 0xf; 271 u8 xec = XEC(m->status, xec_mask);
176 272
177 pr_emerg(HW_ERR "Data Cache Error: "); 273 pr_emerg(HW_ERR "Data Cache Error: ");
178 274
179 /* TLB error signatures are the same across families */ 275 /* TLB error signatures are the same across families */
180 if (TLB_ERROR(ec)) { 276 if (TLB_ERROR(ec)) {
181 u8 tt = (ec >> 2) & 0x3; 277 if (TT(ec) == TT_DATA) {
182
183 if (tt == TT_DATA) {
184 pr_cont("%s TLB %s.\n", LL_MSG(ec), 278 pr_cont("%s TLB %s.\n", LL_MSG(ec),
185 (xec ? "multimatch" : "parity error")); 279 ((xec == 2) ? "locked miss"
280 : (xec ? "multimatch" : "parity")));
186 return; 281 return;
187 } 282 }
188 else 283 } else if (fam_ops->dc_mce(ec, xec))
189 goto wrong_dc_mce; 284 ;
190 } 285 else
191 286 pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
192 if (!fam_ops->dc_mce(ec))
193 goto wrong_dc_mce;
194
195 return;
196
197wrong_dc_mce:
198 pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
199} 287}
200 288
201static bool k8_ic_mce(u16 ec) 289static bool k8_ic_mce(u16 ec, u8 xec)
202{ 290{
203 u8 ll = ec & 0x3; 291 u8 ll = LL(ec);
204 u8 r4 = (ec >> 4) & 0xf;
205 bool ret = true; 292 bool ret = true;
206 293
207 if (!MEM_ERROR(ec)) 294 if (!MEM_ERROR(ec))
@@ -210,7 +297,7 @@ static bool k8_ic_mce(u16 ec)
210 if (ll == 0x2) 297 if (ll == 0x2)
211 pr_cont("during a linefill from L2.\n"); 298 pr_cont("during a linefill from L2.\n");
212 else if (ll == 0x1) { 299 else if (ll == 0x1) {
213 switch (r4) { 300 switch (R4(ec)) {
214 case R4_IRD: 301 case R4_IRD:
215 pr_cont("Parity error during data load.\n"); 302 pr_cont("Parity error during data load.\n");
216 break; 303 break;
@@ -233,15 +320,13 @@ static bool k8_ic_mce(u16 ec)
233 return ret; 320 return ret;
234} 321}
235 322
236static bool f14h_ic_mce(u16 ec) 323static bool f14h_ic_mce(u16 ec, u8 xec)
237{ 324{
238 u8 ll = ec & 0x3; 325 u8 r4 = R4(ec);
239 u8 tt = (ec >> 2) & 0x3;
240 u8 r4 = (ec >> 4) & 0xf;
241 bool ret = true; 326 bool ret = true;
242 327
243 if (MEM_ERROR(ec)) { 328 if (MEM_ERROR(ec)) {
244 if (tt != 0 || ll != 1) 329 if (TT(ec) != 0 || LL(ec) != 1)
245 ret = false; 330 ret = false;
246 331
247 if (r4 == R4_IRD) 332 if (r4 == R4_IRD)
@@ -254,10 +339,36 @@ static bool f14h_ic_mce(u16 ec)
254 return ret; 339 return ret;
255} 340}
256 341
342static bool f15h_ic_mce(u16 ec, u8 xec)
343{
344 bool ret = true;
345
346 if (!MEM_ERROR(ec))
347 return false;
348
349 switch (xec) {
350 case 0x0 ... 0xa:
351 pr_cont("%s.\n", f15h_ic_mce_desc[xec]);
352 break;
353
354 case 0xd:
355 pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]);
356 break;
357
358 case 0x10 ... 0x14:
359 pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]);
360 break;
361
362 default:
363 ret = false;
364 }
365 return ret;
366}
367
257static void amd_decode_ic_mce(struct mce *m) 368static void amd_decode_ic_mce(struct mce *m)
258{ 369{
259 u16 ec = m->status & 0xffff; 370 u16 ec = EC(m->status);
260 u8 xec = (m->status >> 16) & 0xf; 371 u8 xec = XEC(m->status, xec_mask);
261 372
262 pr_emerg(HW_ERR "Instruction Cache Error: "); 373 pr_emerg(HW_ERR "Instruction Cache Error: ");
263 374
@@ -268,7 +379,7 @@ static void amd_decode_ic_mce(struct mce *m)
268 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); 379 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
269 380
270 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); 381 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
271 } else if (fam_ops->ic_mce(ec)) 382 } else if (fam_ops->ic_mce(ec, xec))
272 ; 383 ;
273 else 384 else
274 pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); 385 pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
@@ -276,8 +387,8 @@ static void amd_decode_ic_mce(struct mce *m)
276 387
277static void amd_decode_bu_mce(struct mce *m) 388static void amd_decode_bu_mce(struct mce *m)
278{ 389{
279 u32 ec = m->status & 0xffff; 390 u16 ec = EC(m->status);
280 u32 xec = (m->status >> 16) & 0xf; 391 u8 xec = XEC(m->status, xec_mask);
281 392
282 pr_emerg(HW_ERR "Bus Unit Error"); 393 pr_emerg(HW_ERR "Bus Unit Error");
283 394
@@ -286,23 +397,23 @@ static void amd_decode_bu_mce(struct mce *m)
286 else if (xec == 0x3) 397 else if (xec == 0x3)
287 pr_cont(" in the victim data buffers.\n"); 398 pr_cont(" in the victim data buffers.\n");
288 else if (xec == 0x2 && MEM_ERROR(ec)) 399 else if (xec == 0x2 && MEM_ERROR(ec))
289 pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec)); 400 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
290 else if (xec == 0x0) { 401 else if (xec == 0x0) {
291 if (TLB_ERROR(ec)) 402 if (TLB_ERROR(ec))
292 pr_cont(": %s error in a Page Descriptor Cache or " 403 pr_cont(": %s error in a Page Descriptor Cache or "
293 "Guest TLB.\n", TT_MSG(ec)); 404 "Guest TLB.\n", TT_MSG(ec));
294 else if (BUS_ERROR(ec)) 405 else if (BUS_ERROR(ec))
295 pr_cont(": %s/ECC error in data read from NB: %s.\n", 406 pr_cont(": %s/ECC error in data read from NB: %s.\n",
296 RRRR_MSG(ec), PP_MSG(ec)); 407 R4_MSG(ec), PP_MSG(ec));
297 else if (MEM_ERROR(ec)) { 408 else if (MEM_ERROR(ec)) {
298 u8 rrrr = (ec >> 4) & 0xf; 409 u8 r4 = R4(ec);
299 410
300 if (rrrr >= 0x7) 411 if (r4 >= 0x7)
301 pr_cont(": %s error during data copyback.\n", 412 pr_cont(": %s error during data copyback.\n",
302 RRRR_MSG(ec)); 413 R4_MSG(ec));
303 else if (rrrr <= 0x1) 414 else if (r4 <= 0x1)
304 pr_cont(": %s parity/ECC error during data " 415 pr_cont(": %s parity/ECC error during data "
305 "access from L2.\n", RRRR_MSG(ec)); 416 "access from L2.\n", R4_MSG(ec));
306 else 417 else
307 goto wrong_bu_mce; 418 goto wrong_bu_mce;
308 } else 419 } else
@@ -316,12 +427,52 @@ wrong_bu_mce:
316 pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); 427 pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
317} 428}
318 429
430static void amd_decode_cu_mce(struct mce *m)
431{
432 u16 ec = EC(m->status);
433 u8 xec = XEC(m->status, xec_mask);
434
435 pr_emerg(HW_ERR "Combined Unit Error: ");
436
437 if (TLB_ERROR(ec)) {
438 if (xec == 0x0)
439 pr_cont("Data parity TLB read error.\n");
440 else if (xec == 0x1)
441 pr_cont("Poison data provided for TLB fill.\n");
442 else
443 goto wrong_cu_mce;
444 } else if (BUS_ERROR(ec)) {
445 if (xec > 2)
446 goto wrong_cu_mce;
447
448 pr_cont("Error during attempted NB data read.\n");
449 } else if (MEM_ERROR(ec)) {
450 switch (xec) {
451 case 0x4 ... 0xc:
452 pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]);
453 break;
454
455 case 0x10 ... 0x14:
456 pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]);
457 break;
458
459 default:
460 goto wrong_cu_mce;
461 }
462 }
463
464 return;
465
466wrong_cu_mce:
467 pr_emerg(HW_ERR "Corrupted CU MCE info?\n");
468}
469
319static void amd_decode_ls_mce(struct mce *m) 470static void amd_decode_ls_mce(struct mce *m)
320{ 471{
321 u16 ec = m->status & 0xffff; 472 u16 ec = EC(m->status);
322 u8 xec = (m->status >> 16) & 0xf; 473 u8 xec = XEC(m->status, xec_mask);
323 474
324 if (boot_cpu_data.x86 == 0x14) { 475 if (boot_cpu_data.x86 >= 0x14) {
325 pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," 476 pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
326 " please report on LKML.\n"); 477 " please report on LKML.\n");
327 return; 478 return;
@@ -330,12 +481,12 @@ static void amd_decode_ls_mce(struct mce *m)
330 pr_emerg(HW_ERR "Load Store Error"); 481 pr_emerg(HW_ERR "Load Store Error");
331 482
332 if (xec == 0x0) { 483 if (xec == 0x0) {
333 u8 r4 = (ec >> 4) & 0xf; 484 u8 r4 = R4(ec);
334 485
335 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) 486 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
336 goto wrong_ls_mce; 487 goto wrong_ls_mce;
337 488
338 pr_cont(" during %s.\n", RRRR_MSG(ec)); 489 pr_cont(" during %s.\n", R4_MSG(ec));
339 } else 490 } else
340 goto wrong_ls_mce; 491 goto wrong_ls_mce;
341 492
@@ -410,6 +561,15 @@ static bool f10h_nb_mce(u16 ec, u8 xec)
410 goto out; 561 goto out;
411 break; 562 break;
412 563
564 case 0x19:
565 if (boot_cpu_data.x86 == 0x15)
566 pr_cont("Compute Unit Data Error.\n");
567 else
568 ret = false;
569
570 goto out;
571 break;
572
413 case 0x1c ... 0x1f: 573 case 0x1c ... 0x1f:
414 offset = 24; 574 offset = 24;
415 break; 575 break;
@@ -434,27 +594,30 @@ static bool nb_noop_mce(u16 ec, u8 xec)
434 594
435void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) 595void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg)
436{ 596{
437 u8 xec = (m->status >> 16) & 0x1f; 597 u16 ec = EC(m->status);
438 u16 ec = m->status & 0xffff; 598 u8 xec = XEC(m->status, 0x1f);
439 u32 nbsh = (u32)(m->status >> 32); 599 u32 nbsh = (u32)(m->status >> 32);
600 int core = -1;
440 601
441 pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id); 602 pr_emerg(HW_ERR "Northbridge Error (node %d", node_id);
442 603
443 /* 604 /* F10h, revD can disable ErrCpu[3:0] through ErrCpuVal */
444 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
445 * value encoding has changed so interpret those differently
446 */
447 if ((boot_cpu_data.x86 == 0x10) && 605 if ((boot_cpu_data.x86 == 0x10) &&
448 (boot_cpu_data.x86_model > 7)) { 606 (boot_cpu_data.x86_model > 7)) {
449 if (nbsh & K8_NBSH_ERR_CPU_VAL) 607 if (nbsh & K8_NBSH_ERR_CPU_VAL)
450 pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask)); 608 core = nbsh & nb_err_cpumask;
451 } else { 609 } else {
452 u8 assoc_cpus = nbsh & nb_err_cpumask; 610 u8 assoc_cpus = nbsh & nb_err_cpumask;
453 611
454 if (assoc_cpus > 0) 612 if (assoc_cpus > 0)
455 pr_cont(", core: %d", fls(assoc_cpus) - 1); 613 core = fls(assoc_cpus) - 1;
456 } 614 }
457 615
616 if (core >= 0)
617 pr_cont(", core %d): ", core);
618 else
619 pr_cont("): ");
620
458 switch (xec) { 621 switch (xec) {
459 case 0x2: 622 case 0x2:
460 pr_cont("Sync error (sync packets on HT link detected).\n"); 623 pr_cont("Sync error (sync packets on HT link detected).\n");
@@ -496,35 +659,89 @@ EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
496 659
497static void amd_decode_fr_mce(struct mce *m) 660static void amd_decode_fr_mce(struct mce *m)
498{ 661{
499 if (boot_cpu_data.x86 == 0xf || 662 struct cpuinfo_x86 *c = &boot_cpu_data;
500 boot_cpu_data.x86 == 0x11) 663 u8 xec = XEC(m->status, xec_mask);
664
665 if (c->x86 == 0xf || c->x86 == 0x11)
501 goto wrong_fr_mce; 666 goto wrong_fr_mce;
502 667
503 /* we have only one error signature so match all fields at once. */ 668 if (c->x86 != 0x15 && xec != 0x0)
504 if ((m->status & 0xffff) == 0x0f0f) { 669 goto wrong_fr_mce;
505 pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n"); 670
506 return; 671 pr_emerg(HW_ERR "%s Error: ",
507 } 672 (c->x86 == 0x15 ? "Execution Unit" : "FIROB"));
673
674 if (xec == 0x0 || xec == 0xc)
675 pr_cont("%s.\n", fr_ex_mce_desc[xec]);
676 else if (xec < 0xd)
677 pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]);
678 else
679 goto wrong_fr_mce;
680
681 return;
508 682
509wrong_fr_mce: 683wrong_fr_mce:
510 pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); 684 pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
511} 685}
512 686
687static void amd_decode_fp_mce(struct mce *m)
688{
689 u8 xec = XEC(m->status, xec_mask);
690
691 pr_emerg(HW_ERR "Floating Point Unit Error: ");
692
693 switch (xec) {
694 case 0x1:
695 pr_cont("Free List");
696 break;
697
698 case 0x2:
699 pr_cont("Physical Register File");
700 break;
701
702 case 0x3:
703 pr_cont("Retire Queue");
704 break;
705
706 case 0x4:
707 pr_cont("Scheduler table");
708 break;
709
710 case 0x5:
711 pr_cont("Status Register File");
712 break;
713
714 default:
715 goto wrong_fp_mce;
716 break;
717 }
718
719 pr_cont(" parity error.\n");
720
721 return;
722
723wrong_fp_mce:
724 pr_emerg(HW_ERR "Corrupted FP MCE info?\n");
725}
726
513static inline void amd_decode_err_code(u16 ec) 727static inline void amd_decode_err_code(u16 ec)
514{ 728{
515 if (TLB_ERROR(ec)) { 729
516 pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n", 730 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
517 TT_MSG(ec), LL_MSG(ec)); 731
518 } else if (MEM_ERROR(ec)) { 732 if (BUS_ERROR(ec))
519 pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n", 733 pr_cont(", mem/io: %s", II_MSG(ec));
520 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); 734 else
521 } else if (BUS_ERROR(ec)) { 735 pr_cont(", tx: %s", TT_MSG(ec));
522 pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, " 736
523 "Participating Processor: %s\n", 737 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
524 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), 738 pr_cont(", mem-tx: %s", R4_MSG(ec));
525 PP_MSG(ec)); 739
526 } else 740 if (BUS_ERROR(ec))
527 pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec); 741 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
742 }
743
744 pr_cont("\n");
528} 745}
529 746
530/* 747/*
@@ -546,25 +763,32 @@ static bool amd_filter_mce(struct mce *m)
546int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) 763int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
547{ 764{
548 struct mce *m = (struct mce *)data; 765 struct mce *m = (struct mce *)data;
766 struct cpuinfo_x86 *c = &boot_cpu_data;
549 int node, ecc; 767 int node, ecc;
550 768
551 if (amd_filter_mce(m)) 769 if (amd_filter_mce(m))
552 return NOTIFY_STOP; 770 return NOTIFY_STOP;
553 771
554 pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank); 772 pr_emerg(HW_ERR "MC%d_STATUS[%s|%s|%s|%s|%s",
773 m->bank,
774 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
775 ((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
776 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
777 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
778 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
555 779
556 pr_cont("%sorrected error, other errors lost: %s, " 780 if (c->x86 == 0x15)
557 "CPU context corrupt: %s", 781 pr_cont("|%s|%s",
558 ((m->status & MCI_STATUS_UC) ? "Unc" : "C"), 782 ((m->status & BIT_64(44)) ? "Deferred" : "-"),
559 ((m->status & MCI_STATUS_OVER) ? "yes" : "no"), 783 ((m->status & BIT_64(43)) ? "Poison" : "-"));
560 ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
561 784
562 /* do the two bits[14:13] together */ 785 /* do the two bits[14:13] together */
563 ecc = (m->status >> 45) & 0x3; 786 ecc = (m->status >> 45) & 0x3;
564 if (ecc) 787 if (ecc)
565 pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); 788 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
789
790 pr_cont("]: 0x%016llx\n", m->status);
566 791
567 pr_cont("\n");
568 792
569 switch (m->bank) { 793 switch (m->bank) {
570 case 0: 794 case 0:
@@ -576,7 +800,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
576 break; 800 break;
577 801
578 case 2: 802 case 2:
579 amd_decode_bu_mce(m); 803 if (c->x86 == 0x15)
804 amd_decode_cu_mce(m);
805 else
806 amd_decode_bu_mce(m);
580 break; 807 break;
581 808
582 case 3: 809 case 3:
@@ -592,6 +819,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
592 amd_decode_fr_mce(m); 819 amd_decode_fr_mce(m);
593 break; 820 break;
594 821
822 case 6:
823 amd_decode_fp_mce(m);
824 break;
825
595 default: 826 default:
596 break; 827 break;
597 } 828 }
@@ -608,18 +839,21 @@ static struct notifier_block amd_mce_dec_nb = {
608 839
609static int __init mce_amd_init(void) 840static int __init mce_amd_init(void)
610{ 841{
611 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 842 struct cpuinfo_x86 *c = &boot_cpu_data;
843
844 if (c->x86_vendor != X86_VENDOR_AMD)
612 return 0; 845 return 0;
613 846
614 if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) && 847 if ((c->x86 < 0xf || c->x86 > 0x12) &&
615 (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf)) 848 (c->x86 != 0x14 || c->x86_model > 0xf) &&
849 (c->x86 != 0x15 || c->x86_model > 0xf))
616 return 0; 850 return 0;
617 851
618 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); 852 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
619 if (!fam_ops) 853 if (!fam_ops)
620 return -ENOMEM; 854 return -ENOMEM;
621 855
622 switch (boot_cpu_data.x86) { 856 switch (c->x86) {
623 case 0xf: 857 case 0xf:
624 fam_ops->dc_mce = k8_dc_mce; 858 fam_ops->dc_mce = k8_dc_mce;
625 fam_ops->ic_mce = k8_ic_mce; 859 fam_ops->ic_mce = k8_ic_mce;
@@ -651,9 +885,15 @@ static int __init mce_amd_init(void)
651 fam_ops->nb_mce = nb_noop_mce; 885 fam_ops->nb_mce = nb_noop_mce;
652 break; 886 break;
653 887
888 case 0x15:
889 xec_mask = 0x1f;
890 fam_ops->dc_mce = f15h_dc_mce;
891 fam_ops->ic_mce = f15h_ic_mce;
892 fam_ops->nb_mce = f10h_nb_mce;
893 break;
894
654 default: 895 default:
655 printk(KERN_WARNING "Huh? What family is that: %d?!\n", 896 printk(KERN_WARNING "Huh? What family is that: %d?!\n", c->x86);
656 boot_cpu_data.x86);
657 kfree(fam_ops); 897 kfree(fam_ops);
658 return -EINVAL; 898 return -EINVAL;
659 } 899 }
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 35f6e0e3b29..45dda47173f 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -7,8 +7,8 @@
7 7
8#define BIT_64(n) (U64_C(1) << (n)) 8#define BIT_64(n) (U64_C(1) << (n))
9 9
10#define ERROR_CODE(x) ((x) & 0xffff) 10#define EC(x) ((x) & 0xffff)
11#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) 11#define XEC(x, mask) (((x) >> 16) & mask)
12 12
13#define LOW_SYNDROME(x) (((x) >> 15) & 0xff) 13#define LOW_SYNDROME(x) (((x) >> 15) & 0xff)
14#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) 14#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff)
@@ -21,15 +21,15 @@
21#define TT_MSG(x) tt_msgs[TT(x)] 21#define TT_MSG(x) tt_msgs[TT(x)]
22#define II(x) (((x) >> 2) & 0x3) 22#define II(x) (((x) >> 2) & 0x3)
23#define II_MSG(x) ii_msgs[II(x)] 23#define II_MSG(x) ii_msgs[II(x)]
24#define LL(x) (((x) >> 0) & 0x3) 24#define LL(x) ((x) & 0x3)
25#define LL_MSG(x) ll_msgs[LL(x)] 25#define LL_MSG(x) ll_msgs[LL(x)]
26#define TO(x) (((x) >> 8) & 0x1) 26#define TO(x) (((x) >> 8) & 0x1)
27#define TO_MSG(x) to_msgs[TO(x)] 27#define TO_MSG(x) to_msgs[TO(x)]
28#define PP(x) (((x) >> 9) & 0x3) 28#define PP(x) (((x) >> 9) & 0x3)
29#define PP_MSG(x) pp_msgs[PP(x)] 29#define PP_MSG(x) pp_msgs[PP(x)]
30 30
31#define RRRR(x) (((x) >> 4) & 0xf) 31#define R4(x) (((x) >> 4) & 0xf)
32#define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!") 32#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!")
33 33
34#define K8_NBSH 0x4C 34#define K8_NBSH 0x4C
35 35
@@ -100,8 +100,8 @@ struct err_regs {
100 * per-family decoder ops 100 * per-family decoder ops
101 */ 101 */
102struct amd_decoder_ops { 102struct amd_decoder_ops {
103 bool (*dc_mce)(u16); 103 bool (*dc_mce)(u16, u8);
104 bool (*ic_mce)(u16); 104 bool (*ic_mce)(u16, u8);
105 bool (*nb_mce)(u16, u8); 105 bool (*nb_mce)(u16, u8);
106}; 106};
107 107
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index 39faded3cad..733a7e7a8d6 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -88,10 +88,11 @@ static ssize_t edac_inject_bank_store(struct kobject *kobj,
88 return -EINVAL; 88 return -EINVAL;
89 } 89 }
90 90
91 if (value > 5) { 91 if (value > 5)
92 printk(KERN_ERR "Non-existant MCE bank: %lu\n", value); 92 if (boot_cpu_data.x86 != 0x15 || value > 6) {
93 return -EINVAL; 93 printk(KERN_ERR "Non-existant MCE bank: %lu\n", value);
94 } 94 return -EINVAL;
95 }
95 96
96 i_mce.bank = value; 97 i_mce.bank = value;
97 98