aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/edac/mce_amd.c158
-rw-r--r--drivers/edac/mce_amd.h40
2 files changed, 171 insertions, 27 deletions
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 5eb8042d0c6a..33985aa61356 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1,6 +1,10 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/slab.h>
3
2#include "mce_amd.h" 4#include "mce_amd.h"
3 5
6static struct amd_decoder_ops *fam_ops;
7
4static bool report_gart_errors; 8static bool report_gart_errors;
5static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg); 9static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg);
6 10
@@ -97,41 +101,116 @@ const char *ext_msgs[] = {
97}; 101};
98EXPORT_SYMBOL_GPL(ext_msgs); 102EXPORT_SYMBOL_GPL(ext_msgs);
99 103
100static void amd_decode_dc_mce(struct mce *m) 104static bool f10h_dc_mce(u16 ec)
101{ 105{
102 u32 ec = m->status & 0xffff; 106 u8 r4 = (ec >> 4) & 0xf;
103 u32 xec = (m->status >> 16) & 0xf; 107 bool ret = false;
104 108
105 pr_emerg(HW_ERR "Data Cache Error: "); 109 if (r4 == R4_GEN) {
110 pr_cont("during data scrub.\n");
111 return true;
112 }
106 113
107 if (xec == 1 && TLB_ERROR(ec)) 114 if (MEM_ERROR(ec)) {
108 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); 115 u8 ll = ec & 0x3;
109 else if (xec == 0) { 116 ret = true;
110 if (m->status & (1ULL << 40))
111 pr_cont(" during Data Scrub.\n");
112 else if (TLB_ERROR(ec))
113 pr_cont(": %s TLB parity error.\n", LL_MSG(ec));
114 else if (MEM_ERROR(ec)) {
115 u8 ll = ec & 0x3;
116 u8 tt = (ec >> 2) & 0x3;
117 u8 rrrr = (ec >> 4) & 0xf;
118 117
119 /* see F10h BKDG (31116), Table 92. */ 118 if (ll == LL_L2)
120 if (ll == 0x1) { 119 pr_cont("during L1 linefill from L2.\n");
121 if (tt != 0x1) 120 else if (ll == LL_L1)
122 goto wrong_dc_mce; 121 pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec));
122 else
123 ret = false;
124 }
125 return ret;
126}
123 127
124 pr_cont(": Data/Tag %s error.\n", RRRR_MSG(ec)); 128static bool k8_dc_mce(u16 ec)
129{
130 if (BUS_ERROR(ec)) {
131 pr_cont("during system linefill.\n");
132 return true;
133 }
125 134
126 } else if (ll == 0x2 && rrrr == 0x3) 135 return f10h_dc_mce(ec);
127 pr_cont(" during L1 linefill from L2.\n"); 136}
128 else 137
129 goto wrong_dc_mce; 138static bool f14h_dc_mce(u16 ec)
130 } else if (BUS_ERROR(ec) && boot_cpu_data.x86 == 0xf) 139{
131 pr_cont(" during system linefill.\n"); 140 u8 r4 = (ec >> 4) & 0xf;
141 u8 ll = ec & 0x3;
142 u8 tt = (ec >> 2) & 0x3;
143 u8 ii = tt;
144 bool ret = true;
145
146 if (MEM_ERROR(ec)) {
147
148 if (tt != TT_DATA || ll != LL_L1)
149 return false;
150
151 switch (r4) {
152 case R4_DRD:
153 case R4_DWR:
154 pr_cont("Data/Tag parity error due to %s.\n",
155 (r4 == R4_DRD ? "load/hw prf" : "store"));
156 break;
157 case R4_EVICT:
158 pr_cont("Copyback parity error on a tag miss.\n");
159 break;
160 case R4_SNOOP:
161 pr_cont("Tag parity error during snoop.\n");
162 break;
163 default:
164 ret = false;
165 }
166 } else if (BUS_ERROR(ec)) {
167
168 if ((ii != II_MEM && ii != II_IO) || ll != LL_LG)
169 return false;
170
171 pr_cont("System read data error on a ");
172
173 switch (r4) {
174 case R4_RD:
175 pr_cont("TLB reload.\n");
176 break;
177 case R4_DWR:
178 pr_cont("store.\n");
179 break;
180 case R4_DRD:
181 pr_cont("load.\n");
182 break;
183 default:
184 ret = false;
185 }
186 } else {
187 ret = false;
188 }
189
190 return ret;
191}
192
193static void amd_decode_dc_mce(struct mce *m)
194{
195 u16 ec = m->status & 0xffff;
196 u8 xec = (m->status >> 16) & 0xf;
197
198 pr_emerg(HW_ERR "Data Cache Error: ");
199
200 /* TLB error signatures are the same across families */
201 if (TLB_ERROR(ec)) {
202 u8 tt = (ec >> 2) & 0x3;
203
204 if (tt == TT_DATA) {
205 pr_cont("%s TLB %s.\n", LL_MSG(ec),
206 (xec ? "multimatch" : "parity error"));
207 return;
208 }
132 else 209 else
133 goto wrong_dc_mce; 210 goto wrong_dc_mce;
134 } else 211 }
212
213 if (!fam_ops->dc_mce(ec))
135 goto wrong_dc_mce; 214 goto wrong_dc_mce;
136 215
137 return; 216 return;
@@ -395,6 +474,30 @@ static int __init mce_amd_init(void)
395 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) 474 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
396 return 0; 475 return 0;
397 476
477 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
478 if (!fam_ops)
479 return -ENOMEM;
480
481 switch (boot_cpu_data.x86) {
482 case 0xf:
483 fam_ops->dc_mce = k8_dc_mce;
484 break;
485
486 case 0x10:
487 fam_ops->dc_mce = f10h_dc_mce;
488 break;
489
490 case 0x14:
491 fam_ops->dc_mce = f14h_dc_mce;
492 break;
493
494 default:
495 printk(KERN_WARNING "Huh? What family is that: %d?!\n",
496 boot_cpu_data.x86);
497 kfree(fam_ops);
498 return -EINVAL;
499 }
500
398 atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); 501 atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
399 502
400 return 0; 503 return 0;
@@ -405,6 +508,7 @@ early_initcall(mce_amd_init);
405static void __exit mce_amd_exit(void) 508static void __exit mce_amd_exit(void)
406{ 509{
407 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); 510 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
511 kfree(fam_ops);
408} 512}
409 513
410MODULE_DESCRIPTION("AMD MCE decoder"); 514MODULE_DESCRIPTION("AMD MCE decoder");
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 2712a906afdf..85985c225442 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -44,6 +44,39 @@
44#define K8_NBSH_UECC BIT(13) 44#define K8_NBSH_UECC BIT(13)
45#define K8_NBSH_ERR_SCRUBER BIT(8) 45#define K8_NBSH_ERR_SCRUBER BIT(8)
46 46
47enum tt_ids {
48 TT_INSTR = 0,
49 TT_DATA,
50 TT_GEN,
51 TT_RESV,
52};
53
54enum ll_ids {
55 LL_RESV = 0,
56 LL_L1,
57 LL_L2,
58 LL_LG,
59};
60
61enum ii_ids {
62 II_MEM = 0,
63 II_RESV,
64 II_IO,
65 II_GEN,
66};
67
68enum rrrr_ids {
69 R4_GEN = 0,
70 R4_RD,
71 R4_WR,
72 R4_DRD,
73 R4_DWR,
74 R4_IRD,
75 R4_PREF,
76 R4_EVICT,
77 R4_SNOOP,
78};
79
47extern const char *tt_msgs[]; 80extern const char *tt_msgs[];
48extern const char *ll_msgs[]; 81extern const char *ll_msgs[];
49extern const char *rrrr_msgs[]; 82extern const char *rrrr_msgs[];
@@ -63,6 +96,13 @@ struct err_regs {
63 u32 nbeal; 96 u32 nbeal;
64}; 97};
65 98
99/*
100 * per-family decoder ops
101 */
102struct amd_decoder_ops {
103 bool (*dc_mce)(u16);
104};
105
66void amd_report_gart_errors(bool); 106void amd_report_gart_errors(bool);
67void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)); 107void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32));
68void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)); 108void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32));