-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce.c	|  12
-rw-r--r--	drivers/edac/Makefile			|   6
-rw-r--r--	drivers/edac/amd64_edac.c		| 328
-rw-r--r--	drivers/edac/amd64_edac.h		|  71
-rw-r--r--	drivers/edac/amd64_edac_dbg.c		|   2
-rw-r--r--	drivers/edac/amd64_edac_err_types.c	| 161
-rw-r--r--	drivers/edac/edac_mce_amd.c		| 422
-rw-r--r--	drivers/edac/edac_mce_amd.h		|  69
8 files changed, 619 insertions(+), 452 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 01213048f62f..9bfe9d2ea615 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -183,6 +183,11 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
+void __weak decode_mce(struct mce *m)
+{
+	return;
+}
+
 static void print_mce(struct mce *m)
 {
 	printk(KERN_EMERG
@@ -205,6 +210,8 @@ static void print_mce(struct mce *m)
 	printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
 		m->cpuvendor, m->cpuid, m->time, m->socketid,
 		m->apicid);
+
+	decode_mce(m);
 }
 
 static void print_mce_head(void)
@@ -215,7 +222,10 @@ static void print_mce_head(void)
 static void print_mce_tail(void)
 {
 	printk(KERN_EMERG "This is not a software problem!\n"
-	"Run through mcelog --ascii to decode and contact your hardware vendor\n");
+#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
+	"Run through mcelog --ascii to decode and contact your hardware vendor\n"
+#endif
+	);
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
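
The hook above works via weak linking: the empty __weak decode_mce() in mce.c is only a fallback, and the strong decode_mce() that edac_mce_amd.c provides further down in this patch replaces it when that code is linked into the kernel image. Below is a minimal standalone sketch of the same pattern, with invented names (fake_mce, decode_fake_mce); it illustrates weak-symbol overriding only and is not kernel code.

/* Weak-default hook, analogous to the __weak decode_mce() stub above. */
#include <stdio.h>

struct fake_mce {
	unsigned long long status;
};

/* Weak no-op fallback; a strong definition in another object file wins. */
__attribute__((weak)) void decode_fake_mce(struct fake_mce *m)
{
	(void)m;	/* nothing to decode unless somebody overrides us */
}

int main(void)
{
	struct fake_mce m = { .status = 0 };

	/*
	 * If another object linked into this program defines a strong
	 * decode_fake_mce(), the linker resolves this call to it; otherwise
	 * the weak stub above runs and the call is a no-op.
	 */
	decode_fake_mce(&m);
	return 0;
}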
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 98aa4a7db412..cfa033ce53a7 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -17,6 +17,10 @@ ifdef CONFIG_PCI
 edac_core-objs	+= edac_pci.o edac_pci_sysfs.o
 endif
 
+ifdef CONFIG_CPU_SUP_AMD
+edac_core-objs	+= edac_mce_amd.o
+endif
+
 obj-$(CONFIG_EDAC_AMD76X)		+= amd76x_edac.o
 obj-$(CONFIG_EDAC_CPC925)		+= cpc925_edac.o
 obj-$(CONFIG_EDAC_I5000)		+= i5000_edac.o
@@ -32,7 +36,7 @@ obj-$(CONFIG_EDAC_X38) += x38_edac.o
 obj-$(CONFIG_EDAC_I82860)		+= i82860_edac.o
 obj-$(CONFIG_EDAC_R82600)		+= r82600_edac.o
 
-amd64_edac_mod-y := amd64_edac_err_types.o amd64_edac.o
+amd64_edac_mod-y := amd64_edac.o
 amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o
 amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
 
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index e2a10bcba7a1..173dc4a84166 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -19,6 +19,63 @@ static struct mem_ctl_info *mci_lookup[MAX_NUMNODES];
 static struct amd64_pvt *pvt_lookup[MAX_NUMNODES];
 
 /*
+ * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
+ * for DDR2 DRAM mapping.
+ */
+u32 revf_quad_ddr2_shift[] = {
+	0,	/* 0000b NULL DIMM (128mb) */
+	28,	/* 0001b 256mb */
+	29,	/* 0010b 512mb */
+	29,	/* 0011b 512mb */
+	29,	/* 0100b 512mb */
+	30,	/* 0101b 1gb */
+	30,	/* 0110b 1gb */
+	31,	/* 0111b 2gb */
+	31,	/* 1000b 2gb */
+	32,	/* 1001b 4gb */
+	32,	/* 1010b 4gb */
+	33,	/* 1011b 8gb */
+	0,	/* 1100b future */
+	0,	/* 1101b future */
+	0,	/* 1110b future */
+	0	/* 1111b future */
+};
+
+/*
+ * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
+ * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
+ * or higher value'.
+ *
+ *FIXME: Produce a better mapping/linearisation.
+ */
+
+struct scrubrate scrubrates[] = {
+	{ 0x01, 1600000000UL},
+	{ 0x02, 800000000UL},
+	{ 0x03, 400000000UL},
+	{ 0x04, 200000000UL},
+	{ 0x05, 100000000UL},
+	{ 0x06, 50000000UL},
+	{ 0x07, 25000000UL},
+	{ 0x08, 12284069UL},
+	{ 0x09, 6274509UL},
+	{ 0x0A, 3121951UL},
+	{ 0x0B, 1560975UL},
+	{ 0x0C, 781440UL},
+	{ 0x0D, 390720UL},
+	{ 0x0E, 195300UL},
+	{ 0x0F, 97650UL},
+	{ 0x10, 48854UL},
+	{ 0x11, 24427UL},
+	{ 0x12, 12213UL},
+	{ 0x13, 6101UL},
+	{ 0x14, 3051UL},
+	{ 0x15, 1523UL},
+	{ 0x16, 761UL},
+	{ 0x00, 0UL},	/* scrubbing off */
+};
+
+/*
  * Memory scrubber control interface. For K8, memory scrubbing is handled by
  * hardware and can involve L2 cache, dcache as well as the main memory. With
  * F10, this is extended to L3 cache scrubbing on CPU models sporting that
@@ -693,7 +750,7 @@ static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
  * specific.
  */
 static u64 extract_error_address(struct mem_ctl_info *mci,
-				 struct amd64_error_info_regs *info)
+				 struct err_regs *info)
 {
 	struct amd64_pvt *pvt = mci->pvt_info;
 
@@ -1049,7 +1106,7 @@ static int k8_early_channel_count(struct amd64_pvt *pvt)
 
 /* extract the ERROR ADDRESS for the K8 CPUs */
 static u64 k8_get_error_address(struct mem_ctl_info *mci,
-				struct amd64_error_info_regs *info)
+				struct err_regs *info)
 {
 	return (((u64) (info->nbeah & 0xff)) << 32) +
 		(info->nbeal & ~0x03);
@@ -1092,7 +1149,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
 }
 
 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
-				    struct amd64_error_info_regs *info,
+				    struct err_regs *info,
 				    u64 SystemAddress)
 {
 	struct mem_ctl_info *src_mci;
@@ -1101,8 +1158,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
 	u32 page, offset;
 
 	/* Extract the syndrome parts and form a 16-bit syndrome */
-	syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
-	syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
+	syndrome = HIGH_SYNDROME(info->nbsl) << 8;
+	syndrome |= LOW_SYNDROME(info->nbsh);
 
 	/* CHIPKILL enabled */
 	if (info->nbcfg & K8_NBCFG_CHIPKILL) {
@@ -1311,7 +1368,7 @@ static void amd64_teardown(struct amd64_pvt *pvt)
 }
 
 static u64 f10_get_error_address(struct mem_ctl_info *mci,
-			struct amd64_error_info_regs *info)
+			struct err_regs *info)
 {
 	return (((u64) (info->nbeah & 0xffff)) << 32) +
 		(info->nbeal & ~0x01);
@@ -1688,7 +1745,7 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
  * The @sys_addr is usually an error address received from the hardware.
  */
 static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
-				     struct amd64_error_info_regs *info,
+				     struct err_regs *info,
 				     u64 sys_addr)
 {
 	struct amd64_pvt *pvt = mci->pvt_info;
@@ -1701,8 +1758,8 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
 	if (csrow >= 0) {
 		error_address_to_page_and_offset(sys_addr, &page, &offset);
 
-		syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
-		syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
+		syndrome = HIGH_SYNDROME(info->nbsl) << 8;
+		syndrome |= LOW_SYNDROME(info->nbsh);
 
 		/*
 		 * Is CHIPKILL on? If so, then we can attempt to use the
@@ -2045,7 +2102,7 @@ static int get_channel_from_ecc_syndrome(unsigned short syndrome)
  * - 0: if no valid error is indicated
  */
 static int amd64_get_error_info_regs(struct mem_ctl_info *mci,
-				     struct amd64_error_info_regs *regs)
+				     struct err_regs *regs)
 {
 	struct amd64_pvt *pvt;
 	struct pci_dev *misc_f3_ctl;
@@ -2094,10 +2151,10 @@ err_reg:
  * - 0: if no error is found
  */
 static int amd64_get_error_info(struct mem_ctl_info *mci,
-				struct amd64_error_info_regs *info)
+				struct err_regs *info)
 {
 	struct amd64_pvt *pvt;
-	struct amd64_error_info_regs regs;
+	struct err_regs regs;
 
 	pvt = mci->pvt_info;
 
@@ -2152,48 +2209,12 @@ static int amd64_get_error_info(struct mem_ctl_info *mci,
 	return 1;
 }
 
-static inline void amd64_decode_gart_tlb_error(struct mem_ctl_info *mci,
-					struct amd64_error_info_regs *info)
-{
-	u32 err_code;
-	u32 ec_tt;	/* error code transaction type (2b) */
-	u32 ec_ll;	/* error code cache level (2b) */
-
-	err_code = EXTRACT_ERROR_CODE(info->nbsl);
-	ec_ll = EXTRACT_LL_CODE(err_code);
-	ec_tt = EXTRACT_TT_CODE(err_code);
-
-	amd64_mc_printk(mci, KERN_ERR,
-		     "GART TLB event: transaction type(%s), "
-		     "cache level(%s)\n", tt_msgs[ec_tt], ll_msgs[ec_ll]);
-}
-
-static inline void amd64_decode_mem_cache_error(struct mem_ctl_info *mci,
-					struct amd64_error_info_regs *info)
-{
-	u32 err_code;
-	u32 ec_rrrr;	/* error code memory transaction (4b) */
-	u32 ec_tt;	/* error code transaction type (2b) */
-	u32 ec_ll;	/* error code cache level (2b) */
-
-	err_code = EXTRACT_ERROR_CODE(info->nbsl);
-	ec_ll = EXTRACT_LL_CODE(err_code);
-	ec_tt = EXTRACT_TT_CODE(err_code);
-	ec_rrrr = EXTRACT_RRRR_CODE(err_code);
-
-	amd64_mc_printk(mci, KERN_ERR,
-		     "cache hierarchy error: memory transaction type(%s), "
-		     "transaction type(%s), cache level(%s)\n",
-		     rrrr_msgs[ec_rrrr], tt_msgs[ec_tt], ll_msgs[ec_ll]);
-}
-
-
 /*
  * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
  * ADDRESS and process.
  */
 static void amd64_handle_ce(struct mem_ctl_info *mci,
-			    struct amd64_error_info_regs *info)
+			    struct err_regs *info)
 {
 	struct amd64_pvt *pvt = mci->pvt_info;
 	u64 SystemAddress;
@@ -2216,7 +2237,7 @@ static void amd64_handle_ce(struct mem_ctl_info *mci,
 
 /* Handle any Un-correctable Errors (UEs) */
 static void amd64_handle_ue(struct mem_ctl_info *mci,
-			    struct amd64_error_info_regs *info)
+			    struct err_regs *info)
 {
 	int csrow;
 	u64 SystemAddress;
@@ -2261,59 +2282,24 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
 	}
 }
 
-static void amd64_decode_bus_error(struct mem_ctl_info *mci,
-				   struct amd64_error_info_regs *info)
+static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
+					    struct err_regs *info)
 {
-	u32 err_code, ext_ec;
-	u32 ec_pp;	/* error code participating processor (2p) */
-	u32 ec_to;	/* error code timed out (1b) */
-	u32 ec_rrrr;	/* error code memory transaction (4b) */
-	u32 ec_ii;	/* error code memory or I/O (2b) */
-	u32 ec_ll;	/* error code cache level (2b) */
+	u32 ec = ERROR_CODE(info->nbsl);
+	u32 xec = EXT_ERROR_CODE(info->nbsl);
+	int ecc_type = info->nbsh & (0x3 << 13);
 
-	ext_ec = EXTRACT_EXT_ERROR_CODE(info->nbsl);
-	err_code = EXTRACT_ERROR_CODE(info->nbsl);
-
-	ec_ll = EXTRACT_LL_CODE(err_code);
-	ec_ii = EXTRACT_II_CODE(err_code);
-	ec_rrrr = EXTRACT_RRRR_CODE(err_code);
-	ec_to = EXTRACT_TO_CODE(err_code);
-	ec_pp = EXTRACT_PP_CODE(err_code);
-
-	amd64_mc_printk(mci, KERN_ERR,
-		"BUS ERROR:\n"
-		"  time-out(%s) mem or i/o(%s)\n"
-		"  participating processor(%s)\n"
-		"  memory transaction type(%s)\n"
-		"  cache level(%s) Error Found by: %s\n",
-		to_msgs[ec_to],
-		ii_msgs[ec_ii],
-		pp_msgs[ec_pp],
-		rrrr_msgs[ec_rrrr],
-		ll_msgs[ec_ll],
-		(info->nbsh & K8_NBSH_ERR_SCRUBER) ?
-			"Scrubber" : "Normal Operation");
-
-	/* If this was an 'observed' error, early out */
-	if (ec_pp == K8_NBSL_PP_OBS)
-		return;		/* We aren't the node involved */
-
-	/* Parse out the extended error code for ECC events */
-	switch (ext_ec) {
-	/* F10 changed to one Extended ECC error code */
-	case F10_NBSL_EXT_ERR_RES:	/* Reserved field */
-	case F10_NBSL_EXT_ERR_ECC:	/* F10 ECC ext err code */
-		break;
+	/* Bail early out if this was an 'observed' error */
+	if (PP(ec) == K8_NBSL_PP_OBS)
+		return;
 
-	default:
-		amd64_mc_printk(mci, KERN_ERR, "NOT ECC: no special error "
-			"handling for this error\n");
+	/* Do only ECC errors */
+	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
 		return;
-	}
 
-	if (info->nbsh & K8_NBSH_CECC)
+	if (ecc_type == 2)
 		amd64_handle_ce(mci, info);
-	else if (info->nbsh & K8_NBSH_UECC)
+	else if (ecc_type == 1)
 		amd64_handle_ue(mci, info);
 
 	/*
@@ -2324,139 +2310,26 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci,
 	 * catastrophic.
 	 */
 	if (info->nbsh & K8_NBSH_OVERFLOW)
-		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR
-			"Error Overflow set");
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR "Error Overflow");
 }
 
-int amd64_process_error_info(struct mem_ctl_info *mci,
-			     struct amd64_error_info_regs *info,
-			     int handle_errors)
+void amd64_decode_bus_error(int node_id, struct err_regs *regs)
 {
-	struct amd64_pvt *pvt;
-	struct amd64_error_info_regs *regs;
-	u32 err_code, ext_ec;
-	int gart_tlb_error = 0;
-
-	pvt = mci->pvt_info;
-
-	/* If caller doesn't want us to process the error, return */
-	if (!handle_errors)
-		return 1;
-
-	regs = info;
-
-	debugf1("NorthBridge ERROR: mci(0x%p)\n", mci);
-	debugf1("  MC node(%d) Error-Address(0x%.8x-%.8x)\n",
-		pvt->mc_node_id, regs->nbeah, regs->nbeal);
-	debugf1("  nbsh(0x%.8x) nbsl(0x%.8x)\n",
-		regs->nbsh, regs->nbsl);
-	debugf1("  Valid Error=%s Overflow=%s\n",
-		(regs->nbsh & K8_NBSH_VALID_BIT) ? "True" : "False",
-		(regs->nbsh & K8_NBSH_OVERFLOW) ? "True" : "False");
-	debugf1("  Err Uncorrected=%s MCA Error Reporting=%s\n",
-		(regs->nbsh & K8_NBSH_UNCORRECTED_ERR) ?
-			"True" : "False",
-		(regs->nbsh & K8_NBSH_ERR_ENABLE) ?
-			"True" : "False");
-	debugf1("  MiscErr Valid=%s ErrAddr Valid=%s PCC=%s\n",
-		(regs->nbsh & K8_NBSH_MISC_ERR_VALID) ?
-			"True" : "False",
-		(regs->nbsh & K8_NBSH_VALID_ERROR_ADDR) ?
-			"True" : "False",
-		(regs->nbsh & K8_NBSH_PCC) ?
-			"True" : "False");
-	debugf1("  CECC=%s UECC=%s Found by Scruber=%s\n",
-		(regs->nbsh & K8_NBSH_CECC) ?
-			"True" : "False",
-		(regs->nbsh & K8_NBSH_UECC) ?
-			"True" : "False",
-		(regs->nbsh & K8_NBSH_ERR_SCRUBER) ?
-			"True" : "False");
-	debugf1("  CORE0=%s CORE1=%s CORE2=%s CORE3=%s\n",
-		(regs->nbsh & K8_NBSH_CORE0) ? "True" : "False",
-		(regs->nbsh & K8_NBSH_CORE1) ? "True" : "False",
-		(regs->nbsh & K8_NBSH_CORE2) ? "True" : "False",
-		(regs->nbsh & K8_NBSH_CORE3) ? "True" : "False");
-
-
-	err_code = EXTRACT_ERROR_CODE(regs->nbsl);
-
-	/* Determine which error type:
-	 *	1) GART errors - non-fatal, developmental events
-	 *	2) MEMORY errors
-	 *	3) BUS errors
-	 *	4) Unknown error
-	 */
-	if (TEST_TLB_ERROR(err_code)) {
-		/*
-		 * GART errors are intended to help graphics driver developers
-		 * to detect bad GART PTEs. It is recommended by AMD to disable
-		 * GART table walk error reporting by default[1] (currently
-		 * being disabled in mce_cpu_quirks()) and according to the
-		 * comment in mce_cpu_quirks(), such GART errors can be
-		 * incorrectly triggered. We may see these errors anyway and
-		 * unless requested by the user, they won't be reported.
-		 *
-		 * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
-		 * AMD NPT family 0Fh processors
-		 */
-		if (report_gart_errors == 0)
-			return 1;
-
-		/*
-		 * Only if GART error reporting is requested should we generate
-		 * any logs.
-		 */
-		gart_tlb_error = 1;
-
-		debugf1("GART TLB error\n");
-		amd64_decode_gart_tlb_error(mci, info);
-	} else if (TEST_MEM_ERROR(err_code)) {
-		debugf1("Memory/Cache error\n");
-		amd64_decode_mem_cache_error(mci, info);
-	} else if (TEST_BUS_ERROR(err_code)) {
-		debugf1("Bus (Link/DRAM) error\n");
-		amd64_decode_bus_error(mci, info);
-	} else {
-		/* shouldn't reach here! */
-		amd64_mc_printk(mci, KERN_WARNING,
-			     "%s(): unknown MCE error 0x%x\n", __func__,
-			     err_code);
-	}
-
-	ext_ec = EXTRACT_EXT_ERROR_CODE(regs->nbsl);
-	amd64_mc_printk(mci, KERN_ERR,
-		"ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]);
+	struct mem_ctl_info *mci = mci_lookup[node_id];
 
-	if (((ext_ec >= F10_NBSL_EXT_ERR_CRC &&
-	      ext_ec <= F10_NBSL_EXT_ERR_TGT) ||
-	     (ext_ec == F10_NBSL_EXT_ERR_RMW)) &&
-	    EXTRACT_LDT_LINK(info->nbsh)) {
-
-		amd64_mc_printk(mci, KERN_ERR,
-			"Error on hypertransport link: %s\n",
-			htlink_msgs[
-			EXTRACT_LDT_LINK(info->nbsh)]);
-	}
+	__amd64_decode_bus_error(mci, regs);
 
 	/*
 	 * Check the UE bit of the NB status high register, if set generate some
 	 * logs. If NOT a GART error, then process the event as a NO-INFO event.
 	 * If it was a GART error, skip that process.
+	 *
+	 * FIXME: this should go somewhere else, if at all.
 	 */
-	if (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) {
-		amd64_mc_printk(mci, KERN_CRIT, "uncorrected error\n");
-		if (!gart_tlb_error)
-			edac_mc_handle_ue_no_info(mci, "UE bit is set\n");
-	}
-
-	if (regs->nbsh & K8_NBSH_PCC)
-		amd64_mc_printk(mci, KERN_CRIT,
-			"PCC (processor context corrupt) set\n");
+	if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
+		edac_mc_handle_ue_no_info(mci, "UE bit is set");
 
-	return 1;
 }
-EXPORT_SYMBOL_GPL(amd64_process_error_info);
 
 /*
  * The main polling 'check' function, called FROM the edac core to perform the
@@ -2464,10 +2337,12 @@ EXPORT_SYMBOL_GPL(amd64_process_error_info);
  */
 static void amd64_check(struct mem_ctl_info *mci)
 {
-	struct amd64_error_info_regs info;
+	struct err_regs regs;
 
-	if (amd64_get_error_info(mci, &info))
-		amd64_process_error_info(mci, &info, 1);
+	if (amd64_get_error_info(mci, &regs)) {
+		struct amd64_pvt *pvt = mci->pvt_info;
+		amd_decode_nb_mce(pvt->mc_node_id, &regs, 1);
+	}
 }
 
 /*
@@ -3163,6 +3038,13 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
 
 	mci_lookup[node_id] = mci;
 	pvt_lookup[node_id] = NULL;
+
+	/* register stuff with EDAC MCE */
+	if (report_gart_errors)
+		amd_report_gart_errors(true);
+
+	amd_register_ecc_decoder(amd64_decode_bus_error);
+
 	return 0;
 
 err_add_mc:
@@ -3229,6 +3111,10 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
 
 	mci_lookup[pvt->mc_node_id] = NULL;
 
+	/* unregister from EDAC MCE */
+	amd_report_gart_errors(false);
+	amd_unregister_ecc_decoder(amd64_decode_bus_error);
+
 	/* Free the EDAC CORE resources */
 	edac_mc_free(mci);
 }
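
The scrubrates[] table moved into amd64_edac.c above is ordered from highest to lowest bandwidth, and the 'set' operation described in its comment picks the first entry whose bandwidth does not exceed the requested value. A standalone sketch of that lookup follows; the table excerpt is taken from the patch, while the helper scrubval_for_bandwidth() and its name are only illustrative.

#include <stdio.h>

struct scrubrate {
	unsigned int scrubval;		/* bit pattern for the scrub rate register */
	unsigned long bandwidth;	/* scrubbing bandwidth selected by that pattern */
};

/* Excerpt of the table from the patch, highest bandwidth first. */
static const struct scrubrate scrubrates[] = {
	{ 0x01, 1600000000UL },
	{ 0x02,  800000000UL },
	{ 0x03,  400000000UL },
	{ 0x04,  200000000UL },
	/* ... remaining entries exactly as in the patch ... */
	{ 0x16,        761UL },
	{ 0x00,          0UL },		/* scrubbing off */
};

/* Walk the descending table and stop at the first rate not above the request. */
static unsigned int scrubval_for_bandwidth(unsigned long requested_bw)
{
	unsigned int i;

	for (i = 0; i < sizeof(scrubrates) / sizeof(scrubrates[0]) - 1; i++)
		if (scrubrates[i].bandwidth <= requested_bw)
			return scrubrates[i].scrubval;

	return 0x00;			/* below the smallest rate: scrubbing off */
}

int main(void)
{
	/* 500000000 is not in the table, so the 400000000 entry (0x03) is picked */
	printf("scrubval = 0x%02x\n", scrubval_for_bandwidth(500000000UL));
	return 0;
}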
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index ba73015af8e4..8ea07e2715dc 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -72,6 +72,7 @@
 #include <linux/edac.h>
 #include <asm/msr.h>
 #include "edac_core.h"
+#include "edac_mce_amd.h"
 
 #define amd64_printk(level, fmt, arg...) \
 	edac_printk(level, "amd64", fmt, ##arg)
@@ -303,21 +304,9 @@ enum {
 #define K8_NBSL				0x48
 
 
-#define EXTRACT_HIGH_SYNDROME(x)	(((x) >> 24) & 0xff)
-#define EXTRACT_EXT_ERROR_CODE(x)	(((x) >> 16) & 0x1f)
-
 /* Family F10h: Normalized Extended Error Codes */
 #define F10_NBSL_EXT_ERR_RES		0x0
-#define F10_NBSL_EXT_ERR_CRC		0x1
-#define F10_NBSL_EXT_ERR_SYNC		0x2
-#define F10_NBSL_EXT_ERR_MST		0x3
-#define F10_NBSL_EXT_ERR_TGT		0x4
-#define F10_NBSL_EXT_ERR_GART		0x5
-#define F10_NBSL_EXT_ERR_RMW		0x6
-#define F10_NBSL_EXT_ERR_WDT		0x7
 #define F10_NBSL_EXT_ERR_ECC		0x8
-#define F10_NBSL_EXT_ERR_DEV		0x9
-#define F10_NBSL_EXT_ERR_LINK_DATA	0xA
 
 /* Next two are overloaded values */
 #define F10_NBSL_EXT_ERR_LINK_PROTO	0xB
@@ -348,17 +337,6 @@ enum {
 #define K8_NBSL_EXT_ERR_CHIPKILL_ECC	0x8
 #define K8_NBSL_EXT_ERR_DRAM_PARITY	0xD
 
-#define EXTRACT_ERROR_CODE(x)		((x) & 0xffff)
-#define TEST_TLB_ERROR(x)		(((x) & 0xFFF0) == 0x0010)
-#define TEST_MEM_ERROR(x)		(((x) & 0xFF00) == 0x0100)
-#define TEST_BUS_ERROR(x)		(((x) & 0xF800) == 0x0800)
-#define EXTRACT_TT_CODE(x)		(((x) >> 2) & 0x3)
-#define EXTRACT_II_CODE(x)		(((x) >> 2) & 0x3)
-#define EXTRACT_LL_CODE(x)		(((x) >> 0) & 0x3)
-#define EXTRACT_RRRR_CODE(x)		(((x) >> 4) & 0xf)
-#define EXTRACT_TO_CODE(x)		(((x) >> 8) & 0x1)
-#define EXTRACT_PP_CODE(x)		(((x) >> 9) & 0x3)
-
 /*
  * The following are for BUS type errors AFTER values have been normalized by
  * shifting right
@@ -368,28 +346,7 @@ enum {
 #define K8_NBSL_PP_OBS			0x2
 #define K8_NBSL_PP_GENERIC		0x3
 
-
-#define K8_NBSH				0x4C
-
-#define K8_NBSH_VALID_BIT		BIT(31)
-#define K8_NBSH_OVERFLOW		BIT(30)
-#define K8_NBSH_UNCORRECTED_ERR		BIT(29)
-#define K8_NBSH_ERR_ENABLE		BIT(28)
-#define K8_NBSH_MISC_ERR_VALID		BIT(27)
-#define K8_NBSH_VALID_ERROR_ADDR	BIT(26)
-#define K8_NBSH_PCC			BIT(25)
-#define K8_NBSH_CECC			BIT(14)
-#define K8_NBSH_UECC			BIT(13)
-#define K8_NBSH_ERR_SCRUBER		BIT(8)
-#define K8_NBSH_CORE3			BIT(3)
-#define K8_NBSH_CORE2			BIT(2)
-#define K8_NBSH_CORE1			BIT(1)
-#define K8_NBSH_CORE0			BIT(0)
-
-#define EXTRACT_LDT_LINK(x)		(((x) >> 4) & 0x7)
 #define EXTRACT_ERR_CPU_MAP(x)		((x) & 0xF)
-#define EXTRACT_LOW_SYNDROME(x)		(((x) >> 15) & 0xff)
-
 
 #define K8_NBEAL			0x50
 #define K8_NBEAH			0x54
@@ -455,23 +412,6 @@ enum amd64_chipset_families {
 	F11_CPUS,
 };
 
-/*
- * Structure to hold:
- *
- *	1) dynamically read status and error address HW registers
- *	2) sysfs entered values
- *	3) MCE values
- *
- * Depends on entry into the modules
- */
-struct amd64_error_info_regs {
-	u32 nbcfg;
-	u32 nbsh;
-	u32 nbsl;
-	u32 nbeah;
-	u32 nbeal;
-};
-
 /* Error injection control structure */
 struct error_injection {
 	u32	section;
@@ -542,7 +482,7 @@ struct amd64_pvt {
 	u32 online_spare;		/* On-Line spare Reg */
 
 	/* temp storage for when input is received from sysfs */
-	struct amd64_error_info_regs ctl_error_info;
+	struct err_regs ctl_error_info;
 
 	/* place to store error injection parameters prior to issue */
 	struct error_injection injection;
@@ -601,11 +541,11 @@ struct low_ops {
 	int (*early_channel_count)(struct amd64_pvt *pvt);
 
 	u64 (*get_error_address)(struct mem_ctl_info *mci,
-				 struct amd64_error_info_regs *info);
+				 struct err_regs *info);
 	void (*read_dram_base_limit)(struct amd64_pvt *pvt, int dram);
 	void (*read_dram_ctl_register)(struct amd64_pvt *pvt);
 	void (*map_sysaddr_to_csrow)(struct mem_ctl_info *mci,
-				     struct amd64_error_info_regs *info,
+				     struct err_regs *info,
 				     u64 SystemAddr);
 	int (*dbam_map_to_pages)(struct amd64_pvt *pvt, int dram_map);
 };
@@ -637,8 +577,5 @@ static inline struct low_ops *family_ops(int index)
 #define F10_MIN_SCRUB_RATE_BITS		0x5
 #define F11_MIN_SCRUB_RATE_BITS		0x6
 
-int amd64_process_error_info(struct mem_ctl_info *mci,
-			     struct amd64_error_info_regs *info,
-			     int handle_errors);
 int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
 			     u64 *hole_offset, u64 *hole_size);
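
The EXTRACT_{HIGH,LOW}_SYNDROME macros deleted here survive as HIGH_SYNDROME()/LOW_SYNDROME() in edac_mce_amd.h, and the map_sysaddr_to_csrow paths in amd64_edac.c splice their results into a single 16-bit syndrome. A tiny standalone sketch of that combination; the register values are made up.

#include <stdio.h>
#include <stdint.h>

/* Same bit positions as the macros now living in edac_mce_amd.h. */
#define LOW_SYNDROME(x)		(((x) >> 15) & 0xff)
#define HIGH_SYNDROME(x)	(((x) >> 24) & 0xff)

int main(void)
{
	uint32_t nbsl = 0xab000000;	/* invented NB status low register value */
	uint32_t nbsh = 0xcd << 15;	/* invented NB status high register value */
	uint16_t syndrome;

	/* Mirrors the syndrome assembly in k8/f10_map_sysaddr_to_csrow() above. */
	syndrome  = HIGH_SYNDROME(nbsl) << 8;
	syndrome |= LOW_SYNDROME(nbsh);

	printf("syndrome = 0x%04x\n", syndrome);	/* prints 0xabcd */
	return 0;
}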
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
index 0a41b248a4ad..59cf2cf6e11e 100644
--- a/drivers/edac/amd64_edac_dbg.c
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -24,7 +24,7 @@ static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
 
 		/* Process the Mapping request */
 		/* TODO: Add race prevention */
-		amd64_process_error_info(mci, &pvt->ctl_error_info, 1);
+		amd_decode_nb_mce(pvt->mc_node_id, &pvt->ctl_error_info, 1);
 
 		return count;
 	}
diff --git a/drivers/edac/amd64_edac_err_types.c b/drivers/edac/amd64_edac_err_types.c
deleted file mode 100644
index f212ff12a9d8..000000000000
--- a/drivers/edac/amd64_edac_err_types.c
+++ /dev/null
@@ -1,161 +0,0 @@
-#include "amd64_edac.h"
-
-/*
- * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
- * for DDR2 DRAM mapping.
- */
-u32 revf_quad_ddr2_shift[] = {
-	0,	/* 0000b NULL DIMM (128mb) */
-	28,	/* 0001b 256mb */
-	29,	/* 0010b 512mb */
-	29,	/* 0011b 512mb */
-	29,	/* 0100b 512mb */
-	30,	/* 0101b 1gb */
-	30,	/* 0110b 1gb */
-	31,	/* 0111b 2gb */
-	31,	/* 1000b 2gb */
-	32,	/* 1001b 4gb */
-	32,	/* 1010b 4gb */
-	33,	/* 1011b 8gb */
-	0,	/* 1100b future */
-	0,	/* 1101b future */
-	0,	/* 1110b future */
-	0	/* 1111b future */
-};
-
-/*
- * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
- * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
- * or higher value'.
- *
- *FIXME: Produce a better mapping/linearisation.
- */
-
-struct scrubrate scrubrates[] = {
-	{ 0x01, 1600000000UL},
-	{ 0x02, 800000000UL},
-	{ 0x03, 400000000UL},
-	{ 0x04, 200000000UL},
-	{ 0x05, 100000000UL},
-	{ 0x06, 50000000UL},
-	{ 0x07, 25000000UL},
-	{ 0x08, 12284069UL},
-	{ 0x09, 6274509UL},
-	{ 0x0A, 3121951UL},
-	{ 0x0B, 1560975UL},
-	{ 0x0C, 781440UL},
-	{ 0x0D, 390720UL},
-	{ 0x0E, 195300UL},
-	{ 0x0F, 97650UL},
-	{ 0x10, 48854UL},
-	{ 0x11, 24427UL},
-	{ 0x12, 12213UL},
-	{ 0x13, 6101UL},
-	{ 0x14, 3051UL},
-	{ 0x15, 1523UL},
-	{ 0x16, 761UL},
-	{ 0x00, 0UL},	/* scrubbing off */
-};
-
-/*
- * string representation for the different MCA reported error types, see F3x48
- * or MSR0000_0411.
- */
-const char *tt_msgs[] = {	/* transaction type */
-	"instruction",
-	"data",
-	"generic",
-	"reserved"
-};
-
-const char *ll_msgs[] = {	/* cache level */
-	"L0",
-	"L1",
-	"L2",
-	"L3/generic"
-};
-
-const char *rrrr_msgs[] = {
-	"generic",
-	"generic read",
-	"generic write",
-	"data read",
-	"data write",
-	"inst fetch",
-	"prefetch",
-	"evict",
-	"snoop",
-	"reserved RRRR= 9",
-	"reserved RRRR= 10",
-	"reserved RRRR= 11",
-	"reserved RRRR= 12",
-	"reserved RRRR= 13",
-	"reserved RRRR= 14",
-	"reserved RRRR= 15"
-};
-
-const char *pp_msgs[] = {	/* participating processor */
-	"local node originated (SRC)",
-	"local node responded to request (RES)",
-	"local node observed as 3rd party (OBS)",
-	"generic"
-};
-
-const char *to_msgs[] = {
-	"no timeout",
-	"timed out"
-};
-
-const char *ii_msgs[] = {	/* memory or i/o */
-	"mem access",
-	"reserved",
-	"i/o access",
-	"generic"
-};
-
-/* Map the 5 bits of Extended Error code to the string table. */
-const char *ext_msgs[] = {	/* extended error */
-	"K8 ECC error/F10 reserved",	/* 0_0000b */
-	"CRC error",			/* 0_0001b */
-	"sync error",			/* 0_0010b */
-	"mst abort",			/* 0_0011b */
-	"tgt abort",			/* 0_0100b */
-	"GART error",			/* 0_0101b */
-	"RMW error",			/* 0_0110b */
-	"Wdog timer error",		/* 0_0111b */
-	"F10-ECC/K8-Chipkill error",	/* 0_1000b */
-	"DEV Error",			/* 0_1001b */
-	"Link Data error",		/* 0_1010b */
-	"Link or L3 Protocol error",	/* 0_1011b */
-	"NB Array error",		/* 0_1100b */
-	"DRAM Parity error",		/* 0_1101b */
-	"Link Retry/GART Table Walk/DEV Table Walk error", /* 0_1110b */
-	"Res 0x0ff error",		/* 0_1111b */
-	"Res 0x100 error",		/* 1_0000b */
-	"Res 0x101 error",		/* 1_0001b */
-	"Res 0x102 error",		/* 1_0010b */
-	"Res 0x103 error",		/* 1_0011b */
-	"Res 0x104 error",		/* 1_0100b */
-	"Res 0x105 error",		/* 1_0101b */
-	"Res 0x106 error",		/* 1_0110b */
-	"Res 0x107 error",		/* 1_0111b */
-	"Res 0x108 error",		/* 1_1000b */
-	"Res 0x109 error",		/* 1_1001b */
-	"Res 0x10A error",		/* 1_1010b */
-	"Res 0x10B error",		/* 1_1011b */
-	"L3 Cache Data error",		/* 1_1100b */
-	"L3 CacheTag error",		/* 1_1101b */
-	"L3 Cache LRU error",		/* 1_1110b */
-	"Res 0x1FF error"		/* 1_1111b */
-};
-
-const char *htlink_msgs[] = {
-	"none",
-	"1",
-	"2",
-	"1 2",
-	"3",
-	"1 3",
-	"2 3",
-	"1 2 3"
-};
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
new file mode 100644
index 000000000000..c8ca7136dacc
--- /dev/null
+++ b/drivers/edac/edac_mce_amd.c
@@ -0,0 +1,422 @@
+#include <linux/module.h>
+#include "edac_mce_amd.h"
+
+static bool report_gart_errors;
+static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
+
+void amd_report_gart_errors(bool v)
+{
+	report_gart_errors = v;
+}
+EXPORT_SYMBOL_GPL(amd_report_gart_errors);
+
+void amd_register_ecc_decoder(void (*f)(int, struct err_regs *))
+{
+	nb_bus_decoder = f;
+}
+EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
+
+void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *))
+{
+	if (nb_bus_decoder) {
+		WARN_ON(nb_bus_decoder != f);
+
+		nb_bus_decoder = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
+
+/*
+ * string representation for the different MCA reported error types, see F3x48
+ * or MSR0000_0411.
+ */
+const char *tt_msgs[] = {	/* transaction type */
+	"instruction",
+	"data",
+	"generic",
+	"reserved"
+};
+EXPORT_SYMBOL_GPL(tt_msgs);
+
+const char *ll_msgs[] = {	/* cache level */
+	"L0",
+	"L1",
+	"L2",
+	"L3/generic"
+};
+EXPORT_SYMBOL_GPL(ll_msgs);
+
+const char *rrrr_msgs[] = {
+	"generic",
+	"generic read",
+	"generic write",
+	"data read",
+	"data write",
+	"inst fetch",
+	"prefetch",
+	"evict",
+	"snoop",
+	"reserved RRRR= 9",
+	"reserved RRRR= 10",
+	"reserved RRRR= 11",
+	"reserved RRRR= 12",
+	"reserved RRRR= 13",
+	"reserved RRRR= 14",
+	"reserved RRRR= 15"
+};
+EXPORT_SYMBOL_GPL(rrrr_msgs);
+
+const char *pp_msgs[] = {	/* participating processor */
+	"local node originated (SRC)",
+	"local node responded to request (RES)",
+	"local node observed as 3rd party (OBS)",
+	"generic"
+};
+EXPORT_SYMBOL_GPL(pp_msgs);
+
+const char *to_msgs[] = {
+	"no timeout",
+	"timed out"
+};
+EXPORT_SYMBOL_GPL(to_msgs);
+
+const char *ii_msgs[] = {	/* memory or i/o */
+	"mem access",
+	"reserved",
+	"i/o access",
+	"generic"
+};
+EXPORT_SYMBOL_GPL(ii_msgs);
+
+/*
+ * Map the 4 or 5 (family-specific) bits of Extended Error code to the
+ * string table.
+ */
+const char *ext_msgs[] = {
+	"K8 ECC error",					/* 0_0000b */
+	"CRC error on link",				/* 0_0001b */
+	"Sync error packets on link",			/* 0_0010b */
+	"Master Abort during link operation",		/* 0_0011b */
+	"Target Abort during link operation",		/* 0_0100b */
+	"Invalid GART PTE entry during table walk",	/* 0_0101b */
+	"Unsupported atomic RMW command received",	/* 0_0110b */
+	"WDT error: NB transaction timeout",		/* 0_0111b */
+	"ECC/ChipKill ECC error",			/* 0_1000b */
+	"SVM DEV Error",				/* 0_1001b */
+	"Link Data error",				/* 0_1010b */
+	"Link/L3/Probe Filter Protocol error",		/* 0_1011b */
+	"NB Internal Arrays Parity error",		/* 0_1100b */
+	"DRAM Address/Control Parity error",		/* 0_1101b */
+	"Link Transmission error",			/* 0_1110b */
+	"GART/DEV Table Walk Data error"		/* 0_1111b */
+	"Res 0x100 error",				/* 1_0000b */
+	"Res 0x101 error",				/* 1_0001b */
+	"Res 0x102 error",				/* 1_0010b */
+	"Res 0x103 error",				/* 1_0011b */
+	"Res 0x104 error",				/* 1_0100b */
+	"Res 0x105 error",				/* 1_0101b */
+	"Res 0x106 error",				/* 1_0110b */
+	"Res 0x107 error",				/* 1_0111b */
+	"Res 0x108 error",				/* 1_1000b */
+	"Res 0x109 error",				/* 1_1001b */
+	"Res 0x10A error",				/* 1_1010b */
+	"Res 0x10B error",				/* 1_1011b */
+	"ECC error in L3 Cache Data",			/* 1_1100b */
+	"L3 Cache Tag error",				/* 1_1101b */
+	"L3 Cache LRU Parity error",			/* 1_1110b */
+	"Probe Filter error"				/* 1_1111b */
+};
+EXPORT_SYMBOL_GPL(ext_msgs);
+
+static void amd_decode_dc_mce(u64 mc0_status)
+{
+	u32 ec = mc0_status & 0xffff;
+	u32 xec = (mc0_status >> 16) & 0xf;
+
+	pr_emerg(" Data Cache Error");
+
+	if (xec == 1 && TLB_ERROR(ec))
+		pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
+	else if (xec == 0) {
+		if (mc0_status & (1ULL << 40))
+			pr_cont(" during Data Scrub.\n");
+		else if (TLB_ERROR(ec))
+			pr_cont(": %s TLB parity error.\n", LL_MSG(ec));
+		else if (MEM_ERROR(ec)) {
+			u8 ll = ec & 0x3;
+			u8 tt = (ec >> 2) & 0x3;
+			u8 rrrr = (ec >> 4) & 0xf;
+
+			/* see F10h BKDG (31116), Table 92. */
+			if (ll == 0x1) {
+				if (tt != 0x1)
+					goto wrong_dc_mce;
+
+				pr_cont(": Data/Tag %s error.\n", RRRR_MSG(ec));
+
+			} else if (ll == 0x2 && rrrr == 0x3)
+				pr_cont(" during L1 linefill from L2.\n");
+			else
+				goto wrong_dc_mce;
+		} else if (BUS_ERROR(ec) && boot_cpu_data.x86 == 0xf)
+			pr_cont(" during system linefill.\n");
+		else
+			goto wrong_dc_mce;
+	} else
+		goto wrong_dc_mce;
+
+	return;
+
+wrong_dc_mce:
+	pr_warning("Corrupted DC MCE info?\n");
+}
+
+static void amd_decode_ic_mce(u64 mc1_status)
+{
+	u32 ec = mc1_status & 0xffff;
+	u32 xec = (mc1_status >> 16) & 0xf;
+
+	pr_emerg(" Instruction Cache Error");
+
+	if (xec == 1 && TLB_ERROR(ec))
+		pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
+	else if (xec == 0) {
+		if (TLB_ERROR(ec))
+			pr_cont(": %s TLB Parity error.\n", LL_MSG(ec));
+		else if (BUS_ERROR(ec)) {
+			if (boot_cpu_data.x86 == 0xf &&
+			    (mc1_status & (1ULL << 58)))
+				pr_cont(" during system linefill.\n");
+			else
+				pr_cont(" during attempted NB data read.\n");
+		} else if (MEM_ERROR(ec)) {
+			u8 ll = ec & 0x3;
+			u8 rrrr = (ec >> 4) & 0xf;
+
+			if (ll == 0x2)
+				pr_cont(" during a linefill from L2.\n");
+			else if (ll == 0x1) {
+
+				switch (rrrr) {
+				case 0x5:
+					pr_cont(": Parity error during "
+						"data load.\n");
+					break;
+
+				case 0x7:
+					pr_cont(": Copyback Parity/Victim"
+						" error.\n");
+					break;
+
+				case 0x8:
+					pr_cont(": Tag Snoop error.\n");
+					break;
+
+				default:
+					goto wrong_ic_mce;
+					break;
+				}
+			}
+		} else
+			goto wrong_ic_mce;
+	} else
+		goto wrong_ic_mce;
+
+	return;
+
+wrong_ic_mce:
+	pr_warning("Corrupted IC MCE info?\n");
+}
+
+static void amd_decode_bu_mce(u64 mc2_status)
+{
+	u32 ec = mc2_status & 0xffff;
+	u32 xec = (mc2_status >> 16) & 0xf;
+
+	pr_emerg(" Bus Unit Error");
+
+	if (xec == 0x1)
+		pr_cont(" in the write data buffers.\n");
+	else if (xec == 0x3)
+		pr_cont(" in the victim data buffers.\n");
+	else if (xec == 0x2 && MEM_ERROR(ec))
+		pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
+	else if (xec == 0x0) {
+		if (TLB_ERROR(ec))
+			pr_cont(": %s error in a Page Descriptor Cache or "
+				"Guest TLB.\n", TT_MSG(ec));
+		else if (BUS_ERROR(ec))
+			pr_cont(": %s/ECC error in data read from NB: %s.\n",
+				RRRR_MSG(ec), PP_MSG(ec));
+		else if (MEM_ERROR(ec)) {
+			u8 rrrr = (ec >> 4) & 0xf;
+
+			if (rrrr >= 0x7)
+				pr_cont(": %s error during data copyback.\n",
+					RRRR_MSG(ec));
+			else if (rrrr <= 0x1)
+				pr_cont(": %s parity/ECC error during data "
+					"access from L2.\n", RRRR_MSG(ec));
+			else
+				goto wrong_bu_mce;
+		} else
+			goto wrong_bu_mce;
+	} else
+		goto wrong_bu_mce;
+
+	return;
+
+wrong_bu_mce:
+	pr_warning("Corrupted BU MCE info?\n");
+}
+
+static void amd_decode_ls_mce(u64 mc3_status)
+{
+	u32 ec = mc3_status & 0xffff;
+	u32 xec = (mc3_status >> 16) & 0xf;
+
+	pr_emerg(" Load Store Error");
+
+	if (xec == 0x0) {
+		u8 rrrr = (ec >> 4) & 0xf;
+
+		if (!BUS_ERROR(ec) || (rrrr != 0x3 && rrrr != 0x4))
+			goto wrong_ls_mce;
+
+		pr_cont(" during %s.\n", RRRR_MSG(ec));
+	}
+	return;
+
+wrong_ls_mce:
+	pr_warning("Corrupted LS MCE info?\n");
+}
+
+void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
+{
+	u32 ec = ERROR_CODE(regs->nbsl);
+	u32 xec = EXT_ERROR_CODE(regs->nbsl);
+
+	if (!handle_errors)
+		return;
+
+	pr_emerg(" Northbridge Error, node %d", node_id);
+
+	/*
+	 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
+	 * value encoding has changed so interpret those differently
+	 */
+	if ((boot_cpu_data.x86 == 0x10) &&
+	    (boot_cpu_data.x86_model > 8)) {
+		if (regs->nbsh & K8_NBSH_ERR_CPU_VAL)
+			pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf));
+	} else {
+		pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf)));
+	}
+
+
+	pr_emerg("%s.\n", EXT_ERR_MSG(xec));
+
+	if (BUS_ERROR(ec) && nb_bus_decoder)
+		nb_bus_decoder(node_id, regs);
+}
+EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
+
+static void amd_decode_fr_mce(u64 mc5_status)
+{
+	/* we have only one error signature so match all fields at once. */
+	if ((mc5_status & 0xffff) == 0x0f0f)
+		pr_emerg(" FR Error: CPU Watchdog timer expire.\n");
+	else
+		pr_warning("Corrupted FR MCE info?\n");
+}
+
+static inline void amd_decode_err_code(unsigned int ec)
+{
+	if (TLB_ERROR(ec)) {
+		/*
+		 * GART errors are intended to help graphics driver developers
+		 * to detect bad GART PTEs. It is recommended by AMD to disable
+		 * GART table walk error reporting by default[1] (currently
+		 * being disabled in mce_cpu_quirks()) and according to the
+		 * comment in mce_cpu_quirks(), such GART errors can be
+		 * incorrectly triggered. We may see these errors anyway and
+		 * unless requested by the user, they won't be reported.
+		 *
+		 * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
+		 * AMD NPT family 0Fh processors
+		 */
+		if (!report_gart_errors)
+			return;
+
+		pr_emerg(" Transaction: %s, Cache Level %s\n",
+			 TT_MSG(ec), LL_MSG(ec));
+	} else if (MEM_ERROR(ec)) {
+		pr_emerg(" Transaction: %s, Type: %s, Cache Level: %s",
+			 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
+	} else if (BUS_ERROR(ec)) {
+		pr_emerg(" Transaction type: %s(%s), %s, Cache Level: %s, "
+			 "Participating Processor: %s\n",
+			 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
+			 PP_MSG(ec));
+	} else
+		pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
+}
+
+void decode_mce(struct mce *m)
+{
+	struct err_regs regs;
+	int node, ecc;
+
+	pr_emerg("MC%d_STATUS: ", m->bank);
+
+	pr_cont("%sorrected error, report: %s, MiscV: %svalid, "
+		"CPU context corrupt: %s",
+		((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
+		((m->status & MCI_STATUS_EN) ? "yes" : "no"),
+		((m->status & MCI_STATUS_MISCV) ? "" : "in"),
+		((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
+
+	/* do the two bits[14:13] together */
+	ecc = m->status & (3ULL << 45);
+	if (ecc)
+		pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
+
+	pr_cont("\n");
+
+	switch (m->bank) {
+	case 0:
+		amd_decode_dc_mce(m->status);
+		break;
+
+	case 1:
+		amd_decode_ic_mce(m->status);
+		break;
+
+	case 2:
+		amd_decode_bu_mce(m->status);
+		break;
+
+	case 3:
+		amd_decode_ls_mce(m->status);
+		break;
+
+	case 4:
+		regs.nbsl  = (u32) m->status;
+		regs.nbsh  = (u32)(m->status >> 32);
+		regs.nbeal = (u32) m->addr;
+		regs.nbeah = (u32)(m->addr >> 32);
+		node       = per_cpu(cpu_llc_id, m->extcpu);
+
+		amd_decode_nb_mce(node, &regs, 1);
+		break;
+
+	case 5:
+		amd_decode_fr_mce(m->status);
+		break;
+
+	default:
+		break;
+	}
+
+	amd_decode_err_code(m->status & 0xffff);
+}
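
For context, this is roughly how a northbridge driver consumes the registration API introduced above; amd64_edac.c wires up amd64_decode_bus_error() the same way during init and teardown. The module below is only an illustrative sketch: the my_* names are invented and the callback body does nothing useful.

#include <linux/module.h>
#include "edac_mce_amd.h"

/* Called back from amd_decode_nb_mce() when a bus (DRAM ECC) error is seen. */
static void my_nb_bus_decoder(int node_id, struct err_regs *regs)
{
	/* a real decoder would map regs->nbeah/nbeal back to a csrow here */
	pr_info("node %d: nbsh=0x%08x nbsl=0x%08x\n",
		node_id, regs->nbsh, regs->nbsl);
}

static int __init my_decoder_init(void)
{
	amd_report_gart_errors(false);	/* keep GART TLB noise suppressed */
	amd_register_ecc_decoder(my_nb_bus_decoder);
	return 0;
}
module_init(my_decoder_init);

static void __exit my_decoder_exit(void)
{
	amd_unregister_ecc_decoder(my_nb_bus_decoder);
}
module_exit(my_decoder_exit);

MODULE_LICENSE("GPL");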
diff --git a/drivers/edac/edac_mce_amd.h b/drivers/edac/edac_mce_amd.h
new file mode 100644
index 000000000000..df23ee065f79
--- /dev/null
+++ b/drivers/edac/edac_mce_amd.h
@@ -0,0 +1,69 @@
+#ifndef _EDAC_MCE_AMD_H
+#define _EDAC_MCE_AMD_H
+
+#include <asm/mce.h>
+
+#define ERROR_CODE(x)			((x) & 0xffff)
+#define EXT_ERROR_CODE(x)		(((x) >> 16) & 0x1f)
+#define EXT_ERR_MSG(x)			ext_msgs[EXT_ERROR_CODE(x)]
+
+#define LOW_SYNDROME(x)			(((x) >> 15) & 0xff)
+#define HIGH_SYNDROME(x)		(((x) >> 24) & 0xff)
+
+#define TLB_ERROR(x)			(((x) & 0xFFF0) == 0x0010)
+#define MEM_ERROR(x)			(((x) & 0xFF00) == 0x0100)
+#define BUS_ERROR(x)			(((x) & 0xF800) == 0x0800)
+
+#define TT(x)				(((x) >> 2) & 0x3)
+#define TT_MSG(x)			tt_msgs[TT(x)]
+#define II(x)				(((x) >> 2) & 0x3)
+#define II_MSG(x)			ii_msgs[II(x)]
+#define LL(x)				(((x) >> 0) & 0x3)
+#define LL_MSG(x)			ll_msgs[LL(x)]
+#define RRRR(x)				(((x) >> 4) & 0xf)
+#define RRRR_MSG(x)			rrrr_msgs[RRRR(x)]
+#define TO(x)				(((x) >> 8) & 0x1)
+#define TO_MSG(x)			to_msgs[TO(x)]
+#define PP(x)				(((x) >> 9) & 0x3)
+#define PP_MSG(x)			pp_msgs[PP(x)]
+
+#define K8_NBSH				0x4C
+
+#define K8_NBSH_VALID_BIT		BIT(31)
+#define K8_NBSH_OVERFLOW		BIT(30)
+#define K8_NBSH_UC_ERR			BIT(29)
+#define K8_NBSH_ERR_EN			BIT(28)
+#define K8_NBSH_MISCV			BIT(27)
+#define K8_NBSH_VALID_ERROR_ADDR	BIT(26)
+#define K8_NBSH_PCC			BIT(25)
+#define K8_NBSH_ERR_CPU_VAL		BIT(24)
+#define K8_NBSH_CECC			BIT(14)
+#define K8_NBSH_UECC			BIT(13)
+#define K8_NBSH_ERR_SCRUBER		BIT(8)
+
+extern const char *tt_msgs[];
+extern const char *ll_msgs[];
+extern const char *rrrr_msgs[];
+extern const char *pp_msgs[];
+extern const char *to_msgs[];
+extern const char *ii_msgs[];
+extern const char *ext_msgs[];
+
+/*
+ * relevant NB regs
+ */
+struct err_regs {
+	u32 nbcfg;
+	u32 nbsh;
+	u32 nbsl;
+	u32 nbeah;
+	u32 nbeal;
+};
+
+
+void amd_report_gart_errors(bool);
+void amd_register_ecc_decoder(void (*f)(int, struct err_regs *));
+void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *));
+void amd_decode_nb_mce(int, struct err_regs *, int);
+
+#endif /* _EDAC_MCE_AMD_H */
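
A small userspace-style sketch of how the classification macros above carve up the low 16 bits of an MCA status value; the macro bodies are copied from this header, while the sample status value is invented purely to exercise the BUS_ERROR() branch.

#include <stdio.h>

#define ERROR_CODE(x)	((x) & 0xffff)
#define TLB_ERROR(x)	(((x) & 0xFFF0) == 0x0010)
#define MEM_ERROR(x)	(((x) & 0xFF00) == 0x0100)
#define BUS_ERROR(x)	(((x) & 0xF800) == 0x0800)

int main(void)
{
	unsigned long long status = 0x9400400000000813ULL;	/* invented MC4_STATUS */
	unsigned int ec = ERROR_CODE(status);

	if (TLB_ERROR(ec))
		printf("GART/TLB error, ec=0x%04x\n", ec);
	else if (MEM_ERROR(ec))
		printf("memory/cache error, ec=0x%04x\n", ec);
	else if (BUS_ERROR(ec))
		printf("bus error, ec=0x%04x\n", ec);	/* taken for ec == 0x0813 */
	else
		printf("unknown error code 0x%04x\n", ec);

	return 0;
}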