diff options
author | Borislav Petkov <borislav.petkov@amd.com> | 2009-06-25 13:32:38 -0400 |
---|---|---|
committer | Borislav Petkov <borislav.petkov@amd.com> | 2009-09-14 12:57:48 -0400 |
commit | b70ef01016850de87b9a28a6af19fed8801df076 (patch) | |
tree | 78159c6e177c5557f2cc9342f8bc0833645d8da5 /drivers/edac/amd64_edac.c | |
parent | 74fca6a42863ffacaf7ba6f1936a9f228950f657 (diff) |
EDAC: move MCE error descriptions to EDAC core
This is in preparation for adding AMD-specific MCE decoding functionality
to the EDAC core. The error decoding macros originate from the AMD64
EDAC driver, albeit in a simplified and cleaned-up version here.
While at it, add macros to generate the error description strings and
use them in the error type decoders directly which removes a bunch of
code and makes the decoding functions much more readable. Also, fix
strings and shorten macro names.
Remove superfluous htlink_msgs.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r-- | drivers/edac/amd64_edac.c | 140 |
1 files changed, 78 insertions, 62 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index e2a10bcba7a1..b9e84bc91766 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
@@ -19,6 +19,63 @@ static struct mem_ctl_info *mci_lookup[MAX_NUMNODES]; | |||
19 | static struct amd64_pvt *pvt_lookup[MAX_NUMNODES]; | 19 | static struct amd64_pvt *pvt_lookup[MAX_NUMNODES]; |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only | ||
23 | * for DDR2 DRAM mapping. | ||
24 | */ | ||
25 | u32 revf_quad_ddr2_shift[] = { | ||
26 | 0, /* 0000b NULL DIMM (128mb) */ | ||
27 | 28, /* 0001b 256mb */ | ||
28 | 29, /* 0010b 512mb */ | ||
29 | 29, /* 0011b 512mb */ | ||
30 | 29, /* 0100b 512mb */ | ||
31 | 30, /* 0101b 1gb */ | ||
32 | 30, /* 0110b 1gb */ | ||
33 | 31, /* 0111b 2gb */ | ||
34 | 31, /* 1000b 2gb */ | ||
35 | 32, /* 1001b 4gb */ | ||
36 | 32, /* 1010b 4gb */ | ||
37 | 33, /* 1011b 8gb */ | ||
38 | 0, /* 1100b future */ | ||
39 | 0, /* 1101b future */ | ||
40 | 0, /* 1110b future */ | ||
41 | 0 /* 1111b future */ | ||
42 | }; | ||
43 | |||
44 | /* | ||
45 | * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing | ||
46 | * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching- | ||
47 | * or higher value'. | ||
48 | * | ||
49 | *FIXME: Produce a better mapping/linearisation. | ||
50 | */ | ||
51 | |||
52 | struct scrubrate scrubrates[] = { | ||
53 | { 0x01, 1600000000UL}, | ||
54 | { 0x02, 800000000UL}, | ||
55 | { 0x03, 400000000UL}, | ||
56 | { 0x04, 200000000UL}, | ||
57 | { 0x05, 100000000UL}, | ||
58 | { 0x06, 50000000UL}, | ||
59 | { 0x07, 25000000UL}, | ||
60 | { 0x08, 12284069UL}, | ||
61 | { 0x09, 6274509UL}, | ||
62 | { 0x0A, 3121951UL}, | ||
63 | { 0x0B, 1560975UL}, | ||
64 | { 0x0C, 781440UL}, | ||
65 | { 0x0D, 390720UL}, | ||
66 | { 0x0E, 195300UL}, | ||
67 | { 0x0F, 97650UL}, | ||
68 | { 0x10, 48854UL}, | ||
69 | { 0x11, 24427UL}, | ||
70 | { 0x12, 12213UL}, | ||
71 | { 0x13, 6101UL}, | ||
72 | { 0x14, 3051UL}, | ||
73 | { 0x15, 1523UL}, | ||
74 | { 0x16, 761UL}, | ||
75 | { 0x00, 0UL}, /* scrubbing off */ | ||
76 | }; | ||
77 | |||
78 | /* | ||
22 | * Memory scrubber control interface. For K8, memory scrubbing is handled by | 79 | * Memory scrubber control interface. For K8, memory scrubbing is handled by |
23 | * hardware and can involve L2 cache, dcache as well as the main memory. With | 80 | * hardware and can involve L2 cache, dcache as well as the main memory. With |
24 | * F10, this is extended to L3 cache scrubbing on CPU models sporting that | 81 | * F10, this is extended to L3 cache scrubbing on CPU models sporting that |
@@ -1101,8 +1158,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, | |||
1101 | u32 page, offset; | 1158 | u32 page, offset; |
1102 | 1159 | ||
1103 | /* Extract the syndrome parts and form a 16-bit syndrome */ | 1160 | /* Extract the syndrome parts and form a 16-bit syndrome */ |
1104 | syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8; | 1161 | syndrome = HIGH_SYNDROME(info->nbsl) << 8; |
1105 | syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh); | 1162 | syndrome |= LOW_SYNDROME(info->nbsh); |
1106 | 1163 | ||
1107 | /* CHIPKILL enabled */ | 1164 | /* CHIPKILL enabled */ |
1108 | if (info->nbcfg & K8_NBCFG_CHIPKILL) { | 1165 | if (info->nbcfg & K8_NBCFG_CHIPKILL) { |
@@ -1701,8 +1758,8 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, | |||
1701 | if (csrow >= 0) { | 1758 | if (csrow >= 0) { |
1702 | error_address_to_page_and_offset(sys_addr, &page, &offset); | 1759 | error_address_to_page_and_offset(sys_addr, &page, &offset); |
1703 | 1760 | ||
1704 | syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8; | 1761 | syndrome = HIGH_SYNDROME(info->nbsl) << 8; |
1705 | syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh); | 1762 | syndrome |= LOW_SYNDROME(info->nbsh); |
1706 | 1763 | ||
1707 | /* | 1764 | /* |
1708 | * Is CHIPKILL on? If so, then we can attempt to use the | 1765 | * Is CHIPKILL on? If so, then we can attempt to use the |
@@ -2155,36 +2212,22 @@ static int amd64_get_error_info(struct mem_ctl_info *mci, | |||
2155 | static inline void amd64_decode_gart_tlb_error(struct mem_ctl_info *mci, | 2212 | static inline void amd64_decode_gart_tlb_error(struct mem_ctl_info *mci, |
2156 | struct amd64_error_info_regs *info) | 2213 | struct amd64_error_info_regs *info) |
2157 | { | 2214 | { |
2158 | u32 err_code; | 2215 | u32 ec = ERROR_CODE(info->nbsl); |
2159 | u32 ec_tt; /* error code transaction type (2b) */ | ||
2160 | u32 ec_ll; /* error code cache level (2b) */ | ||
2161 | |||
2162 | err_code = EXTRACT_ERROR_CODE(info->nbsl); | ||
2163 | ec_ll = EXTRACT_LL_CODE(err_code); | ||
2164 | ec_tt = EXTRACT_TT_CODE(err_code); | ||
2165 | 2216 | ||
2166 | amd64_mc_printk(mci, KERN_ERR, | 2217 | amd64_mc_printk(mci, KERN_ERR, |
2167 | "GART TLB event: transaction type(%s), " | 2218 | "GART TLB event: transaction type(%s), " |
2168 | "cache level(%s)\n", tt_msgs[ec_tt], ll_msgs[ec_ll]); | 2219 | "cache level(%s)\n", TT_MSG(ec), LL_MSG(ec)); |
2169 | } | 2220 | } |
2170 | 2221 | ||
2171 | static inline void amd64_decode_mem_cache_error(struct mem_ctl_info *mci, | 2222 | static inline void amd64_decode_mem_cache_error(struct mem_ctl_info *mci, |
2172 | struct amd64_error_info_regs *info) | 2223 | struct amd64_error_info_regs *info) |
2173 | { | 2224 | { |
2174 | u32 err_code; | 2225 | u32 ec = ERROR_CODE(info->nbsl); |
2175 | u32 ec_rrrr; /* error code memory transaction (4b) */ | ||
2176 | u32 ec_tt; /* error code transaction type (2b) */ | ||
2177 | u32 ec_ll; /* error code cache level (2b) */ | ||
2178 | |||
2179 | err_code = EXTRACT_ERROR_CODE(info->nbsl); | ||
2180 | ec_ll = EXTRACT_LL_CODE(err_code); | ||
2181 | ec_tt = EXTRACT_TT_CODE(err_code); | ||
2182 | ec_rrrr = EXTRACT_RRRR_CODE(err_code); | ||
2183 | 2226 | ||
2184 | amd64_mc_printk(mci, KERN_ERR, | 2227 | amd64_mc_printk(mci, KERN_ERR, |
2185 | "cache hierarchy error: memory transaction type(%s), " | 2228 | "cache hierarchy error: memory transaction type(%s), " |
2186 | "transaction type(%s), cache level(%s)\n", | 2229 | "transaction type(%s), cache level(%s)\n", |
2187 | rrrr_msgs[ec_rrrr], tt_msgs[ec_tt], ll_msgs[ec_ll]); | 2230 | RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); |
2188 | } | 2231 | } |
2189 | 2232 | ||
2190 | 2233 | ||
@@ -2264,21 +2307,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, | |||
2264 | static void amd64_decode_bus_error(struct mem_ctl_info *mci, | 2307 | static void amd64_decode_bus_error(struct mem_ctl_info *mci, |
2265 | struct amd64_error_info_regs *info) | 2308 | struct amd64_error_info_regs *info) |
2266 | { | 2309 | { |
2267 | u32 err_code, ext_ec; | 2310 | u32 ec = ERROR_CODE(info->nbsl); |
2268 | u32 ec_pp; /* error code participating processor (2p) */ | 2311 | u32 xec = EXT_ERROR_CODE(info->nbsl); |
2269 | u32 ec_to; /* error code timed out (1b) */ | ||
2270 | u32 ec_rrrr; /* error code memory transaction (4b) */ | ||
2271 | u32 ec_ii; /* error code memory or I/O (2b) */ | ||
2272 | u32 ec_ll; /* error code cache level (2b) */ | ||
2273 | |||
2274 | ext_ec = EXTRACT_EXT_ERROR_CODE(info->nbsl); | ||
2275 | err_code = EXTRACT_ERROR_CODE(info->nbsl); | ||
2276 | |||
2277 | ec_ll = EXTRACT_LL_CODE(err_code); | ||
2278 | ec_ii = EXTRACT_II_CODE(err_code); | ||
2279 | ec_rrrr = EXTRACT_RRRR_CODE(err_code); | ||
2280 | ec_to = EXTRACT_TO_CODE(err_code); | ||
2281 | ec_pp = EXTRACT_PP_CODE(err_code); | ||
2282 | 2312 | ||
2283 | amd64_mc_printk(mci, KERN_ERR, | 2313 | amd64_mc_printk(mci, KERN_ERR, |
2284 | "BUS ERROR:\n" | 2314 | "BUS ERROR:\n" |
@@ -2286,20 +2316,17 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci, | |||
2286 | " participating processor(%s)\n" | 2316 | " participating processor(%s)\n" |
2287 | " memory transaction type(%s)\n" | 2317 | " memory transaction type(%s)\n" |
2288 | " cache level(%s) Error Found by: %s\n", | 2318 | " cache level(%s) Error Found by: %s\n", |
2289 | to_msgs[ec_to], | 2319 | TO_MSG(ec), II_MSG(ec), PP_MSG(ec), RRRR_MSG(ec), LL_MSG(ec), |
2290 | ii_msgs[ec_ii], | ||
2291 | pp_msgs[ec_pp], | ||
2292 | rrrr_msgs[ec_rrrr], | ||
2293 | ll_msgs[ec_ll], | ||
2294 | (info->nbsh & K8_NBSH_ERR_SCRUBER) ? | 2320 | (info->nbsh & K8_NBSH_ERR_SCRUBER) ? |
2295 | "Scrubber" : "Normal Operation"); | 2321 | "Scrubber" : "Normal Operation"); |
2296 | 2322 | ||
2297 | /* If this was an 'observed' error, early out */ | 2323 | |
2298 | if (ec_pp == K8_NBSL_PP_OBS) | 2324 | /* Bail early out if this was an 'observed' error */ |
2299 | return; /* We aren't the node involved */ | 2325 | if (PP(ec) == K8_NBSL_PP_OBS) |
2326 | return; | ||
2300 | 2327 | ||
2301 | /* Parse out the extended error code for ECC events */ | 2328 | /* Parse out the extended error code for ECC events */ |
2302 | switch (ext_ec) { | 2329 | switch (xec) { |
2303 | /* F10 changed to one Extended ECC error code */ | 2330 | /* F10 changed to one Extended ECC error code */ |
2304 | case F10_NBSL_EXT_ERR_RES: /* Reserved field */ | 2331 | case F10_NBSL_EXT_ERR_RES: /* Reserved field */ |
2305 | case F10_NBSL_EXT_ERR_ECC: /* F10 ECC ext err code */ | 2332 | case F10_NBSL_EXT_ERR_ECC: /* F10 ECC ext err code */ |
@@ -2379,7 +2406,7 @@ int amd64_process_error_info(struct mem_ctl_info *mci, | |||
2379 | (regs->nbsh & K8_NBSH_CORE3) ? "True" : "False"); | 2406 | (regs->nbsh & K8_NBSH_CORE3) ? "True" : "False"); |
2380 | 2407 | ||
2381 | 2408 | ||
2382 | err_code = EXTRACT_ERROR_CODE(regs->nbsl); | 2409 | err_code = ERROR_CODE(regs->nbsl); |
2383 | 2410 | ||
2384 | /* Determine which error type: | 2411 | /* Determine which error type: |
2385 | * 1) GART errors - non-fatal, developmental events | 2412 | * 1) GART errors - non-fatal, developmental events |
@@ -2387,7 +2414,7 @@ int amd64_process_error_info(struct mem_ctl_info *mci, | |||
2387 | * 3) BUS errors | 2414 | * 3) BUS errors |
2388 | * 4) Unknown error | 2415 | * 4) Unknown error |
2389 | */ | 2416 | */ |
2390 | if (TEST_TLB_ERROR(err_code)) { | 2417 | if (TLB_ERROR(err_code)) { |
2391 | /* | 2418 | /* |
2392 | * GART errors are intended to help graphics driver developers | 2419 | * GART errors are intended to help graphics driver developers |
2393 | * to detect bad GART PTEs. It is recommended by AMD to disable | 2420 | * to detect bad GART PTEs. It is recommended by AMD to disable |
@@ -2411,10 +2438,10 @@ int amd64_process_error_info(struct mem_ctl_info *mci, | |||
2411 | 2438 | ||
2412 | debugf1("GART TLB error\n"); | 2439 | debugf1("GART TLB error\n"); |
2413 | amd64_decode_gart_tlb_error(mci, info); | 2440 | amd64_decode_gart_tlb_error(mci, info); |
2414 | } else if (TEST_MEM_ERROR(err_code)) { | 2441 | } else if (MEM_ERROR(err_code)) { |
2415 | debugf1("Memory/Cache error\n"); | 2442 | debugf1("Memory/Cache error\n"); |
2416 | amd64_decode_mem_cache_error(mci, info); | 2443 | amd64_decode_mem_cache_error(mci, info); |
2417 | } else if (TEST_BUS_ERROR(err_code)) { | 2444 | } else if (BUS_ERROR(err_code)) { |
2418 | debugf1("Bus (Link/DRAM) error\n"); | 2445 | debugf1("Bus (Link/DRAM) error\n"); |
2419 | amd64_decode_bus_error(mci, info); | 2446 | amd64_decode_bus_error(mci, info); |
2420 | } else { | 2447 | } else { |
@@ -2424,21 +2451,10 @@ int amd64_process_error_info(struct mem_ctl_info *mci, | |||
2424 | err_code); | 2451 | err_code); |
2425 | } | 2452 | } |
2426 | 2453 | ||
2427 | ext_ec = EXTRACT_EXT_ERROR_CODE(regs->nbsl); | 2454 | ext_ec = EXT_ERROR_CODE(regs->nbsl); |
2428 | amd64_mc_printk(mci, KERN_ERR, | 2455 | amd64_mc_printk(mci, KERN_ERR, |
2429 | "ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]); | 2456 | "ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]); |
2430 | 2457 | ||
2431 | if (((ext_ec >= F10_NBSL_EXT_ERR_CRC && | ||
2432 | ext_ec <= F10_NBSL_EXT_ERR_TGT) || | ||
2433 | (ext_ec == F10_NBSL_EXT_ERR_RMW)) && | ||
2434 | EXTRACT_LDT_LINK(info->nbsh)) { | ||
2435 | |||
2436 | amd64_mc_printk(mci, KERN_ERR, | ||
2437 | "Error on hypertransport link: %s\n", | ||
2438 | htlink_msgs[ | ||
2439 | EXTRACT_LDT_LINK(info->nbsh)]); | ||
2440 | } | ||
2441 | |||
2442 | /* | 2458 | /* |
2443 | * Check the UE bit of the NB status high register, if set generate some | 2459 | * Check the UE bit of the NB status high register, if set generate some |
2444 | * logs. If NOT a GART error, then process the event as a NO-INFO event. | 2460 | * logs. If NOT a GART error, then process the event as a NO-INFO event. |