about summary refs log tree commit diff stats
path: root/drivers/edac/amd64_edac.c
diff options
context:
space:
mode:
author Borislav Petkov <borislav.petkov@amd.com> 2009-06-25 13:32:38 -0400
committer Borislav Petkov <borislav.petkov@amd.com> 2009-09-14 12:57:48 -0400
commit b70ef01016850de87b9a28a6af19fed8801df076 (patch)
tree 78159c6e177c5557f2cc9342f8bc0833645d8da5 /drivers/edac/amd64_edac.c
parent 74fca6a42863ffacaf7ba6f1936a9f228950f657 (diff)
EDAC: move MCE error descriptions to EDAC core
This is in preparation for adding AMD-specific MCE decoding functionality to the EDAC core. The error decoding macros originate from the AMD64 EDAC driver, albeit in a simplified and cleaned-up version here. While at it, add macros to generate the error description strings and use them in the error type decoders directly, which removes a bunch of code and makes the decoding functions much more readable. Also, fix strings and shorten macro names. Remove superfluous htlink_msgs. Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r-- drivers/edac/amd64_edac.c 140
1 files changed, 78 insertions, 62 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index e2a10bcba7a1..b9e84bc91766 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -19,6 +19,63 @@ static struct mem_ctl_info *mci_lookup[MAX_NUMNODES];
19static struct amd64_pvt *pvt_lookup[MAX_NUMNODES]; 19static struct amd64_pvt *pvt_lookup[MAX_NUMNODES];
20 20
21/* 21/*
22 * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
23 * for DDR2 DRAM mapping.
24 */
25u32 revf_quad_ddr2_shift[] = {
26 0, /* 0000b NULL DIMM (128mb) */
27 28, /* 0001b 256mb */
28 29, /* 0010b 512mb */
29 29, /* 0011b 512mb */
30 29, /* 0100b 512mb */
31 30, /* 0101b 1gb */
32 30, /* 0110b 1gb */
33 31, /* 0111b 2gb */
34 31, /* 1000b 2gb */
35 32, /* 1001b 4gb */
36 32, /* 1010b 4gb */
37 33, /* 1011b 8gb */
38 0, /* 1100b future */
39 0, /* 1101b future */
40 0, /* 1110b future */
41 0 /* 1111b future */
42};
43
44/*
45 * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
46 * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
47 * or higher value'.
48 *
49 *FIXME: Produce a better mapping/linearisation.
50 */
51
52struct scrubrate scrubrates[] = {
53 { 0x01, 1600000000UL},
54 { 0x02, 800000000UL},
55 { 0x03, 400000000UL},
56 { 0x04, 200000000UL},
57 { 0x05, 100000000UL},
58 { 0x06, 50000000UL},
59 { 0x07, 25000000UL},
60 { 0x08, 12284069UL},
61 { 0x09, 6274509UL},
62 { 0x0A, 3121951UL},
63 { 0x0B, 1560975UL},
64 { 0x0C, 781440UL},
65 { 0x0D, 390720UL},
66 { 0x0E, 195300UL},
67 { 0x0F, 97650UL},
68 { 0x10, 48854UL},
69 { 0x11, 24427UL},
70 { 0x12, 12213UL},
71 { 0x13, 6101UL},
72 { 0x14, 3051UL},
73 { 0x15, 1523UL},
74 { 0x16, 761UL},
75 { 0x00, 0UL}, /* scrubbing off */
76};
77
78/*
22 * Memory scrubber control interface. For K8, memory scrubbing is handled by 79 * Memory scrubber control interface. For K8, memory scrubbing is handled by
23 * hardware and can involve L2 cache, dcache as well as the main memory. With 80 * hardware and can involve L2 cache, dcache as well as the main memory. With
24 * F10, this is extended to L3 cache scrubbing on CPU models sporting that 81 * F10, this is extended to L3 cache scrubbing on CPU models sporting that
@@ -1101,8 +1158,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
1101 u32 page, offset; 1158 u32 page, offset;
1102 1159
1103 /* Extract the syndrome parts and form a 16-bit syndrome */ 1160 /* Extract the syndrome parts and form a 16-bit syndrome */
1104 syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8; 1161 syndrome = HIGH_SYNDROME(info->nbsl) << 8;
1105 syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh); 1162 syndrome |= LOW_SYNDROME(info->nbsh);
1106 1163
1107 /* CHIPKILL enabled */ 1164 /* CHIPKILL enabled */
1108 if (info->nbcfg & K8_NBCFG_CHIPKILL) { 1165 if (info->nbcfg & K8_NBCFG_CHIPKILL) {
@@ -1701,8 +1758,8 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
1701 if (csrow >= 0) { 1758 if (csrow >= 0) {
1702 error_address_to_page_and_offset(sys_addr, &page, &offset); 1759 error_address_to_page_and_offset(sys_addr, &page, &offset);
1703 1760
1704 syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8; 1761 syndrome = HIGH_SYNDROME(info->nbsl) << 8;
1705 syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh); 1762 syndrome |= LOW_SYNDROME(info->nbsh);
1706 1763
1707 /* 1764 /*
1708 * Is CHIPKILL on? If so, then we can attempt to use the 1765 * Is CHIPKILL on? If so, then we can attempt to use the
@@ -2155,36 +2212,22 @@ static int amd64_get_error_info(struct mem_ctl_info *mci,
2155static inline void amd64_decode_gart_tlb_error(struct mem_ctl_info *mci, 2212static inline void amd64_decode_gart_tlb_error(struct mem_ctl_info *mci,
2156 struct amd64_error_info_regs *info) 2213 struct amd64_error_info_regs *info)
2157{ 2214{
2158 u32 err_code; 2215 u32 ec = ERROR_CODE(info->nbsl);
2159 u32 ec_tt; /* error code transaction type (2b) */
2160 u32 ec_ll; /* error code cache level (2b) */
2161
2162 err_code = EXTRACT_ERROR_CODE(info->nbsl);
2163 ec_ll = EXTRACT_LL_CODE(err_code);
2164 ec_tt = EXTRACT_TT_CODE(err_code);
2165 2216
2166 amd64_mc_printk(mci, KERN_ERR, 2217 amd64_mc_printk(mci, KERN_ERR,
2167 "GART TLB event: transaction type(%s), " 2218 "GART TLB event: transaction type(%s), "
2168 "cache level(%s)\n", tt_msgs[ec_tt], ll_msgs[ec_ll]); 2219 "cache level(%s)\n", TT_MSG(ec), LL_MSG(ec));
2169} 2220}
2170 2221
2171static inline void amd64_decode_mem_cache_error(struct mem_ctl_info *mci, 2222static inline void amd64_decode_mem_cache_error(struct mem_ctl_info *mci,
2172 struct amd64_error_info_regs *info) 2223 struct amd64_error_info_regs *info)
2173{ 2224{
2174 u32 err_code; 2225 u32 ec = ERROR_CODE(info->nbsl);
2175 u32 ec_rrrr; /* error code memory transaction (4b) */
2176 u32 ec_tt; /* error code transaction type (2b) */
2177 u32 ec_ll; /* error code cache level (2b) */
2178
2179 err_code = EXTRACT_ERROR_CODE(info->nbsl);
2180 ec_ll = EXTRACT_LL_CODE(err_code);
2181 ec_tt = EXTRACT_TT_CODE(err_code);
2182 ec_rrrr = EXTRACT_RRRR_CODE(err_code);
2183 2226
2184 amd64_mc_printk(mci, KERN_ERR, 2227 amd64_mc_printk(mci, KERN_ERR,
2185 "cache hierarchy error: memory transaction type(%s), " 2228 "cache hierarchy error: memory transaction type(%s), "
2186 "transaction type(%s), cache level(%s)\n", 2229 "transaction type(%s), cache level(%s)\n",
2187 rrrr_msgs[ec_rrrr], tt_msgs[ec_tt], ll_msgs[ec_ll]); 2230 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
2188} 2231}
2189 2232
2190 2233
@@ -2264,21 +2307,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
2264static void amd64_decode_bus_error(struct mem_ctl_info *mci, 2307static void amd64_decode_bus_error(struct mem_ctl_info *mci,
2265 struct amd64_error_info_regs *info) 2308 struct amd64_error_info_regs *info)
2266{ 2309{
2267 u32 err_code, ext_ec; 2310 u32 ec = ERROR_CODE(info->nbsl);
2268 u32 ec_pp; /* error code participating processor (2p) */ 2311 u32 xec = EXT_ERROR_CODE(info->nbsl);
2269 u32 ec_to; /* error code timed out (1b) */
2270 u32 ec_rrrr; /* error code memory transaction (4b) */
2271 u32 ec_ii; /* error code memory or I/O (2b) */
2272 u32 ec_ll; /* error code cache level (2b) */
2273
2274 ext_ec = EXTRACT_EXT_ERROR_CODE(info->nbsl);
2275 err_code = EXTRACT_ERROR_CODE(info->nbsl);
2276
2277 ec_ll = EXTRACT_LL_CODE(err_code);
2278 ec_ii = EXTRACT_II_CODE(err_code);
2279 ec_rrrr = EXTRACT_RRRR_CODE(err_code);
2280 ec_to = EXTRACT_TO_CODE(err_code);
2281 ec_pp = EXTRACT_PP_CODE(err_code);
2282 2312
2283 amd64_mc_printk(mci, KERN_ERR, 2313 amd64_mc_printk(mci, KERN_ERR,
2284 "BUS ERROR:\n" 2314 "BUS ERROR:\n"
@@ -2286,20 +2316,17 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci,
2286 " participating processor(%s)\n" 2316 " participating processor(%s)\n"
2287 " memory transaction type(%s)\n" 2317 " memory transaction type(%s)\n"
2288 " cache level(%s) Error Found by: %s\n", 2318 " cache level(%s) Error Found by: %s\n",
2289 to_msgs[ec_to], 2319 TO_MSG(ec), II_MSG(ec), PP_MSG(ec), RRRR_MSG(ec), LL_MSG(ec),
2290 ii_msgs[ec_ii],
2291 pp_msgs[ec_pp],
2292 rrrr_msgs[ec_rrrr],
2293 ll_msgs[ec_ll],
2294 (info->nbsh & K8_NBSH_ERR_SCRUBER) ? 2320 (info->nbsh & K8_NBSH_ERR_SCRUBER) ?
2295 "Scrubber" : "Normal Operation"); 2321 "Scrubber" : "Normal Operation");
2296 2322
2297 /* If this was an 'observed' error, early out */ 2323
2298 if (ec_pp == K8_NBSL_PP_OBS) 2324 /* Bail early out if this was an 'observed' error */
2299 return; /* We aren't the node involved */ 2325 if (PP(ec) == K8_NBSL_PP_OBS)
2326 return;
2300 2327
2301 /* Parse out the extended error code for ECC events */ 2328 /* Parse out the extended error code for ECC events */
2302 switch (ext_ec) { 2329 switch (xec) {
2303 /* F10 changed to one Extended ECC error code */ 2330 /* F10 changed to one Extended ECC error code */
2304 case F10_NBSL_EXT_ERR_RES: /* Reserved field */ 2331 case F10_NBSL_EXT_ERR_RES: /* Reserved field */
2305 case F10_NBSL_EXT_ERR_ECC: /* F10 ECC ext err code */ 2332 case F10_NBSL_EXT_ERR_ECC: /* F10 ECC ext err code */
@@ -2379,7 +2406,7 @@ int amd64_process_error_info(struct mem_ctl_info *mci,
2379 (regs->nbsh & K8_NBSH_CORE3) ? "True" : "False"); 2406 (regs->nbsh & K8_NBSH_CORE3) ? "True" : "False");
2380 2407
2381 2408
2382 err_code = EXTRACT_ERROR_CODE(regs->nbsl); 2409 err_code = ERROR_CODE(regs->nbsl);
2383 2410
2384 /* Determine which error type: 2411 /* Determine which error type:
2385 * 1) GART errors - non-fatal, developmental events 2412 * 1) GART errors - non-fatal, developmental events
@@ -2387,7 +2414,7 @@ int amd64_process_error_info(struct mem_ctl_info *mci,
2387 * 3) BUS errors 2414 * 3) BUS errors
2388 * 4) Unknown error 2415 * 4) Unknown error
2389 */ 2416 */
2390 if (TEST_TLB_ERROR(err_code)) { 2417 if (TLB_ERROR(err_code)) {
2391 /* 2418 /*
2392 * GART errors are intended to help graphics driver developers 2419 * GART errors are intended to help graphics driver developers
2393 * to detect bad GART PTEs. It is recommended by AMD to disable 2420 * to detect bad GART PTEs. It is recommended by AMD to disable
@@ -2411,10 +2438,10 @@ int amd64_process_error_info(struct mem_ctl_info *mci,
2411 2438
2412 debugf1("GART TLB error\n"); 2439 debugf1("GART TLB error\n");
2413 amd64_decode_gart_tlb_error(mci, info); 2440 amd64_decode_gart_tlb_error(mci, info);
2414 } else if (TEST_MEM_ERROR(err_code)) { 2441 } else if (MEM_ERROR(err_code)) {
2415 debugf1("Memory/Cache error\n"); 2442 debugf1("Memory/Cache error\n");
2416 amd64_decode_mem_cache_error(mci, info); 2443 amd64_decode_mem_cache_error(mci, info);
2417 } else if (TEST_BUS_ERROR(err_code)) { 2444 } else if (BUS_ERROR(err_code)) {
2418 debugf1("Bus (Link/DRAM) error\n"); 2445 debugf1("Bus (Link/DRAM) error\n");
2419 amd64_decode_bus_error(mci, info); 2446 amd64_decode_bus_error(mci, info);
2420 } else { 2447 } else {
@@ -2424,21 +2451,10 @@ int amd64_process_error_info(struct mem_ctl_info *mci,
2424 err_code); 2451 err_code);
2425 } 2452 }
2426 2453
2427 ext_ec = EXTRACT_EXT_ERROR_CODE(regs->nbsl); 2454 ext_ec = EXT_ERROR_CODE(regs->nbsl);
2428 amd64_mc_printk(mci, KERN_ERR, 2455 amd64_mc_printk(mci, KERN_ERR,
2429 "ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]); 2456 "ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]);
2430 2457
2431 if (((ext_ec >= F10_NBSL_EXT_ERR_CRC &&
2432 ext_ec <= F10_NBSL_EXT_ERR_TGT) ||
2433 (ext_ec == F10_NBSL_EXT_ERR_RMW)) &&
2434 EXTRACT_LDT_LINK(info->nbsh)) {
2435
2436 amd64_mc_printk(mci, KERN_ERR,
2437 "Error on hypertransport link: %s\n",
2438 htlink_msgs[
2439 EXTRACT_LDT_LINK(info->nbsh)]);
2440 }
2441
2442 /* 2458 /*
2443 * Check the UE bit of the NB status high register, if set generate some 2459 * Check the UE bit of the NB status high register, if set generate some
2444 * logs. If NOT a GART error, then process the event as a NO-INFO event. 2460 * logs. If NOT a GART error, then process the event as a NO-INFO event.