diff options
-rw-r--r-- | drivers/edac/i7core_edac.c | 208 |
1 files changed, 178 insertions, 30 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 87d5695f5fb0..4758c208f39a 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c | |||
@@ -73,6 +73,18 @@ | |||
73 | #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff) | 73 | #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff) |
74 | #define DIMM0_COR_ERR(r) ((r) & 0x7fff) | 74 | #define DIMM0_COR_ERR(r) ((r) & 0x7fff) |
75 | 75 | ||
76 | /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */ | |
77 | #define MC_COR_ECC_CNT_0 0x80 | ||
78 | #define MC_COR_ECC_CNT_1 0x84 | ||
79 | #define MC_COR_ECC_CNT_2 0x88 | ||
80 | #define MC_COR_ECC_CNT_3 0x8c | ||
81 | #define MC_COR_ECC_CNT_4 0x90 | ||
82 | #define MC_COR_ECC_CNT_5 0x94 | ||
83 | |||
84 | #define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff) | ||
85 | #define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff) | ||
86 | |||
87 | |||
76 | /* OFFSETS for Devices 4,5 and 6 Function 0 */ | 88 | /* OFFSETS for Devices 4,5 and 6 Function 0 */ |
77 | 89 | ||
78 | #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58 | 90 | #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58 |
@@ -194,13 +206,20 @@ struct i7core_pvt { | |||
194 | struct i7core_inject inject; | 206 | struct i7core_inject inject; |
195 | struct i7core_channel channel[NUM_SOCKETS][NUM_CHANS]; | 207 | struct i7core_channel channel[NUM_SOCKETS][NUM_CHANS]; |
196 | 208 | ||
209 | unsigned int is_registered:1; /* true if all memories are RDIMMs */ | ||
210 | |||
197 | int sockets; /* Number of sockets */ | 211 | int sockets; /* Number of sockets */ |
198 | int channels; /* Number of active channels */ | 212 | int channels; /* Number of active channels */ |
199 | 213 | ||
200 | int ce_count_available[NUM_SOCKETS]; | 214 | int ce_count_available[NUM_SOCKETS]; |
201 | /* ECC corrected errors counts per dimm */ | 215 | int csrow_map[NUM_SOCKETS][NUM_CHANS][MAX_DIMMS]; |
202 | unsigned long ce_count[NUM_SOCKETS][MAX_DIMMS]; | 216 | |
203 | int last_ce_count[NUM_SOCKETS][MAX_DIMMS]; | 217 | /* ECC corrected errors counts per udimm */ |
218 | unsigned long udimm_ce_count[NUM_SOCKETS][MAX_DIMMS]; | ||
219 | int udimm_last_ce_count[NUM_SOCKETS][MAX_DIMMS]; | ||
220 | /* ECC corrected errors counts per rdimm */ | ||
221 | unsigned long rdimm_ce_count[NUM_SOCKETS][NUM_CHANS][MAX_DIMMS]; | ||
222 | int rdimm_last_ce_count[NUM_SOCKETS][NUM_CHANS][MAX_DIMMS]; | ||
204 | 223 | ||
205 | /* mcelog glue */ | 224 | /* mcelog glue */ |
206 | struct edac_mce edac_mce; | 225 | struct edac_mce edac_mce; |
@@ -471,6 +490,8 @@ static int get_dimm_config(struct mem_ctl_info *mci, int *csrow, u8 socket) | |||
471 | numrow(pvt->info.max_dod >> 6), | 490 | numrow(pvt->info.max_dod >> 6), |
472 | numcol(pvt->info.max_dod >> 9)); | 491 | numcol(pvt->info.max_dod >> 9)); |
473 | 492 | ||
493 | pvt->is_registered = 1; | ||
494 | |||
474 | for (i = 0; i < NUM_CHANS; i++) { | 495 | for (i = 0; i < NUM_CHANS; i++) { |
475 | u32 data, dimm_dod[3], value[8]; | 496 | u32 data, dimm_dod[3], value[8]; |
476 | 497 | ||
@@ -492,8 +513,14 @@ static int get_dimm_config(struct mem_ctl_info *mci, int *csrow, u8 socket) | |||
492 | 513 | ||
493 | if (data & REGISTERED_DIMM) | 514 | if (data & REGISTERED_DIMM) |
494 | mtype = MEM_RDDR3; | 515 | mtype = MEM_RDDR3; |
495 | else | 516 | else { |
496 | mtype = MEM_DDR3; | 517 | mtype = MEM_DDR3; |
518 | /* | ||
519 | * FIXME: Currently, the driver will use dev 3:2 | ||
520 | * counter registers only if all memories are registered | ||
521 | */ | ||
522 | pvt->is_registered = 0; | ||
523 | } | ||
497 | #if 0 | 524 | #if 0 |
498 | if (data & THREE_DIMMS_PRESENT) | 525 | if (data & THREE_DIMMS_PRESENT) |
499 | pvt->channel[i].dimms = 3; | 526 | pvt->channel[i].dimms = 3; |
@@ -562,6 +589,8 @@ static int get_dimm_config(struct mem_ctl_info *mci, int *csrow, u8 socket) | |||
562 | csr->channels[0].chan_idx = i; | 589 | csr->channels[0].chan_idx = i; |
563 | csr->channels[0].ce_count = 0; | 590 | csr->channels[0].ce_count = 0; |
564 | 591 | ||
592 | pvt->csrow_map[socket][i][j] = *csrow; | ||
593 | |||
565 | switch (banks) { | 594 | switch (banks) { |
566 | case 4: | 595 | case 4: |
567 | csr->dtype = DEV_X4; | 596 | csr->dtype = DEV_X4; |
@@ -1031,19 +1060,31 @@ static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci, | |||
1031 | 1060 | ||
1032 | static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data) | 1061 | static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data) |
1033 | { | 1062 | { |
1034 | unsigned i, count, total = 0; | 1063 | unsigned i, j, count, total = 0; |
1035 | struct i7core_pvt *pvt = mci->pvt_info; | 1064 | struct i7core_pvt *pvt = mci->pvt_info; |
1036 | 1065 | ||
1037 | for (i = 0; i < pvt->sockets; i++) { | 1066 | for (i = 0; i < pvt->sockets; i++) { |
1038 | if (!pvt->ce_count_available[i]) | 1067 | if (!pvt->ce_count_available[i]) { |
1039 | count = sprintf(data, "socket 0 data unavailable\n"); | 1068 | count = sprintf(data, "socket 0 data unavailable\n"); |
1040 | else | 1069 | continue; |
1070 | } | ||
1071 | if (!pvt->is_registered) | ||
1041 | count = sprintf(data, "socket %d, dimm0: %lu\n" | 1072 | count = sprintf(data, "socket %d, dimm0: %lu\n" |
1042 | "dimm1: %lu\ndimm2: %lu\n", | 1073 | "dimm1: %lu\ndimm2: %lu\n", |
1043 | i, | 1074 | i, |
1044 | pvt->ce_count[i][0], | 1075 | pvt->udimm_ce_count[i][0], |
1045 | pvt->ce_count[i][1], | 1076 | pvt->udimm_ce_count[i][1], |
1046 | pvt->ce_count[i][2]); | 1077 | pvt->udimm_ce_count[i][2]); |
1078 | else | ||
1079 | for (j = 0; j < NUM_CHANS; j++) { | ||
1080 | count = sprintf(data, "socket %d, channel %d" | ||
1081 | "dimm0: %lu\n" | ||
1082 | "dimm1: %lu\ndimm2: %lu\n", | ||
1083 | i, j, | ||
1084 | pvt->rdimm_ce_count[i][j][0], | ||
1085 | pvt->rdimm_ce_count[i][j][1], | ||
1086 | pvt->rdimm_ce_count[i][j][2]); | ||
1087 | } | ||
1047 | data += count; | 1088 | data += count; |
1048 | total += count; | 1089 | total += count; |
1049 | } | 1090 | } |
@@ -1308,6 +1349,103 @@ error: | |||
1308 | /**************************************************************************** | 1349 | /**************************************************************************** |
1309 | Error check routines | 1350 | Error check routines |
1310 | ****************************************************************************/ | 1351 | ****************************************************************************/ |
1352 | static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci, int socket, | ||
1353 | int chan, int dimm, int add) | ||
1354 | { | ||
1355 | char *msg; | ||
1356 | struct i7core_pvt *pvt = mci->pvt_info; | ||
1357 | int row = pvt->csrow_map[socket][chan][dimm], i; | ||
1358 | |||
1359 | for (i = 0; i < add; i++) { | ||
1360 | msg = kasprintf(GFP_KERNEL, "Corrected error " | ||
1361 | "(Socket=%d channel=%d dimm=%d", | ||
1362 | socket, chan, dimm); | ||
1363 | |||
1364 | edac_mc_handle_fbd_ce(mci, row, 0, msg); | ||
1365 | kfree (msg); | ||
1366 | } | ||
1367 | } | ||
1368 | |||
1369 | static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci, | ||
1370 | int socket, int chan, int new0, int new1, int new2) | ||
1371 | { | ||
1372 | struct i7core_pvt *pvt = mci->pvt_info; | ||
1373 | int add0 = 0, add1 = 0, add2 = 0; | ||
1374 | /* Updates CE counters if it is not the first time here */ | ||
1375 | if (pvt->ce_count_available[socket]) { | ||
1376 | /* Updates CE counters */ | ||
1377 | |||
1378 | add2 = new2 - pvt->rdimm_last_ce_count[socket][chan][2]; | ||
1379 | add1 = new1 - pvt->rdimm_last_ce_count[socket][chan][1]; | ||
1380 | add0 = new0 - pvt->rdimm_last_ce_count[socket][chan][0]; | ||
1381 | |||
1382 | if (add2 < 0) | ||
1383 | add2 += 0x7fff; | ||
1384 | pvt->rdimm_ce_count[socket][chan][2] += add2; | ||
1385 | |||
1386 | if (add1 < 0) | ||
1387 | add1 += 0x7fff; | ||
1388 | pvt->rdimm_ce_count[socket][chan][1] += add1; | ||
1389 | |||
1390 | if (add0 < 0) | ||
1391 | add0 += 0x7fff; | ||
1392 | pvt->rdimm_ce_count[socket][chan][0] += add0; | ||
1393 | } else | ||
1394 | pvt->ce_count_available[socket] = 1; | ||
1395 | |||
1396 | /* Store the new values */ | ||
1397 | pvt->rdimm_last_ce_count[socket][chan][2] = new2; | ||
1398 | pvt->rdimm_last_ce_count[socket][chan][1] = new1; | ||
1399 | pvt->rdimm_last_ce_count[socket][chan][0] = new0; | ||
1400 | |||
1401 | /* Update the edac core */ | |
1402 | if (add0 != 0) | ||
1403 | i7core_rdimm_update_csrow(mci, socket, chan, 0, add0); | ||
1404 | if (add1 != 0) | ||
1405 | i7core_rdimm_update_csrow(mci, socket, chan, 1, add1); | ||
1406 | if (add2 != 0) | ||
1407 | i7core_rdimm_update_csrow(mci, socket, chan, 2, add2); | ||
1408 | |||
1409 | } | ||
1410 | |||
1411 | static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci, u8 socket) | ||
1412 | { | ||
1413 | struct i7core_pvt *pvt = mci->pvt_info; | ||
1414 | u32 rcv[3][2]; | ||
1415 | int i, new0, new1, new2; | ||
1416 | |||
1417 | /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/ | ||
1418 | pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_0, | ||
1419 | &rcv[0][0]); | ||
1420 | pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_1, | ||
1421 | &rcv[0][1]); | ||
1422 | pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_2, | ||
1423 | &rcv[1][0]); | ||
1424 | pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_3, | ||
1425 | &rcv[1][1]); | ||
1426 | pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_4, | ||
1427 | &rcv[2][0]); | ||
1428 | pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_5, | ||
1429 | &rcv[2][1]); | ||
1430 | for (i = 0 ; i < 3; i++) { | ||
1431 | debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n", | ||
1432 | (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]); | ||
1433 | /*if the channel has 3 dimms*/ | ||
1434 | if (pvt->channel[socket][i].dimms > 2) { | ||
1435 | new0 = DIMM_BOT_COR_ERR(rcv[i][0]); | ||
1436 | new1 = DIMM_TOP_COR_ERR(rcv[i][0]); | ||
1437 | new2 = DIMM_BOT_COR_ERR(rcv[i][1]); | ||
1438 | } else { | ||
1439 | new0 = DIMM_TOP_COR_ERR(rcv[i][0]) + | ||
1440 | DIMM_BOT_COR_ERR(rcv[i][0]); | ||
1441 | new1 = DIMM_TOP_COR_ERR(rcv[i][1]) + | ||
1442 | DIMM_BOT_COR_ERR(rcv[i][1]); | ||
1443 | new2 = 0; | ||
1444 | } | ||
1445 | |||
1446 | i7core_rdimm_update_ce_count(mci, socket, i, new0, new1, new2); | ||
1447 | } | ||
1448 | } | ||
1311 | 1449 | ||
1312 | /* This function is based on the device 3 function 4 registers as described on: | 1450 | /* This function is based on the device 3 function 4 registers as described on: |
1313 | * Intel Xeon Processor 5500 Series Datasheet Volume 2 | 1451 | * Intel Xeon Processor 5500 Series Datasheet Volume 2 |
@@ -1315,7 +1453,7 @@ error: | |||
1315 | * also available at: | 1453 | * also available at: |
1316 | * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf | 1454 | * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf |
1317 | */ | 1455 | */ |
1318 | static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket) | 1456 | static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci, u8 socket) |
1319 | { | 1457 | { |
1320 | struct i7core_pvt *pvt = mci->pvt_info; | 1458 | struct i7core_pvt *pvt = mci->pvt_info; |
1321 | u32 rcv1, rcv0; | 1459 | u32 rcv1, rcv0; |
@@ -1326,7 +1464,7 @@ static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket) | |||
1326 | return; | 1464 | return; |
1327 | } | 1465 | } |
1328 | 1466 | ||
1329 | /* Corrected error reads */ | 1467 | /* Corrected test errors */ |
1330 | pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV1, &rcv1); | 1468 | pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV1, &rcv1); |
1331 | pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV0, &rcv0); | 1469 | pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV0, &rcv0); |
1332 | 1470 | ||
@@ -1335,39 +1473,38 @@ static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket) | |||
1335 | new1 = DIMM1_COR_ERR(rcv0); | 1473 | new1 = DIMM1_COR_ERR(rcv0); |
1336 | new0 = DIMM0_COR_ERR(rcv0); | 1474 | new0 = DIMM0_COR_ERR(rcv0); |
1337 | 1475 | ||
1338 | #if 0 | ||
1339 | debugf2("%s CE rcv1=0x%08x rcv0=0x%08x, %d %d %d\n", | ||
1340 | (pvt->ce_count_available ? "UPDATE" : "READ"), | ||
1341 | rcv1, rcv0, new0, new1, new2); | ||
1342 | #endif | ||
1343 | |||
1344 | /* Updates CE counters if it is not the first time here */ | 1476 | /* Updates CE counters if it is not the first time here */ |
1345 | if (pvt->ce_count_available[socket]) { | 1477 | if (pvt->ce_count_available[socket]) { |
1346 | /* Updates CE counters */ | 1478 | /* Updates CE counters */ |
1347 | int add0, add1, add2; | 1479 | int add0, add1, add2; |
1348 | 1480 | ||
1349 | add2 = new2 - pvt->last_ce_count[socket][2]; | 1481 | add2 = new2 - pvt->udimm_last_ce_count[socket][2]; |
1350 | add1 = new1 - pvt->last_ce_count[socket][1]; | 1482 | add1 = new1 - pvt->udimm_last_ce_count[socket][1]; |
1351 | add0 = new0 - pvt->last_ce_count[socket][0]; | 1483 | add0 = new0 - pvt->udimm_last_ce_count[socket][0]; |
1352 | 1484 | ||
1353 | if (add2 < 0) | 1485 | if (add2 < 0) |
1354 | add2 += 0x7fff; | 1486 | add2 += 0x7fff; |
1355 | pvt->ce_count[socket][2] += add2; | 1487 | pvt->udimm_ce_count[socket][2] += add2; |
1356 | 1488 | ||
1357 | if (add1 < 0) | 1489 | if (add1 < 0) |
1358 | add1 += 0x7fff; | 1490 | add1 += 0x7fff; |
1359 | pvt->ce_count[socket][1] += add1; | 1491 | pvt->udimm_ce_count[socket][1] += add1; |
1360 | 1492 | ||
1361 | if (add0 < 0) | 1493 | if (add0 < 0) |
1362 | add0 += 0x7fff; | 1494 | add0 += 0x7fff; |
1363 | pvt->ce_count[socket][0] += add0; | 1495 | pvt->udimm_ce_count[socket][0] += add0; |
1496 | |||
1497 | if (add0 | add1 | add2) | ||
1498 | i7core_printk(KERN_ERR, "New Corrected error(s): " | ||
1499 | "dimm0: +%d, dimm1: +%d, dimm2 +%d\n", | ||
1500 | add0, add1, add2); | ||
1364 | } else | 1501 | } else |
1365 | pvt->ce_count_available[socket] = 1; | 1502 | pvt->ce_count_available[socket] = 1; |
1366 | 1503 | ||
1367 | /* Store the new values */ | 1504 | /* Store the new values */ |
1368 | pvt->last_ce_count[socket][2] = new2; | 1505 | pvt->udimm_last_ce_count[socket][2] = new2; |
1369 | pvt->last_ce_count[socket][1] = new1; | 1506 | pvt->udimm_last_ce_count[socket][1] = new1; |
1370 | pvt->last_ce_count[socket][0] = new0; | 1507 | pvt->udimm_last_ce_count[socket][0] = new0; |
1371 | } | 1508 | } |
1372 | 1509 | ||
1373 | /* | 1510 | /* |
@@ -1386,6 +1523,7 @@ static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket) | |||
1386 | static void i7core_mce_output_error(struct mem_ctl_info *mci, | 1523 | static void i7core_mce_output_error(struct mem_ctl_info *mci, |
1387 | struct mce *m) | 1524 | struct mce *m) |
1388 | { | 1525 | { |
1526 | struct i7core_pvt *pvt = mci->pvt_info; | ||
1389 | char *type, *optype, *err, *msg; | 1527 | char *type, *optype, *err, *msg; |
1390 | unsigned long error = m->status & 0x1ff0000l; | 1528 | unsigned long error = m->status & 0x1ff0000l; |
1391 | u32 optypenum = (m->status >> 4) & 0x07; | 1529 | u32 optypenum = (m->status >> 4) & 0x07; |
@@ -1394,6 +1532,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, | |||
1394 | u32 channel = (m->misc >> 18) & 0x3; | 1532 | u32 channel = (m->misc >> 18) & 0x3; |
1395 | u32 syndrome = m->misc >> 32; | 1533 | u32 syndrome = m->misc >> 32; |
1396 | u32 errnum = find_first_bit(&error, 32); | 1534 | u32 errnum = find_first_bit(&error, 32); |
1535 | int csrow; | ||
1397 | 1536 | ||
1398 | if (m->mcgstatus & 1) | 1537 | if (m->mcgstatus & 1) |
1399 | type = "FATAL"; | 1538 | type = "FATAL"; |
@@ -1463,9 +1602,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, | |||
1463 | 1602 | ||
1464 | debugf0("%s", msg); | 1603 | debugf0("%s", msg); |
1465 | 1604 | ||
1605 | csrow = pvt->csrow_map[m->cpu][channel][dimm]; | ||
1606 | |||
1466 | /* Call the helper to output message */ | 1607 | /* Call the helper to output message */ |
1467 | edac_mc_handle_fbd_ue(mci, 0 /* FIXME: should be rank here */, | 1608 | if (m->mcgstatus & 1) |
1468 | 0, 0 /* FIXME: should be channel here */, msg); | 1609 | edac_mc_handle_fbd_ue(mci, csrow, 0, |
1610 | 0 /* FIXME: should be channel here */, msg); | ||
1611 | else if (!pvt->is_registered) | ||
1612 | edac_mc_handle_fbd_ce(mci, csrow, | ||
1613 | 0 /* FIXME: should be channel here */, msg); | ||
1469 | 1614 | ||
1470 | kfree(msg); | 1615 | kfree(msg); |
1471 | } | 1616 | } |
@@ -1502,7 +1647,10 @@ static void i7core_check_error(struct mem_ctl_info *mci) | |||
1502 | 1647 | ||
1503 | /* check memory count errors */ | 1648 | /* check memory count errors */ |
1504 | for (i = 0; i < pvt->sockets; i++) | 1649 | for (i = 0; i < pvt->sockets; i++) |
1505 | check_mc_test_err(mci, i); | 1650 | if (!pvt->is_registered) |
1651 | i7core_udimm_check_mc_ecc_err(mci, i); | ||
1652 | else | ||
1653 | i7core_rdimm_check_mc_ecc_err(mci, i); | ||
1506 | } | 1654 | } |
1507 | 1655 | ||
1508 | /* | 1656 | /* |