aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/edac/i7core_edac.c208
1 files changed, 178 insertions, 30 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 87d5695f5fb0..4758c208f39a 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -73,6 +73,18 @@
73 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff) 73 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
74 #define DIMM0_COR_ERR(r) ((r) & 0x7fff) 74 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
75 75
76/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
77#define MC_COR_ECC_CNT_0 0x80
78#define MC_COR_ECC_CNT_1 0x84
79#define MC_COR_ECC_CNT_2 0x88
80#define MC_COR_ECC_CNT_3 0x8c
81#define MC_COR_ECC_CNT_4 0x90
82#define MC_COR_ECC_CNT_5 0x94
83
84#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
85#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
86
87
76 /* OFFSETS for Devices 4,5 and 6 Function 0 */ 88 /* OFFSETS for Devices 4,5 and 6 Function 0 */
77 89
78#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58 90#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
@@ -194,13 +206,20 @@ struct i7core_pvt {
194 struct i7core_inject inject; 206 struct i7core_inject inject;
195 struct i7core_channel channel[NUM_SOCKETS][NUM_CHANS]; 207 struct i7core_channel channel[NUM_SOCKETS][NUM_CHANS];
196 208
209 unsigned int is_registered:1; /* true if all memories are RDIMMs */
210
197 int sockets; /* Number of sockets */ 211 int sockets; /* Number of sockets */
198 int channels; /* Number of active channels */ 212 int channels; /* Number of active channels */
199 213
200 int ce_count_available[NUM_SOCKETS]; 214 int ce_count_available[NUM_SOCKETS];
201 /* ECC corrected errors counts per dimm */ 215 int csrow_map[NUM_SOCKETS][NUM_CHANS][MAX_DIMMS];
202 unsigned long ce_count[NUM_SOCKETS][MAX_DIMMS]; 216
203 int last_ce_count[NUM_SOCKETS][MAX_DIMMS]; 217 /* ECC corrected errors counts per udimm */
218 unsigned long udimm_ce_count[NUM_SOCKETS][MAX_DIMMS];
219 int udimm_last_ce_count[NUM_SOCKETS][MAX_DIMMS];
220 /* ECC corrected errors counts per rdimm */
221 unsigned long rdimm_ce_count[NUM_SOCKETS][NUM_CHANS][MAX_DIMMS];
222 int rdimm_last_ce_count[NUM_SOCKETS][NUM_CHANS][MAX_DIMMS];
204 223
205 /* mcelog glue */ 224 /* mcelog glue */
206 struct edac_mce edac_mce; 225 struct edac_mce edac_mce;
@@ -471,6 +490,8 @@ static int get_dimm_config(struct mem_ctl_info *mci, int *csrow, u8 socket)
471 numrow(pvt->info.max_dod >> 6), 490 numrow(pvt->info.max_dod >> 6),
472 numcol(pvt->info.max_dod >> 9)); 491 numcol(pvt->info.max_dod >> 9));
473 492
493 pvt->is_registered = 1;
494
474 for (i = 0; i < NUM_CHANS; i++) { 495 for (i = 0; i < NUM_CHANS; i++) {
475 u32 data, dimm_dod[3], value[8]; 496 u32 data, dimm_dod[3], value[8];
476 497
@@ -492,8 +513,14 @@ static int get_dimm_config(struct mem_ctl_info *mci, int *csrow, u8 socket)
492 513
493 if (data & REGISTERED_DIMM) 514 if (data & REGISTERED_DIMM)
494 mtype = MEM_RDDR3; 515 mtype = MEM_RDDR3;
495 else 516 else {
496 mtype = MEM_DDR3; 517 mtype = MEM_DDR3;
518 /*
519 * FIXME: Currently, the driver will use dev 3:2
520 * counter registers only if all memories are registered
521 */
522 pvt->is_registered = 0;
523 }
497#if 0 524#if 0
498 if (data & THREE_DIMMS_PRESENT) 525 if (data & THREE_DIMMS_PRESENT)
499 pvt->channel[i].dimms = 3; 526 pvt->channel[i].dimms = 3;
@@ -562,6 +589,8 @@ static int get_dimm_config(struct mem_ctl_info *mci, int *csrow, u8 socket)
562 csr->channels[0].chan_idx = i; 589 csr->channels[0].chan_idx = i;
563 csr->channels[0].ce_count = 0; 590 csr->channels[0].ce_count = 0;
564 591
592 pvt->csrow_map[socket][i][j] = *csrow;
593
565 switch (banks) { 594 switch (banks) {
566 case 4: 595 case 4:
567 csr->dtype = DEV_X4; 596 csr->dtype = DEV_X4;
@@ -1031,19 +1060,31 @@ static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1031 1060
1032static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data) 1061static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data)
1033{ 1062{
1034 unsigned i, count, total = 0; 1063 unsigned i, j, count, total = 0;
1035 struct i7core_pvt *pvt = mci->pvt_info; 1064 struct i7core_pvt *pvt = mci->pvt_info;
1036 1065
1037 for (i = 0; i < pvt->sockets; i++) { 1066 for (i = 0; i < pvt->sockets; i++) {
1038 if (!pvt->ce_count_available[i]) 1067 if (!pvt->ce_count_available[i]) {
1039 count = sprintf(data, "socket 0 data unavailable\n"); 1068 count = sprintf(data, "socket 0 data unavailable\n");
1040 else 1069 continue;
1070 }
1071 if (!pvt->is_registered)
1041 count = sprintf(data, "socket %d, dimm0: %lu\n" 1072 count = sprintf(data, "socket %d, dimm0: %lu\n"
1042 "dimm1: %lu\ndimm2: %lu\n", 1073 "dimm1: %lu\ndimm2: %lu\n",
1043 i, 1074 i,
1044 pvt->ce_count[i][0], 1075 pvt->udimm_ce_count[i][0],
1045 pvt->ce_count[i][1], 1076 pvt->udimm_ce_count[i][1],
1046 pvt->ce_count[i][2]); 1077 pvt->udimm_ce_count[i][2]);
1078 else
1079 for (j = 0; j < NUM_CHANS; j++) {
1080 count = sprintf(data, "socket %d, channel %d"
1081 "dimm0: %lu\n"
1082 "dimm1: %lu\ndimm2: %lu\n",
1083 i, j,
1084 pvt->rdimm_ce_count[i][j][0],
1085 pvt->rdimm_ce_count[i][j][1],
1086 pvt->rdimm_ce_count[i][j][2]);
1087 }
1047 data += count; 1088 data += count;
1048 total += count; 1089 total += count;
1049 } 1090 }
@@ -1308,6 +1349,103 @@ error:
1308/**************************************************************************** 1349/****************************************************************************
1309 Error check routines 1350 Error check routines
1310 ****************************************************************************/ 1351 ****************************************************************************/
1352static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci, int socket,
1353 int chan, int dimm, int add)
1354{
1355 char *msg;
1356 struct i7core_pvt *pvt = mci->pvt_info;
1357 int row = pvt->csrow_map[socket][chan][dimm], i;
1358
1359 for (i = 0; i < add; i++) {
1360 msg = kasprintf(GFP_KERNEL, "Corrected error "
1361 "(Socket=%d channel=%d dimm=%d",
1362 socket, chan, dimm);
1363
1364 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1365 kfree (msg);
1366 }
1367}
1368
1369static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1370 int socket, int chan, int new0, int new1, int new2)
1371{
1372 struct i7core_pvt *pvt = mci->pvt_info;
1373 int add0 = 0, add1 = 0, add2 = 0;
1374 /* Updates CE counters if it is not the first time here */
1375 if (pvt->ce_count_available[socket]) {
1376 /* Updates CE counters */
1377
1378 add2 = new2 - pvt->rdimm_last_ce_count[socket][chan][2];
1379 add1 = new1 - pvt->rdimm_last_ce_count[socket][chan][1];
1380 add0 = new0 - pvt->rdimm_last_ce_count[socket][chan][0];
1381
1382 if (add2 < 0)
1383 add2 += 0x7fff;
1384 pvt->rdimm_ce_count[socket][chan][2] += add2;
1385
1386 if (add1 < 0)
1387 add1 += 0x7fff;
1388 pvt->rdimm_ce_count[socket][chan][1] += add1;
1389
1390 if (add0 < 0)
1391 add0 += 0x7fff;
1392 pvt->rdimm_ce_count[socket][chan][0] += add0;
1393 } else
1394 pvt->ce_count_available[socket] = 1;
1395
1396 /* Store the new values */
1397 pvt->rdimm_last_ce_count[socket][chan][2] = new2;
1398 pvt->rdimm_last_ce_count[socket][chan][1] = new1;
1399 pvt->rdimm_last_ce_count[socket][chan][0] = new0;
1400
1401 /*updated the edac core */
1402 if (add0 != 0)
1403 i7core_rdimm_update_csrow(mci, socket, chan, 0, add0);
1404 if (add1 != 0)
1405 i7core_rdimm_update_csrow(mci, socket, chan, 1, add1);
1406 if (add2 != 0)
1407 i7core_rdimm_update_csrow(mci, socket, chan, 2, add2);
1408
1409}
1410
1411static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci, u8 socket)
1412{
1413 struct i7core_pvt *pvt = mci->pvt_info;
1414 u32 rcv[3][2];
1415 int i, new0, new1, new2;
1416
1417 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
1418 pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_0,
1419 &rcv[0][0]);
1420 pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_1,
1421 &rcv[0][1]);
1422 pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_2,
1423 &rcv[1][0]);
1424 pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_3,
1425 &rcv[1][1]);
1426 pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_4,
1427 &rcv[2][0]);
1428 pci_read_config_dword(pvt->pci_mcr[socket][2], MC_COR_ECC_CNT_5,
1429 &rcv[2][1]);
1430 for (i = 0 ; i < 3; i++) {
1431 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1432 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1433 /*if the channel has 3 dimms*/
1434 if (pvt->channel[socket][i].dimms > 2) {
1435 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1436 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1437 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1438 } else {
1439 new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1440 DIMM_BOT_COR_ERR(rcv[i][0]);
1441 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1442 DIMM_BOT_COR_ERR(rcv[i][1]);
1443 new2 = 0;
1444 }
1445
1446 i7core_rdimm_update_ce_count(mci, socket, i, new0, new1, new2);
1447 }
1448}
1311 1449
1312/* This function is based on the device 3 function 4 registers as described on: 1450/* This function is based on the device 3 function 4 registers as described on:
1313 * Intel Xeon Processor 5500 Series Datasheet Volume 2 1451 * Intel Xeon Processor 5500 Series Datasheet Volume 2
@@ -1315,7 +1453,7 @@ error:
1315 * also available at: 1453 * also available at:
1316 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf 1454 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1317 */ 1455 */
1318static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket) 1456static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci, u8 socket)
1319{ 1457{
1320 struct i7core_pvt *pvt = mci->pvt_info; 1458 struct i7core_pvt *pvt = mci->pvt_info;
1321 u32 rcv1, rcv0; 1459 u32 rcv1, rcv0;
@@ -1326,7 +1464,7 @@ static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket)
1326 return; 1464 return;
1327 } 1465 }
1328 1466
1329 /* Corrected error reads */ 1467 /* Corrected test errors */
1330 pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV1, &rcv1); 1468 pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV1, &rcv1);
1331 pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV0, &rcv0); 1469 pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV0, &rcv0);
1332 1470
@@ -1335,39 +1473,38 @@ static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket)
1335 new1 = DIMM1_COR_ERR(rcv0); 1473 new1 = DIMM1_COR_ERR(rcv0);
1336 new0 = DIMM0_COR_ERR(rcv0); 1474 new0 = DIMM0_COR_ERR(rcv0);
1337 1475
1338#if 0
1339 debugf2("%s CE rcv1=0x%08x rcv0=0x%08x, %d %d %d\n",
1340 (pvt->ce_count_available ? "UPDATE" : "READ"),
1341 rcv1, rcv0, new0, new1, new2);
1342#endif
1343
1344 /* Updates CE counters if it is not the first time here */ 1476 /* Updates CE counters if it is not the first time here */
1345 if (pvt->ce_count_available[socket]) { 1477 if (pvt->ce_count_available[socket]) {
1346 /* Updates CE counters */ 1478 /* Updates CE counters */
1347 int add0, add1, add2; 1479 int add0, add1, add2;
1348 1480
1349 add2 = new2 - pvt->last_ce_count[socket][2]; 1481 add2 = new2 - pvt->udimm_last_ce_count[socket][2];
1350 add1 = new1 - pvt->last_ce_count[socket][1]; 1482 add1 = new1 - pvt->udimm_last_ce_count[socket][1];
1351 add0 = new0 - pvt->last_ce_count[socket][0]; 1483 add0 = new0 - pvt->udimm_last_ce_count[socket][0];
1352 1484
1353 if (add2 < 0) 1485 if (add2 < 0)
1354 add2 += 0x7fff; 1486 add2 += 0x7fff;
1355 pvt->ce_count[socket][2] += add2; 1487 pvt->udimm_ce_count[socket][2] += add2;
1356 1488
1357 if (add1 < 0) 1489 if (add1 < 0)
1358 add1 += 0x7fff; 1490 add1 += 0x7fff;
1359 pvt->ce_count[socket][1] += add1; 1491 pvt->udimm_ce_count[socket][1] += add1;
1360 1492
1361 if (add0 < 0) 1493 if (add0 < 0)
1362 add0 += 0x7fff; 1494 add0 += 0x7fff;
1363 pvt->ce_count[socket][0] += add0; 1495 pvt->udimm_ce_count[socket][0] += add0;
1496
1497 if (add0 | add1 | add2)
1498 i7core_printk(KERN_ERR, "New Corrected error(s): "
1499 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1500 add0, add1, add2);
1364 } else 1501 } else
1365 pvt->ce_count_available[socket] = 1; 1502 pvt->ce_count_available[socket] = 1;
1366 1503
1367 /* Store the new values */ 1504 /* Store the new values */
1368 pvt->last_ce_count[socket][2] = new2; 1505 pvt->udimm_last_ce_count[socket][2] = new2;
1369 pvt->last_ce_count[socket][1] = new1; 1506 pvt->udimm_last_ce_count[socket][1] = new1;
1370 pvt->last_ce_count[socket][0] = new0; 1507 pvt->udimm_last_ce_count[socket][0] = new0;
1371} 1508}
1372 1509
1373/* 1510/*
@@ -1386,6 +1523,7 @@ static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket)
1386static void i7core_mce_output_error(struct mem_ctl_info *mci, 1523static void i7core_mce_output_error(struct mem_ctl_info *mci,
1387 struct mce *m) 1524 struct mce *m)
1388{ 1525{
1526 struct i7core_pvt *pvt = mci->pvt_info;
1389 char *type, *optype, *err, *msg; 1527 char *type, *optype, *err, *msg;
1390 unsigned long error = m->status & 0x1ff0000l; 1528 unsigned long error = m->status & 0x1ff0000l;
1391 u32 optypenum = (m->status >> 4) & 0x07; 1529 u32 optypenum = (m->status >> 4) & 0x07;
@@ -1394,6 +1532,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
1394 u32 channel = (m->misc >> 18) & 0x3; 1532 u32 channel = (m->misc >> 18) & 0x3;
1395 u32 syndrome = m->misc >> 32; 1533 u32 syndrome = m->misc >> 32;
1396 u32 errnum = find_first_bit(&error, 32); 1534 u32 errnum = find_first_bit(&error, 32);
1535 int csrow;
1397 1536
1398 if (m->mcgstatus & 1) 1537 if (m->mcgstatus & 1)
1399 type = "FATAL"; 1538 type = "FATAL";
@@ -1463,9 +1602,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
1463 1602
1464 debugf0("%s", msg); 1603 debugf0("%s", msg);
1465 1604
1605 csrow = pvt->csrow_map[m->cpu][channel][dimm];
1606
1466 /* Call the helper to output message */ 1607 /* Call the helper to output message */
1467 edac_mc_handle_fbd_ue(mci, 0 /* FIXME: should be rank here */, 1608 if (m->mcgstatus & 1)
1468 0, 0 /* FIXME: should be channel here */, msg); 1609 edac_mc_handle_fbd_ue(mci, csrow, 0,
1610 0 /* FIXME: should be channel here */, msg);
1611 else if (!pvt->is_registered)
1612 edac_mc_handle_fbd_ce(mci, csrow,
1613 0 /* FIXME: should be channel here */, msg);
1469 1614
1470 kfree(msg); 1615 kfree(msg);
1471} 1616}
@@ -1502,7 +1647,10 @@ static void i7core_check_error(struct mem_ctl_info *mci)
1502 1647
1503 /* check memory count errors */ 1648 /* check memory count errors */
1504 for (i = 0; i < pvt->sockets; i++) 1649 for (i = 0; i < pvt->sockets; i++)
1505 check_mc_test_err(mci, i); 1650 if (!pvt->is_registered)
1651 i7core_udimm_check_mc_ecc_err(mci, i);
1652 else
1653 i7core_rdimm_check_mc_ecc_err(mci, i);
1506} 1654}
1507 1655
1508/* 1656/*