author    Mauro Carvalho Chehab <mchehab@redhat.com>  2012-04-16 14:12:22 -0400
committer Mauro Carvalho Chehab <mchehab@redhat.com>  2012-05-28 18:13:48 -0400
commit    c36e3e77687b39073903f55461fc9417514e831e (patch)
tree      d6372d12ba85909f7b1c65533b89a580369c69b0 /drivers/edac
parent    63b5d1d9aa4b59847ce6279b8798ed28c686a459 (diff)
sb_edac: convert driver to use the new edac ABI
The legacy edac ABI is going to be removed. Port the driver to use and benefit from the new API functionality.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r--  drivers/edac/sb_edac.c | 159
1 file changed, 56 insertions, 103 deletions
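
The heart of the conversion is in the last hunk of this diff: instead of counting csrows and channels up front, the driver now describes its geometry to the EDAC core as two explicit layers (channel and DIMM slot). A minimal sketch of that allocation, mirroring the sbridge_register_mci() hunk below (struct edac_mc_layer, NUM_CHANNELS, MAX_DIMMS and new_edac_mc_alloc() are taken from the driver and the EDAC core of this series; the snippet is illustrative, not independently buildable):

	/* Layer-based MC allocation introduced by this patch; the two
	 * layers replace the csrows/channels pair that the legacy code
	 * had to count in sbridge_get_active_channels(). */
	struct edac_mc_layer layers[2];
	struct mem_ctl_info *mci;

	layers[0].type = EDAC_MC_LAYER_CHANNEL;	/* outer layer: memory channels */
	layers[0].size = NUM_CHANNELS;
	layers[0].is_virt_csrow = false;
	layers[1].type = EDAC_MC_LAYER_SLOT;	/* inner layer: DIMM slots per channel */
	layers[1].size = MAX_DIMMS;
	layers[1].is_virt_csrow = true;		/* slots are exposed as virtual csrows */

	mci = new_edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
				sizeof(*pvt));
	if (!mci)
		return -ENOMEM;
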
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 2ce9bf5e354b..828647969124 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -314,8 +314,6 @@ struct sbridge_pvt {
314 struct sbridge_info info; 314 struct sbridge_info info;
315 struct sbridge_channel channel[NUM_CHANNELS]; 315 struct sbridge_channel channel[NUM_CHANNELS];
316 316
317 int csrow_map[NUM_CHANNELS][MAX_DIMMS];
318
319 /* Memory type detection */ 317 /* Memory type detection */
320 bool is_mirrored, is_lockstep, is_close_pg; 318 bool is_mirrored, is_lockstep, is_close_pg;
321 319
@@ -487,29 +485,14 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
487} 485}
488 486
489/** 487/**
490 * sbridge_get_active_channels() - gets the number of channels and csrows 488 * check_if_ecc_is_active() - Checks if ECC is active
491 * bus: Device bus 489 * bus: Device bus
492 * @channels: Number of channels that will be returned
493 * @csrows: Number of csrows found
494 *
495 * Since EDAC core needs to know in advance the number of available channels
496 * and csrows, in order to allocate memory for csrows/channels, it is needed
497 * to run two similar steps. At the first step, implemented on this function,
498 * it checks the number of csrows/channels present at one socket, identified
499 * by the associated PCI bus.
500 * this is used in order to properly allocate the size of mci components.
501 * Note: one csrow is one dimm.
502 */ 490 */
503static int sbridge_get_active_channels(const u8 bus, unsigned *channels, 491static int check_if_ecc_is_active(const u8 bus)
504 unsigned *csrows)
505{ 492{
506 struct pci_dev *pdev = NULL; 493 struct pci_dev *pdev = NULL;
507 int i, j;
508 u32 mcmtr; 494 u32 mcmtr;
509 495
510 *channels = 0;
511 *csrows = 0;
512
513 pdev = get_pdev_slot_func(bus, 15, 0); 496 pdev = get_pdev_slot_func(bus, 15, 0);
514 if (!pdev) { 497 if (!pdev) {
515 sbridge_printk(KERN_ERR, "Couldn't find PCI device " 498 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
@@ -523,41 +506,14 @@ static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
523 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n"); 506 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
524 return -ENODEV; 507 return -ENODEV;
525 } 508 }
526
527 for (i = 0; i < NUM_CHANNELS; i++) {
528 u32 mtr;
529
530 /* Device 15 functions 2 - 5 */
531 pdev = get_pdev_slot_func(bus, 15, 2 + i);
532 if (!pdev) {
533 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
534 "%2x.%02d.%d!!!\n",
535 bus, 15, 2 + i);
536 return -ENODEV;
537 }
538 (*channels)++;
539
540 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
541 pci_read_config_dword(pdev, mtr_regs[j], &mtr);
542 debugf1("Bus#%02x channel #%d MTR%d = %x\n", bus, i, j, mtr);
543 if (IS_DIMM_PRESENT(mtr))
544 (*csrows)++;
545 }
546 }
547
548 debugf0("Number of active channels: %d, number of active dimms: %d\n",
549 *channels, *csrows);
550
551 return 0; 509 return 0;
552} 510}
553 511
554static int get_dimm_config(struct mem_ctl_info *mci) 512static int get_dimm_config(struct mem_ctl_info *mci)
555{ 513{
556 struct sbridge_pvt *pvt = mci->pvt_info; 514 struct sbridge_pvt *pvt = mci->pvt_info;
557 struct csrow_info *csr; 515 struct dimm_info *dimm;
558 int i, j, banks, ranks, rows, cols, size, npages; 516 int i, j, banks, ranks, rows, cols, size, npages;
559 int csrow = 0;
560 unsigned long last_page = 0;
561 u32 reg; 517 u32 reg;
562 enum edac_type mode; 518 enum edac_type mode;
563 enum mem_type mtype; 519 enum mem_type mtype;
@@ -616,7 +572,8 @@ static int get_dimm_config(struct mem_ctl_info *mci)
616 u32 mtr; 572 u32 mtr;
617 573
618 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { 574 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
619 struct dimm_info *dimm = &mci->dimms[j]; 575 dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
576 i, j, 0);
620 pci_read_config_dword(pvt->pci_tad[i], 577 pci_read_config_dword(pvt->pci_tad[i],
621 mtr_regs[j], &mtr); 578 mtr_regs[j], &mtr);
622 debugf4("Channel #%d MTR%d = %x\n", i, j, mtr); 579 debugf4("Channel #%d MTR%d = %x\n", i, j, mtr);
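
The EDAC_DIMM_PTR() lookup above replaces direct indexing of mci->dimms[]: the per-layer coordinates (channel, slot) are flattened into a single index into the DIMM array. Below is a stand-alone illustration of that index arithmetic; dimm_index() is a hypothetical helper that only reproduces the math, not the kernel macro itself, and the 4-channel x 3-slot geometry stands in for the driver's NUM_CHANNELS/MAX_DIMMS layout:

	#include <stdio.h>

	/* Flatten (channel, slot) coordinates into one index, the way a
	 * two-layer mci->dimms[] array is addressed (row-major order). */
	static int dimm_index(int n_channels, int n_slots, int channel, int slot)
	{
		if (channel >= n_channels || slot >= n_slots)
			return -1;			/* out of range */
		return channel * n_slots + slot;
	}

	int main(void)
	{
		int n_channels = 4, n_slots = 3;	/* stand-in for NUM_CHANNELS x MAX_DIMMS */

		for (int ch = 0; ch < n_channels; ch++)
			for (int slot = 0; slot < n_slots; slot++)
				printf("channel %d, slot %d -> dimms[%d]\n",
				       ch, slot, dimm_index(n_channels, n_slots, ch, slot));
		return 0;
	}
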
@@ -636,16 +593,6 @@ static int get_dimm_config(struct mem_ctl_info *mci)
636 size, npages, 593 size, npages,
637 banks, ranks, rows, cols); 594 banks, ranks, rows, cols);
638 595
639 /*
640 * Fake stuff. This controller doesn't see
641 * csrows.
642 */
643 csr = &mci->csrows[csrow];
644 pvt->csrow_map[i][j] = csrow;
645 last_page += npages;
646 csrow++;
647
648 csr->channels[0].dimm = dimm;
649 dimm->nr_pages = npages; 596 dimm->nr_pages = npages;
650 dimm->grain = 32; 597 dimm->grain = 32;
651 dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4; 598 dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
@@ -841,11 +788,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
841 u8 *socket, 788 u8 *socket,
842 long *channel_mask, 789 long *channel_mask,
843 u8 *rank, 790 u8 *rank,
844 char *area_type) 791 char *area_type, char *msg)
845{ 792{
846 struct mem_ctl_info *new_mci; 793 struct mem_ctl_info *new_mci;
847 struct sbridge_pvt *pvt = mci->pvt_info; 794 struct sbridge_pvt *pvt = mci->pvt_info;
848 char msg[256];
849 int n_rir, n_sads, n_tads, sad_way, sck_xch; 795 int n_rir, n_sads, n_tads, sad_way, sck_xch;
850 int sad_interl, idx, base_ch; 796 int sad_interl, idx, base_ch;
851 int interleave_mode; 797 int interleave_mode;
@@ -867,12 +813,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
867 */ 813 */
868 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) { 814 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
869 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); 815 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
870 edac_mc_handle_ce_no_info(mci, msg);
871 return -EINVAL; 816 return -EINVAL;
872 } 817 }
873 if (addr >= (u64)pvt->tohm) { 818 if (addr >= (u64)pvt->tohm) {
874 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr); 819 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
875 edac_mc_handle_ce_no_info(mci, msg);
876 return -EINVAL; 820 return -EINVAL;
877 } 821 }
878 822
@@ -889,7 +833,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
889 limit = SAD_LIMIT(reg); 833 limit = SAD_LIMIT(reg);
890 if (limit <= prv) { 834 if (limit <= prv) {
891 sprintf(msg, "Can't discover the memory socket"); 835 sprintf(msg, "Can't discover the memory socket");
892 edac_mc_handle_ce_no_info(mci, msg);
893 return -EINVAL; 836 return -EINVAL;
894 } 837 }
895 if (addr <= limit) 838 if (addr <= limit)
@@ -898,7 +841,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
898 } 841 }
899 if (n_sads == MAX_SAD) { 842 if (n_sads == MAX_SAD) {
900 sprintf(msg, "Can't discover the memory socket"); 843 sprintf(msg, "Can't discover the memory socket");
901 edac_mc_handle_ce_no_info(mci, msg);
902 return -EINVAL; 844 return -EINVAL;
903 } 845 }
904 area_type = get_dram_attr(reg); 846 area_type = get_dram_attr(reg);
@@ -939,7 +881,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
939 break; 881 break;
940 default: 882 default:
941 sprintf(msg, "Can't discover socket interleave"); 883 sprintf(msg, "Can't discover socket interleave");
942 edac_mc_handle_ce_no_info(mci, msg);
943 return -EINVAL; 884 return -EINVAL;
944 } 885 }
945 *socket = sad_interleave[idx]; 886 *socket = sad_interleave[idx];
@@ -954,7 +895,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
954 if (!new_mci) { 895 if (!new_mci) {
955 sprintf(msg, "Struct for socket #%u wasn't initialized", 896 sprintf(msg, "Struct for socket #%u wasn't initialized",
956 *socket); 897 *socket);
957 edac_mc_handle_ce_no_info(mci, msg);
958 return -EINVAL; 898 return -EINVAL;
959 } 899 }
960 mci = new_mci; 900 mci = new_mci;
@@ -970,7 +910,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
970 limit = TAD_LIMIT(reg); 910 limit = TAD_LIMIT(reg);
971 if (limit <= prv) { 911 if (limit <= prv) {
972 sprintf(msg, "Can't discover the memory channel"); 912 sprintf(msg, "Can't discover the memory channel");
973 edac_mc_handle_ce_no_info(mci, msg);
974 return -EINVAL; 913 return -EINVAL;
975 } 914 }
976 if (addr <= limit) 915 if (addr <= limit)
@@ -1010,7 +949,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1010 break; 949 break;
1011 default: 950 default:
1012 sprintf(msg, "Can't discover the TAD target"); 951 sprintf(msg, "Can't discover the TAD target");
1013 edac_mc_handle_ce_no_info(mci, msg);
1014 return -EINVAL; 952 return -EINVAL;
1015 } 953 }
1016 *channel_mask = 1 << base_ch; 954 *channel_mask = 1 << base_ch;
@@ -1024,7 +962,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1024 break; 962 break;
1025 default: 963 default:
1026 sprintf(msg, "Invalid mirror set. Can't decode addr"); 964 sprintf(msg, "Invalid mirror set. Can't decode addr");
1027 edac_mc_handle_ce_no_info(mci, msg);
1028 return -EINVAL; 965 return -EINVAL;
1029 } 966 }
1030 } else 967 } else
@@ -1052,7 +989,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1052 if (offset > addr) { 989 if (offset > addr) {
1053 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!", 990 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
1054 offset, addr); 991 offset, addr);
1055 edac_mc_handle_ce_no_info(mci, msg);
1056 return -EINVAL; 992 return -EINVAL;
1057 } 993 }
1058 addr -= offset; 994 addr -= offset;
@@ -1092,7 +1028,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1092 if (n_rir == MAX_RIR_RANGES) { 1028 if (n_rir == MAX_RIR_RANGES) {
1093 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx", 1029 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
1094 ch_addr); 1030 ch_addr);
1095 edac_mc_handle_ce_no_info(mci, msg);
1096 return -EINVAL; 1031 return -EINVAL;
1097 } 1032 }
1098 rir_way = RIR_WAY(reg); 1033 rir_way = RIR_WAY(reg);
@@ -1406,7 +1341,8 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1406{ 1341{
1407 struct mem_ctl_info *new_mci; 1342 struct mem_ctl_info *new_mci;
1408 struct sbridge_pvt *pvt = mci->pvt_info; 1343 struct sbridge_pvt *pvt = mci->pvt_info;
1409 char *type, *optype, *msg, *recoverable_msg; 1344 enum hw_event_mc_err_type tp_event;
1345 char *type, *optype, msg[256], *recoverable_msg;
1410 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); 1346 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
1411 bool overflow = GET_BITFIELD(m->status, 62, 62); 1347 bool overflow = GET_BITFIELD(m->status, 62, 62);
1412 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); 1348 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1418,13 +1354,21 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1418 u32 optypenum = GET_BITFIELD(m->status, 4, 6); 1354 u32 optypenum = GET_BITFIELD(m->status, 4, 6);
1419 long channel_mask, first_channel; 1355 long channel_mask, first_channel;
1420 u8 rank, socket; 1356 u8 rank, socket;
1421 int csrow, rc, dimm; 1357 int rc, dimm;
1422 char *area_type = "Unknown"; 1358 char *area_type = "Unknown";
1423 1359
1424 if (ripv) 1360 if (uncorrected_error) {
1425 type = "NON_FATAL"; 1361 if (ripv) {
1426 else 1362 type = "FATAL";
1427 type = "FATAL"; 1363 tp_event = HW_EVENT_ERR_FATAL;
1364 } else {
1365 type = "NON_FATAL";
1366 tp_event = HW_EVENT_ERR_UNCORRECTED;
1367 }
1368 } else {
1369 type = "CORRECTED";
1370 tp_event = HW_EVENT_ERR_CORRECTED;
1371 }
1428 1372
1429 /* 1373 /*
1430 * According with Table 15-9 of the Intel Archictecture spec vol 3A, 1374 * According with Table 15-9 of the Intel Archictecture spec vol 3A,
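
The new tp_event classification above maps two machine-check status bits onto the event types that edac_mc_handle_error() takes. A tiny stand-alone sketch of just that decision, with the bit positions taken from the GET_BITFIELD() calls in the hunk (the enum is a local stand-in that only echoes the kernel's HW_EVENT_ERR_* names):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	enum err_type { ERR_CORRECTED, ERR_UNCORRECTED, ERR_FATAL };

	static enum err_type classify(uint64_t status, uint64_t mcgstatus)
	{
		bool uncorrected = (status >> 61) & 1;	/* MCi_STATUS bit 61 */
		bool ripv = mcgstatus & 1;		/* MCG_STATUS bit 0 (RIPV) */

		if (!uncorrected)
			return ERR_CORRECTED;
		return ripv ? ERR_FATAL : ERR_UNCORRECTED;	/* same mapping as the hunk */
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       classify(0, 1),			/* corrected */
		       classify(1ULL << 61, 0),		/* uncorrected, non-fatal */
		       classify(1ULL << 61, 1));	/* uncorrected, fatal */
		return 0;
	}
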
@@ -1442,19 +1386,19 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1442 } else { 1386 } else {
1443 switch (optypenum) { 1387 switch (optypenum) {
1444 case 0: 1388 case 0:
1445 optype = "generic undef request"; 1389 optype = "generic undef request error";
1446 break; 1390 break;
1447 case 1: 1391 case 1:
1448 optype = "memory read"; 1392 optype = "memory read error";
1449 break; 1393 break;
1450 case 2: 1394 case 2:
1451 optype = "memory write"; 1395 optype = "memory write error";
1452 break; 1396 break;
1453 case 3: 1397 case 3:
1454 optype = "addr/cmd"; 1398 optype = "addr/cmd error";
1455 break; 1399 break;
1456 case 4: 1400 case 4:
1457 optype = "memory scrubbing"; 1401 optype = "memory scrubbing error";
1458 break; 1402 break;
1459 default: 1403 default:
1460 optype = "reserved"; 1404 optype = "reserved";
@@ -1463,13 +1407,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1463 } 1407 }
1464 1408
1465 rc = get_memory_error_data(mci, m->addr, &socket, 1409 rc = get_memory_error_data(mci, m->addr, &socket,
1466 &channel_mask, &rank, area_type); 1410 &channel_mask, &rank, area_type, msg);
1467 if (rc < 0) 1411 if (rc < 0)
1468 return; 1412 goto err_parsing;
1469 new_mci = get_mci_for_node_id(socket); 1413 new_mci = get_mci_for_node_id(socket);
1470 if (!new_mci) { 1414 if (!new_mci) {
1471 edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!"); 1415 strcpy(msg, "Error: socket got corrupted!");
1472 return; 1416 goto err_parsing;
1473 } 1417 }
1474 mci = new_mci; 1418 mci = new_mci;
1475 pvt = mci->pvt_info; 1419 pvt = mci->pvt_info;
@@ -1483,8 +1427,6 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1483 else 1427 else
1484 dimm = 2; 1428 dimm = 2;
1485 1429
1486 csrow = pvt->csrow_map[first_channel][dimm];
1487
1488 if (uncorrected_error && recoverable) 1430 if (uncorrected_error && recoverable)
1489 recoverable_msg = " recoverable"; 1431 recoverable_msg = " recoverable";
1490 else 1432 else
@@ -1495,18 +1437,14 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1495 * Probably, we can just discard it, as the channel information 1437 * Probably, we can just discard it, as the channel information
1496 * comes from the get_memory_error_data() address decoding 1438 * comes from the get_memory_error_data() address decoding
1497 */ 1439 */
1498 msg = kasprintf(GFP_ATOMIC, 1440 snprintf(msg, sizeof(msg),
1499 "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), " 1441 "%d error(s)%s: %s%s: cpu=%d Err=%04x:%04x addr = 0x%08llx socket=%d Channel=%ld(mask=%ld), rank=%d\n",
1500 "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n",
1501 core_err_cnt, 1442 core_err_cnt,
1443 overflow ? " OVERFLOW" : "",
1502 area_type, 1444 area_type,
1503 optype,
1504 type,
1505 recoverable_msg, 1445 recoverable_msg,
1506 overflow ? "OVERFLOW" : "",
1507 m->cpu, 1446 m->cpu,
1508 mscod, errcode, 1447 mscod, errcode,
1509 channel, /* 1111b means not specified */
1510 (long long) m->addr, 1448 (long long) m->addr,
1511 socket, 1449 socket,
1512 first_channel, /* This is the real channel on SB */ 1450 first_channel, /* This is the real channel on SB */
@@ -1515,13 +1453,19 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1515 1453
1516 debugf0("%s", msg); 1454 debugf0("%s", msg);
1517 1455
1456 /* FIXME: need support for channel mask */
1457
1518 /* Call the helper to output message */ 1458 /* Call the helper to output message */
1519 if (uncorrected_error) 1459 edac_mc_handle_error(tp_event, mci,
1520 edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg); 1460 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
1521 else 1461 channel, dimm, -1,
1522 edac_mc_handle_fbd_ce(mci, csrow, 0, msg); 1462 optype, msg, m);
1463 return;
1464err_parsing:
1465 edac_mc_handle_error(tp_event, mci, 0, 0, 0,
1466 -1, -1, -1,
1467 msg, "", m);
1523 1468
1524 kfree(msg);
1525} 1469}
1526 1470
1527/* 1471/*
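
With the address decoded, the two legacy reporting helpers (edac_mc_handle_fbd_ue()/_ce()) collapse into the single edac_mc_handle_error() call shown above. An annotated copy of that call, purely as a reading aid; all symbols come from the surrounding driver code, and the per-argument notes are inferred from the hunk and from the layer layout declared at allocation time:

	edac_mc_handle_error(tp_event,			/* corrected / uncorrected / fatal */
			     mci,			/* controller owning the failing DIMM */
			     m->addr >> PAGE_SHIFT,	/* page frame number of the error address */
			     m->addr & ~PAGE_MASK,	/* offset within that page */
			     0,				/* unused here */
			     channel,			/* position in layer 0 (channel) */
			     dimm,			/* position in layer 1 (DIMM slot) */
			     -1,			/* no third layer */
			     optype, msg, m);		/* decoded strings plus the raw MCE record */
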
@@ -1680,16 +1624,25 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
1680static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) 1624static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
1681{ 1625{
1682 struct mem_ctl_info *mci; 1626 struct mem_ctl_info *mci;
1627 struct edac_mc_layer layers[2];
1683 struct sbridge_pvt *pvt; 1628 struct sbridge_pvt *pvt;
1684 int rc, channels, csrows; 1629 int rc;
1685 1630
1686 /* Check the number of active and not disabled channels */ 1631 /* Check the number of active and not disabled channels */
1687 rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows); 1632 rc = check_if_ecc_is_active(sbridge_dev->bus);
1688 if (unlikely(rc < 0)) 1633 if (unlikely(rc < 0))
1689 return rc; 1634 return rc;
1690 1635
1691 /* allocate a new MC control structure */ 1636 /* allocate a new MC control structure */
1692 mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc); 1637 layers[0].type = EDAC_MC_LAYER_CHANNEL;
1638 layers[0].size = NUM_CHANNELS;
1639 layers[0].is_virt_csrow = false;
1640 layers[1].type = EDAC_MC_LAYER_SLOT;
1641 layers[1].size = MAX_DIMMS;
1642 layers[1].is_virt_csrow = true;
1643 mci = new_edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
1644 sizeof(*pvt));
1645
1693 if (unlikely(!mci)) 1646 if (unlikely(!mci))
1694 return -ENOMEM; 1647 return -ENOMEM;
1695 1648