aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac/sb_edac.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-29 21:32:37 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-29 21:32:37 -0400
commit: 87a5af24e54857e7b15c1f1b0468512ee65c94e3 (patch)
tree: ee346852a0fc27f172a5eb57b6e3c7bf111f2fad /drivers/edac/sb_edac.c
parent: 7e5b2db77b05746613516599c916a8cc2e321077 (diff)
parent: 0bf09e829dd4b07227ed5a8bc4ac85752a044458 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
Pull EDAC internal API changes from Mauro Carvalho Chehab: "This changeset is the first part of a series of patches that fixes the EDAC subsystem. In this set, it changes the Kernel EDAC API in order to properly represent the Intel i3/i5/i7, Xeon 3xxx/5xxx/7xxx, and Intel E5-xxxx memory controllers. The EDAC core used to assume that: - the DRAM chip select pin is directly accessed by the memory controller - when multiple channels are used, they're all filled with the same type of memory. None of the above premises has been true on Intel memory controllers since 2002, when RAMBUS and FB-DIMMs were introduced, and the Advanced Memory Buffer or some similar technology hides the direct access to the DRAM pins. So, the existing drivers for those chipsets had to lie to the EDAC core, in general telling it that just one channel is filled. That produces some hard-to-understand error messages like: EDAC MC0: CE row 3, channel 0, label "DIMM1": 1 Unknown error(s): memory read error on FATAL area : cpu=0 Err=0008:00c2 (ch=2), addr = 0xad1f73480 => socket=0, Channel=0(mask=2), rank=1 The location information there (row3 channel 0) is completely bogus: it has no physical meaning; these are just some random values that the driver uses to talk with the EDAC core. The error actually happened at CPU socket 0, channel 0, slot 1, but this is not reported anywhere, as the EDAC core doesn't know anything about the memory layout. So, only advanced users that know how the EDAC driver works and that test their systems to see how DIMMs are mapped can actually benefit from such error logs. This patch series fixes the error report logic, in order to allow the EDAC drivers to expose the memory architecture they use to the EDAC core. 
So, as the EDAC core now understands how the memory is organized, it can provide a useful report: EDAC MC0: CE memory read error on DIMM1 (channel:0 slot:1 page:0x364b1b offset:0x600 grain:32 syndrome:0x0 - count:1 area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:4) The location of the DIMM where the error happened is reported by "MC0" (cpu socket #0), at "channel:0 slot:1" location, and matches the physical location of the DIMM. There are two remaining issues not covered by this patch series: - The EDAC sysfs API will still report bogus values. So, userspace tools like edac-utils will still use the bogus data; - Add a new tracepoint-based way to get the binary information about the errors. Those are in a second series of patches (also at -next), but will probably miss the train for 3.5, due to the slow review process." Fix up trivial conflict (due to spelling correction of removed code) in drivers/edac/edac_device.c * git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (42 commits) i7core: fix ranks information at the per-channel struct i5000: Fix the fatal error handling i5100_edac: Fix a warning when compiled with 32 bits i82975x_edac: Test nr_pages earlier to save a few CPU cycles e752x_edac: provide more info about how DIMMS/ranks are mapped i5000_edac: Fix the logic that retrieves memory information i5400_edac: improve debug messages to better represent the filled memory edac: Cleanup the logs for i7core and sb edac drivers edac: Initialize the dimm label with the known information edac: Remove the legacy EDAC ABI x38_edac: convert driver to use the new edac ABI tile_edac: convert driver to use the new edac ABI sb_edac: convert driver to use the new edac ABI r82600_edac: convert driver to use the new edac ABI ppc4xx_edac: convert driver to use the new edac ABI pasemi_edac: convert driver to use the new edac ABI mv64x60_edac: convert driver to use the new edac ABI mpc85xx_edac: convert driver to use the new edac ABI i82975x_edac: 
convert driver to use the new edac ABI i82875p_edac: convert driver to use the new edac ABI ...
Diffstat (limited to 'drivers/edac/sb_edac.c')
-rw-r--r--drivers/edac/sb_edac.c212
1 files changed, 78 insertions, 134 deletions
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 123204f8e23b..4adaf4b7da99 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -314,8 +314,6 @@ struct sbridge_pvt {
314 struct sbridge_info info; 314 struct sbridge_info info;
315 struct sbridge_channel channel[NUM_CHANNELS]; 315 struct sbridge_channel channel[NUM_CHANNELS];
316 316
317 int csrow_map[NUM_CHANNELS][MAX_DIMMS];
318
319 /* Memory type detection */ 317 /* Memory type detection */
320 bool is_mirrored, is_lockstep, is_close_pg; 318 bool is_mirrored, is_lockstep, is_close_pg;
321 319
@@ -487,29 +485,14 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
487} 485}
488 486
489/** 487/**
490 * sbridge_get_active_channels() - gets the number of channels and csrows 488 * check_if_ecc_is_active() - Checks if ECC is active
491 * bus: Device bus 489 * bus: Device bus
492 * @channels: Number of channels that will be returned
493 * @csrows: Number of csrows found
494 *
495 * Since EDAC core needs to know in advance the number of available channels
496 * and csrows, in order to allocate memory for csrows/channels, it is needed
497 * to run two similar steps. At the first step, implemented on this function,
498 * it checks the number of csrows/channels present at one socket, identified
499 * by the associated PCI bus.
500 * this is used in order to properly allocate the size of mci components.
501 * Note: one csrow is one dimm.
502 */ 490 */
503static int sbridge_get_active_channels(const u8 bus, unsigned *channels, 491static int check_if_ecc_is_active(const u8 bus)
504 unsigned *csrows)
505{ 492{
506 struct pci_dev *pdev = NULL; 493 struct pci_dev *pdev = NULL;
507 int i, j;
508 u32 mcmtr; 494 u32 mcmtr;
509 495
510 *channels = 0;
511 *csrows = 0;
512
513 pdev = get_pdev_slot_func(bus, 15, 0); 496 pdev = get_pdev_slot_func(bus, 15, 0);
514 if (!pdev) { 497 if (!pdev) {
515 sbridge_printk(KERN_ERR, "Couldn't find PCI device " 498 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
@@ -523,41 +506,14 @@ static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
523 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n"); 506 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
524 return -ENODEV; 507 return -ENODEV;
525 } 508 }
526
527 for (i = 0; i < NUM_CHANNELS; i++) {
528 u32 mtr;
529
530 /* Device 15 functions 2 - 5 */
531 pdev = get_pdev_slot_func(bus, 15, 2 + i);
532 if (!pdev) {
533 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
534 "%2x.%02d.%d!!!\n",
535 bus, 15, 2 + i);
536 return -ENODEV;
537 }
538 (*channels)++;
539
540 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
541 pci_read_config_dword(pdev, mtr_regs[j], &mtr);
542 debugf1("Bus#%02x channel #%d MTR%d = %x\n", bus, i, j, mtr);
543 if (IS_DIMM_PRESENT(mtr))
544 (*csrows)++;
545 }
546 }
547
548 debugf0("Number of active channels: %d, number of active dimms: %d\n",
549 *channels, *csrows);
550
551 return 0; 509 return 0;
552} 510}
553 511
554static int get_dimm_config(const struct mem_ctl_info *mci) 512static int get_dimm_config(struct mem_ctl_info *mci)
555{ 513{
556 struct sbridge_pvt *pvt = mci->pvt_info; 514 struct sbridge_pvt *pvt = mci->pvt_info;
557 struct csrow_info *csr; 515 struct dimm_info *dimm;
558 int i, j, banks, ranks, rows, cols, size, npages; 516 int i, j, banks, ranks, rows, cols, size, npages;
559 int csrow = 0;
560 unsigned long last_page = 0;
561 u32 reg; 517 u32 reg;
562 enum edac_type mode; 518 enum edac_type mode;
563 enum mem_type mtype; 519 enum mem_type mtype;
@@ -616,6 +572,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
616 u32 mtr; 572 u32 mtr;
617 573
618 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { 574 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
575 dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
576 i, j, 0);
619 pci_read_config_dword(pvt->pci_tad[i], 577 pci_read_config_dword(pvt->pci_tad[i],
620 mtr_regs[j], &mtr); 578 mtr_regs[j], &mtr);
621 debugf4("Channel #%d MTR%d = %x\n", i, j, mtr); 579 debugf4("Channel #%d MTR%d = %x\n", i, j, mtr);
@@ -634,29 +592,15 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
634 pvt->sbridge_dev->mc, i, j, 592 pvt->sbridge_dev->mc, i, j,
635 size, npages, 593 size, npages,
636 banks, ranks, rows, cols); 594 banks, ranks, rows, cols);
637 csr = &mci->csrows[csrow]; 595
638 596 dimm->nr_pages = npages;
639 csr->first_page = last_page; 597 dimm->grain = 32;
640 csr->last_page = last_page + npages - 1; 598 dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
641 csr->page_mask = 0UL; /* Unused */ 599 dimm->mtype = mtype;
642 csr->nr_pages = npages; 600 dimm->edac_mode = mode;
643 csr->grain = 32; 601 snprintf(dimm->label, sizeof(dimm->label),
644 csr->csrow_idx = csrow;
645 csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
646 csr->ce_count = 0;
647 csr->ue_count = 0;
648 csr->mtype = mtype;
649 csr->edac_mode = mode;
650 csr->nr_channels = 1;
651 csr->channels[0].chan_idx = i;
652 csr->channels[0].ce_count = 0;
653 pvt->csrow_map[i][j] = csrow;
654 snprintf(csr->channels[0].label,
655 sizeof(csr->channels[0].label),
656 "CPU_SrcID#%u_Channel#%u_DIMM#%u", 602 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
657 pvt->sbridge_dev->source_id, i, j); 603 pvt->sbridge_dev->source_id, i, j);
658 last_page += npages;
659 csrow++;
660 } 604 }
661 } 605 }
662 } 606 }
@@ -844,11 +788,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
844 u8 *socket, 788 u8 *socket,
845 long *channel_mask, 789 long *channel_mask,
846 u8 *rank, 790 u8 *rank,
847 char *area_type) 791 char **area_type, char *msg)
848{ 792{
849 struct mem_ctl_info *new_mci; 793 struct mem_ctl_info *new_mci;
850 struct sbridge_pvt *pvt = mci->pvt_info; 794 struct sbridge_pvt *pvt = mci->pvt_info;
851 char msg[256];
852 int n_rir, n_sads, n_tads, sad_way, sck_xch; 795 int n_rir, n_sads, n_tads, sad_way, sck_xch;
853 int sad_interl, idx, base_ch; 796 int sad_interl, idx, base_ch;
854 int interleave_mode; 797 int interleave_mode;
@@ -870,12 +813,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
870 */ 813 */
871 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) { 814 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
872 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); 815 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
873 edac_mc_handle_ce_no_info(mci, msg);
874 return -EINVAL; 816 return -EINVAL;
875 } 817 }
876 if (addr >= (u64)pvt->tohm) { 818 if (addr >= (u64)pvt->tohm) {
877 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr); 819 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
878 edac_mc_handle_ce_no_info(mci, msg);
879 return -EINVAL; 820 return -EINVAL;
880 } 821 }
881 822
@@ -892,7 +833,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
892 limit = SAD_LIMIT(reg); 833 limit = SAD_LIMIT(reg);
893 if (limit <= prv) { 834 if (limit <= prv) {
894 sprintf(msg, "Can't discover the memory socket"); 835 sprintf(msg, "Can't discover the memory socket");
895 edac_mc_handle_ce_no_info(mci, msg);
896 return -EINVAL; 836 return -EINVAL;
897 } 837 }
898 if (addr <= limit) 838 if (addr <= limit)
@@ -901,10 +841,9 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
901 } 841 }
902 if (n_sads == MAX_SAD) { 842 if (n_sads == MAX_SAD) {
903 sprintf(msg, "Can't discover the memory socket"); 843 sprintf(msg, "Can't discover the memory socket");
904 edac_mc_handle_ce_no_info(mci, msg);
905 return -EINVAL; 844 return -EINVAL;
906 } 845 }
907 area_type = get_dram_attr(reg); 846 *area_type = get_dram_attr(reg);
908 interleave_mode = INTERLEAVE_MODE(reg); 847 interleave_mode = INTERLEAVE_MODE(reg);
909 848
910 pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], 849 pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
@@ -942,7 +881,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
942 break; 881 break;
943 default: 882 default:
944 sprintf(msg, "Can't discover socket interleave"); 883 sprintf(msg, "Can't discover socket interleave");
945 edac_mc_handle_ce_no_info(mci, msg);
946 return -EINVAL; 884 return -EINVAL;
947 } 885 }
948 *socket = sad_interleave[idx]; 886 *socket = sad_interleave[idx];
@@ -957,7 +895,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
957 if (!new_mci) { 895 if (!new_mci) {
958 sprintf(msg, "Struct for socket #%u wasn't initialized", 896 sprintf(msg, "Struct for socket #%u wasn't initialized",
959 *socket); 897 *socket);
960 edac_mc_handle_ce_no_info(mci, msg);
961 return -EINVAL; 898 return -EINVAL;
962 } 899 }
963 mci = new_mci; 900 mci = new_mci;
@@ -973,7 +910,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
973 limit = TAD_LIMIT(reg); 910 limit = TAD_LIMIT(reg);
974 if (limit <= prv) { 911 if (limit <= prv) {
975 sprintf(msg, "Can't discover the memory channel"); 912 sprintf(msg, "Can't discover the memory channel");
976 edac_mc_handle_ce_no_info(mci, msg);
977 return -EINVAL; 913 return -EINVAL;
978 } 914 }
979 if (addr <= limit) 915 if (addr <= limit)
@@ -1013,7 +949,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1013 break; 949 break;
1014 default: 950 default:
1015 sprintf(msg, "Can't discover the TAD target"); 951 sprintf(msg, "Can't discover the TAD target");
1016 edac_mc_handle_ce_no_info(mci, msg);
1017 return -EINVAL; 952 return -EINVAL;
1018 } 953 }
1019 *channel_mask = 1 << base_ch; 954 *channel_mask = 1 << base_ch;
@@ -1027,7 +962,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1027 break; 962 break;
1028 default: 963 default:
1029 sprintf(msg, "Invalid mirror set. Can't decode addr"); 964 sprintf(msg, "Invalid mirror set. Can't decode addr");
1030 edac_mc_handle_ce_no_info(mci, msg);
1031 return -EINVAL; 965 return -EINVAL;
1032 } 966 }
1033 } else 967 } else
@@ -1055,7 +989,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1055 if (offset > addr) { 989 if (offset > addr) {
1056 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!", 990 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
1057 offset, addr); 991 offset, addr);
1058 edac_mc_handle_ce_no_info(mci, msg);
1059 return -EINVAL; 992 return -EINVAL;
1060 } 993 }
1061 addr -= offset; 994 addr -= offset;
@@ -1095,7 +1028,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1095 if (n_rir == MAX_RIR_RANGES) { 1028 if (n_rir == MAX_RIR_RANGES) {
1096 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx", 1029 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
1097 ch_addr); 1030 ch_addr);
1098 edac_mc_handle_ce_no_info(mci, msg);
1099 return -EINVAL; 1031 return -EINVAL;
1100 } 1032 }
1101 rir_way = RIR_WAY(reg); 1033 rir_way = RIR_WAY(reg);
@@ -1409,7 +1341,8 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1409{ 1341{
1410 struct mem_ctl_info *new_mci; 1342 struct mem_ctl_info *new_mci;
1411 struct sbridge_pvt *pvt = mci->pvt_info; 1343 struct sbridge_pvt *pvt = mci->pvt_info;
1412 char *type, *optype, *msg, *recoverable_msg; 1344 enum hw_event_mc_err_type tp_event;
1345 char *type, *optype, msg[256];
1413 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); 1346 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
1414 bool overflow = GET_BITFIELD(m->status, 62, 62); 1347 bool overflow = GET_BITFIELD(m->status, 62, 62);
1415 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); 1348 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1421,13 +1354,21 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1421 u32 optypenum = GET_BITFIELD(m->status, 4, 6); 1354 u32 optypenum = GET_BITFIELD(m->status, 4, 6);
1422 long channel_mask, first_channel; 1355 long channel_mask, first_channel;
1423 u8 rank, socket; 1356 u8 rank, socket;
1424 int csrow, rc, dimm; 1357 int rc, dimm;
1425 char *area_type = "Unknown"; 1358 char *area_type = NULL;
1426 1359
1427 if (ripv) 1360 if (uncorrected_error) {
1428 type = "NON_FATAL"; 1361 if (ripv) {
1429 else 1362 type = "FATAL";
1430 type = "FATAL"; 1363 tp_event = HW_EVENT_ERR_FATAL;
1364 } else {
1365 type = "NON_FATAL";
1366 tp_event = HW_EVENT_ERR_UNCORRECTED;
1367 }
1368 } else {
1369 type = "CORRECTED";
1370 tp_event = HW_EVENT_ERR_CORRECTED;
1371 }
1431 1372
1432 /* 1373 /*
1433 * According with Table 15-9 of the Intel Architecture spec vol 3A, 1374 * According with Table 15-9 of the Intel Architecture spec vol 3A,
@@ -1445,19 +1386,19 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1445 } else { 1386 } else {
1446 switch (optypenum) { 1387 switch (optypenum) {
1447 case 0: 1388 case 0:
1448 optype = "generic undef request"; 1389 optype = "generic undef request error";
1449 break; 1390 break;
1450 case 1: 1391 case 1:
1451 optype = "memory read"; 1392 optype = "memory read error";
1452 break; 1393 break;
1453 case 2: 1394 case 2:
1454 optype = "memory write"; 1395 optype = "memory write error";
1455 break; 1396 break;
1456 case 3: 1397 case 3:
1457 optype = "addr/cmd"; 1398 optype = "addr/cmd error";
1458 break; 1399 break;
1459 case 4: 1400 case 4:
1460 optype = "memory scrubbing"; 1401 optype = "memory scrubbing error";
1461 break; 1402 break;
1462 default: 1403 default:
1463 optype = "reserved"; 1404 optype = "reserved";
@@ -1466,13 +1407,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1466 } 1407 }
1467 1408
1468 rc = get_memory_error_data(mci, m->addr, &socket, 1409 rc = get_memory_error_data(mci, m->addr, &socket,
1469 &channel_mask, &rank, area_type); 1410 &channel_mask, &rank, &area_type, msg);
1470 if (rc < 0) 1411 if (rc < 0)
1471 return; 1412 goto err_parsing;
1472 new_mci = get_mci_for_node_id(socket); 1413 new_mci = get_mci_for_node_id(socket);
1473 if (!new_mci) { 1414 if (!new_mci) {
1474 edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!"); 1415 strcpy(msg, "Error: socket got corrupted!");
1475 return; 1416 goto err_parsing;
1476 } 1417 }
1477 mci = new_mci; 1418 mci = new_mci;
1478 pvt = mci->pvt_info; 1419 pvt = mci->pvt_info;
@@ -1486,45 +1427,39 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1486 else 1427 else
1487 dimm = 2; 1428 dimm = 2;
1488 1429
1489 csrow = pvt->csrow_map[first_channel][dimm];
1490
1491 if (uncorrected_error && recoverable)
1492 recoverable_msg = " recoverable";
1493 else
1494 recoverable_msg = "";
1495 1430
1496 /* 1431 /*
1497 * FIXME: What should we do with "channel" information on mcelog? 1432 * FIXME: On some memory configurations (mirror, lockstep), the
1498 * Probably, we can just discard it, as the channel information 1433 * Memory Controller can't point the error to a single DIMM. The
1499 * comes from the get_memory_error_data() address decoding 1434 * EDAC core should be handling the channel mask, in order to point
1435 * to the group of dimm's where the error may be happening.
1500 */ 1436 */
1501 msg = kasprintf(GFP_ATOMIC, 1437 snprintf(msg, sizeof(msg),
1502 "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), " 1438 "count:%d%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
1503 "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n", 1439 core_err_cnt,
1504 core_err_cnt, 1440 overflow ? " OVERFLOW" : "",
1505 area_type, 1441 (uncorrected_error && recoverable) ? " recoverable" : "",
1506 optype, 1442 area_type,
1507 type, 1443 mscod, errcode,
1508 recoverable_msg, 1444 socket,
1509 overflow ? "OVERFLOW" : "", 1445 channel_mask,
1510 m->cpu, 1446 rank);
1511 mscod, errcode,
1512 channel, /* 1111b means not specified */
1513 (long long) m->addr,
1514 socket,
1515 first_channel, /* This is the real channel on SB */
1516 channel_mask,
1517 rank);
1518 1447
1519 debugf0("%s", msg); 1448 debugf0("%s", msg);
1520 1449
1450 /* FIXME: need support for channel mask */
1451
1521 /* Call the helper to output message */ 1452 /* Call the helper to output message */
1522 if (uncorrected_error) 1453 edac_mc_handle_error(tp_event, mci,
1523 edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg); 1454 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
1524 else 1455 channel, dimm, -1,
1525 edac_mc_handle_fbd_ce(mci, csrow, 0, msg); 1456 optype, msg, m);
1457 return;
1458err_parsing:
1459 edac_mc_handle_error(tp_event, mci, 0, 0, 0,
1460 -1, -1, -1,
1461 msg, "", m);
1526 1462
1527 kfree(msg);
1528} 1463}
1529 1464
1530/* 1465/*
@@ -1683,16 +1618,25 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
1683static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) 1618static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
1684{ 1619{
1685 struct mem_ctl_info *mci; 1620 struct mem_ctl_info *mci;
1621 struct edac_mc_layer layers[2];
1686 struct sbridge_pvt *pvt; 1622 struct sbridge_pvt *pvt;
1687 int rc, channels, csrows; 1623 int rc;
1688 1624
1689 /* Check the number of active and not disabled channels */ 1625 /* Check the number of active and not disabled channels */
1690 rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows); 1626 rc = check_if_ecc_is_active(sbridge_dev->bus);
1691 if (unlikely(rc < 0)) 1627 if (unlikely(rc < 0))
1692 return rc; 1628 return rc;
1693 1629
1694 /* allocate a new MC control structure */ 1630 /* allocate a new MC control structure */
1695 mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc); 1631 layers[0].type = EDAC_MC_LAYER_CHANNEL;
1632 layers[0].size = NUM_CHANNELS;
1633 layers[0].is_virt_csrow = false;
1634 layers[1].type = EDAC_MC_LAYER_SLOT;
1635 layers[1].size = MAX_DIMMS;
1636 layers[1].is_virt_csrow = true;
1637 mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
1638 sizeof(*pvt));
1639
1696 if (unlikely(!mci)) 1640 if (unlikely(!mci))
1697 return -ENOMEM; 1641 return -ENOMEM;
1698 1642