diff options
Diffstat (limited to 'drivers/edac/amd64_edac.c')
| -rw-r--r-- | drivers/edac/amd64_edac.c | 297 |
1 files changed, 113 insertions, 184 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index cc8e7c78a23c..f74a684269ff 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
| @@ -60,8 +60,8 @@ struct scrubrate { | |||
| 60 | { 0x00, 0UL}, /* scrubbing off */ | 60 | { 0x00, 0UL}, /* scrubbing off */ |
| 61 | }; | 61 | }; |
| 62 | 62 | ||
| 63 | static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, | 63 | int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, |
| 64 | u32 *val, const char *func) | 64 | u32 *val, const char *func) |
| 65 | { | 65 | { |
| 66 | int err = 0; | 66 | int err = 0; |
| 67 | 67 | ||
| @@ -423,7 +423,6 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, | |||
| 423 | u64 *hole_offset, u64 *hole_size) | 423 | u64 *hole_offset, u64 *hole_size) |
| 424 | { | 424 | { |
| 425 | struct amd64_pvt *pvt = mci->pvt_info; | 425 | struct amd64_pvt *pvt = mci->pvt_info; |
| 426 | u64 base; | ||
| 427 | 426 | ||
| 428 | /* only revE and later have the DRAM Hole Address Register */ | 427 | /* only revE and later have the DRAM Hole Address Register */ |
| 429 | if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) { | 428 | if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) { |
| @@ -462,10 +461,8 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, | |||
| 462 | * addresses in the hole so that they start at 0x100000000. | 461 | * addresses in the hole so that they start at 0x100000000. |
| 463 | */ | 462 | */ |
| 464 | 463 | ||
| 465 | base = dhar_base(pvt); | 464 | *hole_base = dhar_base(pvt); |
| 466 | 465 | *hole_size = (1ULL << 32) - *hole_base; | |
| 467 | *hole_base = base; | ||
| 468 | *hole_size = (0x1ull << 32) - base; | ||
| 469 | 466 | ||
| 470 | if (boot_cpu_data.x86 > 0xf) | 467 | if (boot_cpu_data.x86 > 0xf) |
| 471 | *hole_offset = f10_dhar_offset(pvt); | 468 | *hole_offset = f10_dhar_offset(pvt); |
| @@ -513,15 +510,15 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr) | |||
| 513 | { | 510 | { |
| 514 | struct amd64_pvt *pvt = mci->pvt_info; | 511 | struct amd64_pvt *pvt = mci->pvt_info; |
| 515 | u64 dram_base, hole_base, hole_offset, hole_size, dram_addr; | 512 | u64 dram_base, hole_base, hole_offset, hole_size, dram_addr; |
| 516 | int ret = 0; | 513 | int ret; |
| 517 | 514 | ||
| 518 | dram_base = get_dram_base(pvt, pvt->mc_node_id); | 515 | dram_base = get_dram_base(pvt, pvt->mc_node_id); |
| 519 | 516 | ||
| 520 | ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, | 517 | ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, |
| 521 | &hole_size); | 518 | &hole_size); |
| 522 | if (!ret) { | 519 | if (!ret) { |
| 523 | if ((sys_addr >= (1ull << 32)) && | 520 | if ((sys_addr >= (1ULL << 32)) && |
| 524 | (sys_addr < ((1ull << 32) + hole_size))) { | 521 | (sys_addr < ((1ULL << 32) + hole_size))) { |
| 525 | /* use DHAR to translate SysAddr to DramAddr */ | 522 | /* use DHAR to translate SysAddr to DramAddr */ |
| 526 | dram_addr = sys_addr - hole_offset; | 523 | dram_addr = sys_addr - hole_offset; |
| 527 | 524 | ||
| @@ -712,10 +709,10 @@ static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci, | |||
| 712 | 709 | ||
| 713 | /* Map the Error address to a PAGE and PAGE OFFSET. */ | 710 | /* Map the Error address to a PAGE and PAGE OFFSET. */ |
| 714 | static inline void error_address_to_page_and_offset(u64 error_address, | 711 | static inline void error_address_to_page_and_offset(u64 error_address, |
| 715 | u32 *page, u32 *offset) | 712 | struct err_info *err) |
| 716 | { | 713 | { |
| 717 | *page = (u32) (error_address >> PAGE_SHIFT); | 714 | err->page = (u32) (error_address >> PAGE_SHIFT); |
| 718 | *offset = ((u32) error_address) & ~PAGE_MASK; | 715 | err->offset = ((u32) error_address) & ~PAGE_MASK; |
| 719 | } | 716 | } |
| 720 | 717 | ||
| 721 | /* | 718 | /* |
| @@ -1026,59 +1023,44 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) | |||
| 1026 | } | 1023 | } |
| 1027 | 1024 | ||
| 1028 | static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, | 1025 | static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, |
| 1029 | u16 syndrome) | 1026 | struct err_info *err) |
| 1030 | { | 1027 | { |
| 1031 | struct mem_ctl_info *src_mci; | ||
| 1032 | struct amd64_pvt *pvt = mci->pvt_info; | 1028 | struct amd64_pvt *pvt = mci->pvt_info; |
| 1033 | int channel, csrow; | ||
| 1034 | u32 page, offset; | ||
| 1035 | 1029 | ||
| 1036 | error_address_to_page_and_offset(sys_addr, &page, &offset); | 1030 | error_address_to_page_and_offset(sys_addr, err); |
| 1037 | 1031 | ||
| 1038 | /* | 1032 | /* |
| 1039 | * Find out which node the error address belongs to. This may be | 1033 | * Find out which node the error address belongs to. This may be |
| 1040 | * different from the node that detected the error. | 1034 | * different from the node that detected the error. |
| 1041 | */ | 1035 | */ |
| 1042 | src_mci = find_mc_by_sys_addr(mci, sys_addr); | 1036 | err->src_mci = find_mc_by_sys_addr(mci, sys_addr); |
| 1043 | if (!src_mci) { | 1037 | if (!err->src_mci) { |
| 1044 | amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", | 1038 | amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", |
| 1045 | (unsigned long)sys_addr); | 1039 | (unsigned long)sys_addr); |
| 1046 | edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, | 1040 | err->err_code = ERR_NODE; |
| 1047 | page, offset, syndrome, | ||
| 1048 | -1, -1, -1, | ||
| 1049 | "failed to map error addr to a node", | ||
| 1050 | ""); | ||
| 1051 | return; | 1041 | return; |
| 1052 | } | 1042 | } |
| 1053 | 1043 | ||
| 1054 | /* Now map the sys_addr to a CSROW */ | 1044 | /* Now map the sys_addr to a CSROW */ |
| 1055 | csrow = sys_addr_to_csrow(src_mci, sys_addr); | 1045 | err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr); |
| 1056 | if (csrow < 0) { | 1046 | if (err->csrow < 0) { |
| 1057 | edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, | 1047 | err->err_code = ERR_CSROW; |
| 1058 | page, offset, syndrome, | ||
| 1059 | -1, -1, -1, | ||
| 1060 | "failed to map error addr to a csrow", | ||
| 1061 | ""); | ||
| 1062 | return; | 1048 | return; |
| 1063 | } | 1049 | } |
| 1064 | 1050 | ||
| 1065 | /* CHIPKILL enabled */ | 1051 | /* CHIPKILL enabled */ |
| 1066 | if (pvt->nbcfg & NBCFG_CHIPKILL) { | 1052 | if (pvt->nbcfg & NBCFG_CHIPKILL) { |
| 1067 | channel = get_channel_from_ecc_syndrome(mci, syndrome); | 1053 | err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); |
| 1068 | if (channel < 0) { | 1054 | if (err->channel < 0) { |
| 1069 | /* | 1055 | /* |
| 1070 | * Syndrome didn't map, so we don't know which of the | 1056 | * Syndrome didn't map, so we don't know which of the |
| 1071 | * 2 DIMMs is in error. So we need to ID 'both' of them | 1057 | * 2 DIMMs is in error. So we need to ID 'both' of them |
| 1072 | * as suspect. | 1058 | * as suspect. |
| 1073 | */ | 1059 | */ |
| 1074 | amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - " | 1060 | amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - " |
| 1075 | "possible error reporting race\n", | 1061 | "possible error reporting race\n", |
| 1076 | syndrome); | 1062 | err->syndrome); |
| 1077 | edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, | 1063 | err->err_code = ERR_CHANNEL; |
| 1078 | page, offset, syndrome, | ||
| 1079 | csrow, -1, -1, | ||
| 1080 | "unknown syndrome - possible error reporting race", | ||
| 1081 | ""); | ||
| 1082 | return; | 1064 | return; |
| 1083 | } | 1065 | } |
| 1084 | } else { | 1066 | } else { |
| @@ -1090,13 +1072,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, | |||
| 1090 | * was obtained from email communication with someone at AMD. | 1072 | * was obtained from email communication with someone at AMD. |
| 1091 | * (Wish the email was placed in this comment - norsk) | 1073 | * (Wish the email was placed in this comment - norsk) |
| 1092 | */ | 1074 | */ |
| 1093 | channel = ((sys_addr & BIT(3)) != 0); | 1075 | err->channel = ((sys_addr & BIT(3)) != 0); |
| 1094 | } | 1076 | } |
| 1095 | |||
| 1096 | edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1, | ||
| 1097 | page, offset, syndrome, | ||
| 1098 | csrow, channel, -1, | ||
| 1099 | "", ""); | ||
| 1100 | } | 1077 | } |
| 1101 | 1078 | ||
| 1102 | static int ddr2_cs_size(unsigned i, bool dct_width) | 1079 | static int ddr2_cs_size(unsigned i, bool dct_width) |
| @@ -1482,7 +1459,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr) | |||
| 1482 | 1459 | ||
| 1483 | /* For a given @dram_range, check if @sys_addr falls within it. */ | 1460 | /* For a given @dram_range, check if @sys_addr falls within it. */ |
| 1484 | static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, | 1461 | static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, |
| 1485 | u64 sys_addr, int *nid, int *chan_sel) | 1462 | u64 sys_addr, int *chan_sel) |
| 1486 | { | 1463 | { |
| 1487 | int cs_found = -EINVAL; | 1464 | int cs_found = -EINVAL; |
| 1488 | u64 chan_addr; | 1465 | u64 chan_addr; |
| @@ -1555,15 +1532,14 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, | |||
| 1555 | 1532 | ||
| 1556 | cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel); | 1533 | cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel); |
| 1557 | 1534 | ||
| 1558 | if (cs_found >= 0) { | 1535 | if (cs_found >= 0) |
| 1559 | *nid = node_id; | ||
| 1560 | *chan_sel = channel; | 1536 | *chan_sel = channel; |
| 1561 | } | 1537 | |
| 1562 | return cs_found; | 1538 | return cs_found; |
| 1563 | } | 1539 | } |
| 1564 | 1540 | ||
| 1565 | static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, | 1541 | static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, |
| 1566 | int *node, int *chan_sel) | 1542 | int *chan_sel) |
| 1567 | { | 1543 | { |
| 1568 | int cs_found = -EINVAL; | 1544 | int cs_found = -EINVAL; |
| 1569 | unsigned range; | 1545 | unsigned range; |
| @@ -1577,8 +1553,7 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, | |||
| 1577 | (get_dram_limit(pvt, range) >= sys_addr)) { | 1553 | (get_dram_limit(pvt, range) >= sys_addr)) { |
| 1578 | 1554 | ||
| 1579 | cs_found = f1x_match_to_this_node(pvt, range, | 1555 | cs_found = f1x_match_to_this_node(pvt, range, |
| 1580 | sys_addr, node, | 1556 | sys_addr, chan_sel); |
| 1581 | chan_sel); | ||
| 1582 | if (cs_found >= 0) | 1557 | if (cs_found >= 0) |
| 1583 | break; | 1558 | break; |
| 1584 | } | 1559 | } |
| @@ -1594,22 +1569,15 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, | |||
| 1594 | * (MCX_ADDR). | 1569 | * (MCX_ADDR). |
| 1595 | */ | 1570 | */ |
| 1596 | static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, | 1571 | static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, |
| 1597 | u16 syndrome) | 1572 | struct err_info *err) |
| 1598 | { | 1573 | { |
| 1599 | struct amd64_pvt *pvt = mci->pvt_info; | 1574 | struct amd64_pvt *pvt = mci->pvt_info; |
| 1600 | u32 page, offset; | ||
| 1601 | int nid, csrow, chan = 0; | ||
| 1602 | 1575 | ||
| 1603 | error_address_to_page_and_offset(sys_addr, &page, &offset); | 1576 | error_address_to_page_and_offset(sys_addr, err); |
| 1604 | 1577 | ||
| 1605 | csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); | 1578 | err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel); |
| 1606 | 1579 | if (err->csrow < 0) { | |
| 1607 | if (csrow < 0) { | 1580 | err->err_code = ERR_CSROW; |
| 1608 | edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, | ||
| 1609 | page, offset, syndrome, | ||
| 1610 | -1, -1, -1, | ||
| 1611 | "failed to map error addr to a csrow", | ||
| 1612 | ""); | ||
| 1613 | return; | 1581 | return; |
| 1614 | } | 1582 | } |
| 1615 | 1583 | ||
| @@ -1619,12 +1587,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, | |||
| 1619 | * this point. | 1587 | * this point. |
| 1620 | */ | 1588 | */ |
| 1621 | if (dct_ganging_enabled(pvt)) | 1589 | if (dct_ganging_enabled(pvt)) |
| 1622 | chan = get_channel_from_ecc_syndrome(mci, syndrome); | 1590 | err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); |
| 1623 | |||
| 1624 | edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, | ||
| 1625 | page, offset, syndrome, | ||
| 1626 | csrow, chan, -1, | ||
| 1627 | "", ""); | ||
| 1628 | } | 1591 | } |
| 1629 | 1592 | ||
| 1630 | /* | 1593 | /* |
| @@ -1633,14 +1596,11 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, | |||
| 1633 | */ | 1596 | */ |
| 1634 | static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) | 1597 | static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) |
| 1635 | { | 1598 | { |
| 1636 | int dimm, size0, size1, factor = 0; | 1599 | int dimm, size0, size1; |
| 1637 | u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; | 1600 | u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; |
| 1638 | u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; | 1601 | u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; |
| 1639 | 1602 | ||
| 1640 | if (boot_cpu_data.x86 == 0xf) { | 1603 | if (boot_cpu_data.x86 == 0xf) { |
| 1641 | if (pvt->dclr0 & WIDTH_128) | ||
| 1642 | factor = 1; | ||
| 1643 | |||
| 1644 | /* K8 families < revF not supported yet */ | 1604 | /* K8 families < revF not supported yet */ |
| 1645 | if (pvt->ext_model < K8_REV_F) | 1605 | if (pvt->ext_model < K8_REV_F) |
| 1646 | return; | 1606 | return; |
| @@ -1671,8 +1631,8 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) | |||
| 1671 | DBAM_DIMM(dimm, dbam)); | 1631 | DBAM_DIMM(dimm, dbam)); |
| 1672 | 1632 | ||
| 1673 | amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", | 1633 | amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", |
| 1674 | dimm * 2, size0 << factor, | 1634 | dimm * 2, size0, |
| 1675 | dimm * 2 + 1, size1 << factor); | 1635 | dimm * 2 + 1, size1); |
| 1676 | } | 1636 | } |
| 1677 | } | 1637 | } |
| 1678 | 1638 | ||
| @@ -1893,101 +1853,56 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) | |||
| 1893 | return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz); | 1853 | return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz); |
| 1894 | } | 1854 | } |
| 1895 | 1855 | ||
| 1896 | /* | 1856 | static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, |
| 1897 | * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR | 1857 | u8 ecc_type) |
| 1898 | * ADDRESS and process. | ||
| 1899 | */ | ||
| 1900 | static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m) | ||
| 1901 | { | ||
| 1902 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 1903 | u64 sys_addr; | ||
| 1904 | u16 syndrome; | ||
| 1905 | |||
| 1906 | /* Ensure that the Error Address is VALID */ | ||
| 1907 | if (!(m->status & MCI_STATUS_ADDRV)) { | ||
| 1908 | amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); | ||
| 1909 | edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, | ||
| 1910 | 0, 0, 0, | ||
| 1911 | -1, -1, -1, | ||
| 1912 | "HW has no ERROR_ADDRESS available", | ||
| 1913 | ""); | ||
| 1914 | return; | ||
| 1915 | } | ||
| 1916 | |||
| 1917 | sys_addr = get_error_address(m); | ||
| 1918 | syndrome = extract_syndrome(m->status); | ||
| 1919 | |||
| 1920 | amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr); | ||
| 1921 | |||
| 1922 | pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome); | ||
| 1923 | } | ||
| 1924 | |||
| 1925 | /* Handle any Un-correctable Errors (UEs) */ | ||
| 1926 | static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) | ||
| 1927 | { | 1858 | { |
| 1928 | struct mem_ctl_info *log_mci, *src_mci = NULL; | 1859 | enum hw_event_mc_err_type err_type; |
| 1929 | int csrow; | 1860 | const char *string; |
| 1930 | u64 sys_addr; | ||
| 1931 | u32 page, offset; | ||
| 1932 | |||
| 1933 | log_mci = mci; | ||
| 1934 | 1861 | ||
| 1935 | if (!(m->status & MCI_STATUS_ADDRV)) { | 1862 | if (ecc_type == 2) |
| 1936 | amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); | 1863 | err_type = HW_EVENT_ERR_CORRECTED; |
| 1937 | edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, | 1864 | else if (ecc_type == 1) |
| 1938 | 0, 0, 0, | 1865 | err_type = HW_EVENT_ERR_UNCORRECTED; |
| 1939 | -1, -1, -1, | 1866 | else { |
| 1940 | "HW has no ERROR_ADDRESS available", | 1867 | WARN(1, "Something is rotten in the state of Denmark.\n"); |
| 1941 | ""); | ||
| 1942 | return; | 1868 | return; |
| 1943 | } | 1869 | } |
| 1944 | 1870 | ||
| 1945 | sys_addr = get_error_address(m); | 1871 | switch (err->err_code) { |
| 1946 | error_address_to_page_and_offset(sys_addr, &page, &offset); | 1872 | case DECODE_OK: |
| 1947 | 1873 | string = ""; | |
| 1948 | /* | 1874 | break; |
| 1949 | * Find out which node the error address belongs to. This may be | 1875 | case ERR_NODE: |
| 1950 | * different from the node that detected the error. | 1876 | string = "Failed to map error addr to a node"; |
| 1951 | */ | 1877 | break; |
| 1952 | src_mci = find_mc_by_sys_addr(mci, sys_addr); | 1878 | case ERR_CSROW: |
| 1953 | if (!src_mci) { | 1879 | string = "Failed to map error addr to a csrow"; |
| 1954 | amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n", | 1880 | break; |
| 1955 | (unsigned long)sys_addr); | 1881 | case ERR_CHANNEL: |
| 1956 | edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, | 1882 | string = "unknown syndrome - possible error reporting race"; |
| 1957 | page, offset, 0, | 1883 | break; |
| 1958 | -1, -1, -1, | 1884 | default: |
| 1959 | "ERROR ADDRESS NOT mapped to a MC", | 1885 | string = "WTF error"; |
| 1960 | ""); | 1886 | break; |
| 1961 | return; | ||
| 1962 | } | 1887 | } |
| 1963 | 1888 | ||
| 1964 | log_mci = src_mci; | 1889 | edac_mc_handle_error(err_type, mci, 1, |
| 1965 | 1890 | err->page, err->offset, err->syndrome, | |
| 1966 | csrow = sys_addr_to_csrow(log_mci, sys_addr); | 1891 | err->csrow, err->channel, -1, |
| 1967 | if (csrow < 0) { | 1892 | string, ""); |
| 1968 | amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n", | ||
| 1969 | (unsigned long)sys_addr); | ||
| 1970 | edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, | ||
| 1971 | page, offset, 0, | ||
| 1972 | -1, -1, -1, | ||
| 1973 | "ERROR ADDRESS NOT mapped to CS", | ||
| 1974 | ""); | ||
| 1975 | } else { | ||
| 1976 | edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, | ||
| 1977 | page, offset, 0, | ||
| 1978 | csrow, -1, -1, | ||
| 1979 | "", ""); | ||
| 1980 | } | ||
| 1981 | } | 1893 | } |
| 1982 | 1894 | ||
| 1983 | static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, | 1895 | static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, |
| 1984 | struct mce *m) | 1896 | struct mce *m) |
| 1985 | { | 1897 | { |
| 1986 | u16 ec = EC(m->status); | 1898 | struct amd64_pvt *pvt = mci->pvt_info; |
| 1987 | u8 xec = XEC(m->status, 0x1f); | ||
| 1988 | u8 ecc_type = (m->status >> 45) & 0x3; | 1899 | u8 ecc_type = (m->status >> 45) & 0x3; |
| 1900 | u8 xec = XEC(m->status, 0x1f); | ||
| 1901 | u16 ec = EC(m->status); | ||
| 1902 | u64 sys_addr; | ||
| 1903 | struct err_info err; | ||
| 1989 | 1904 | ||
| 1990 | /* Bail early out if this was an 'observed' error */ | 1905 | /* Bail out early if this was an 'observed' error */ |
| 1991 | if (PP(ec) == NBSL_PP_OBS) | 1906 | if (PP(ec) == NBSL_PP_OBS) |
| 1992 | return; | 1907 | return; |
| 1993 | 1908 | ||
| @@ -1995,10 +1910,16 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, | |||
| 1995 | if (xec && xec != F10_NBSL_EXT_ERR_ECC) | 1910 | if (xec && xec != F10_NBSL_EXT_ERR_ECC) |
| 1996 | return; | 1911 | return; |
| 1997 | 1912 | ||
| 1913 | memset(&err, 0, sizeof(err)); | ||
| 1914 | |||
| 1915 | sys_addr = get_error_address(m); | ||
| 1916 | |||
| 1998 | if (ecc_type == 2) | 1917 | if (ecc_type == 2) |
| 1999 | amd64_handle_ce(mci, m); | 1918 | err.syndrome = extract_syndrome(m->status); |
| 2000 | else if (ecc_type == 1) | 1919 | |
| 2001 | amd64_handle_ue(mci, m); | 1920 | pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err); |
| 1921 | |||
| 1922 | __log_bus_error(mci, &err, ecc_type); | ||
| 2002 | } | 1923 | } |
| 2003 | 1924 | ||
| 2004 | void amd64_decode_bus_error(int node_id, struct mce *m) | 1925 | void amd64_decode_bus_error(int node_id, struct mce *m) |
| @@ -2166,6 +2087,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) | |||
| 2166 | u32 cs_mode, nr_pages; | 2087 | u32 cs_mode, nr_pages; |
| 2167 | u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; | 2088 | u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; |
| 2168 | 2089 | ||
| 2090 | |||
| 2169 | /* | 2091 | /* |
| 2170 | * The math on this doesn't look right on the surface because x/2*4 can | 2092 | * The math on this doesn't look right on the surface because x/2*4 can |
| 2171 | * be simplified to x*2 but this expression makes use of the fact that | 2093 | * be simplified to x*2 but this expression makes use of the fact that |
| @@ -2173,13 +2095,13 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) | |||
| 2173 | * number of bits to shift the DBAM register to extract the proper CSROW | 2095 | * number of bits to shift the DBAM register to extract the proper CSROW |
| 2174 | * field. | 2096 | * field. |
| 2175 | */ | 2097 | */ |
| 2176 | cs_mode = (dbam >> ((csrow_nr / 2) * 4)) & 0xF; | 2098 | cs_mode = DBAM_DIMM(csrow_nr / 2, dbam); |
| 2177 | 2099 | ||
| 2178 | nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT); | 2100 | nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT); |
| 2179 | 2101 | ||
| 2180 | edac_dbg(0, " (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode); | 2102 | edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", |
| 2181 | edac_dbg(0, " nr_pages/channel= %u channel-count = %d\n", | 2103 | csrow_nr, dct, cs_mode); |
| 2182 | nr_pages, pvt->channel_count); | 2104 | edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); |
| 2183 | 2105 | ||
| 2184 | return nr_pages; | 2106 | return nr_pages; |
| 2185 | } | 2107 | } |
| @@ -2190,15 +2112,14 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) | |||
| 2190 | */ | 2112 | */ |
| 2191 | static int init_csrows(struct mem_ctl_info *mci) | 2113 | static int init_csrows(struct mem_ctl_info *mci) |
| 2192 | { | 2114 | { |
| 2115 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 2193 | struct csrow_info *csrow; | 2116 | struct csrow_info *csrow; |
| 2194 | struct dimm_info *dimm; | 2117 | struct dimm_info *dimm; |
| 2195 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 2196 | u64 base, mask; | ||
| 2197 | u32 val; | ||
| 2198 | int i, j, empty = 1; | ||
| 2199 | enum mem_type mtype; | ||
| 2200 | enum edac_type edac_mode; | 2118 | enum edac_type edac_mode; |
| 2119 | enum mem_type mtype; | ||
| 2120 | int i, j, empty = 1; | ||
| 2201 | int nr_pages = 0; | 2121 | int nr_pages = 0; |
| 2122 | u32 val; | ||
| 2202 | 2123 | ||
| 2203 | amd64_read_pci_cfg(pvt->F3, NBCFG, &val); | 2124 | amd64_read_pci_cfg(pvt->F3, NBCFG, &val); |
| 2204 | 2125 | ||
| @@ -2208,29 +2129,35 @@ static int init_csrows(struct mem_ctl_info *mci) | |||
| 2208 | pvt->mc_node_id, val, | 2129 | pvt->mc_node_id, val, |
| 2209 | !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); | 2130 | !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); |
| 2210 | 2131 | ||
| 2132 | /* | ||
| 2133 | * We iterate over DCT0 here but we look at DCT1 in parallel, if needed. | ||
| 2134 | */ | ||
| 2211 | for_each_chip_select(i, 0, pvt) { | 2135 | for_each_chip_select(i, 0, pvt) { |
| 2212 | csrow = mci->csrows[i]; | 2136 | bool row_dct0 = !!csrow_enabled(i, 0, pvt); |
| 2137 | bool row_dct1 = false; | ||
| 2138 | |||
| 2139 | if (boot_cpu_data.x86 != 0xf) | ||
| 2140 | row_dct1 = !!csrow_enabled(i, 1, pvt); | ||
| 2213 | 2141 | ||
| 2214 | if (!csrow_enabled(i, 0, pvt) && !csrow_enabled(i, 1, pvt)) { | 2142 | if (!row_dct0 && !row_dct1) |
| 2215 | edac_dbg(1, "----CSROW %d VALID for MC node %d\n", | ||
| 2216 | i, pvt->mc_node_id); | ||
| 2217 | continue; | 2143 | continue; |
| 2218 | } | ||
| 2219 | 2144 | ||
| 2145 | csrow = mci->csrows[i]; | ||
| 2220 | empty = 0; | 2146 | empty = 0; |
| 2221 | if (csrow_enabled(i, 0, pvt)) | 2147 | |
| 2148 | edac_dbg(1, "MC node: %d, csrow: %d\n", | ||
| 2149 | pvt->mc_node_id, i); | ||
| 2150 | |||
| 2151 | if (row_dct0) | ||
| 2222 | nr_pages = amd64_csrow_nr_pages(pvt, 0, i); | 2152 | nr_pages = amd64_csrow_nr_pages(pvt, 0, i); |
| 2223 | if (csrow_enabled(i, 1, pvt)) | ||
| 2224 | nr_pages += amd64_csrow_nr_pages(pvt, 1, i); | ||
| 2225 | 2153 | ||
| 2226 | get_cs_base_and_mask(pvt, i, 0, &base, &mask); | 2154 | /* K8 has only one DCT */ |
| 2227 | /* 8 bytes of resolution */ | 2155 | if (boot_cpu_data.x86 != 0xf && row_dct1) |
| 2156 | nr_pages += amd64_csrow_nr_pages(pvt, 1, i); | ||
| 2228 | 2157 | ||
| 2229 | mtype = amd64_determine_memory_type(pvt, i); | 2158 | mtype = amd64_determine_memory_type(pvt, i); |
| 2230 | 2159 | ||
| 2231 | edac_dbg(1, " for MC node %d csrow %d:\n", pvt->mc_node_id, i); | 2160 | edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); |
| 2232 | edac_dbg(1, " nr_pages: %u\n", | ||
| 2233 | nr_pages * pvt->channel_count); | ||
| 2234 | 2161 | ||
| 2235 | /* | 2162 | /* |
| 2236 | * determine whether CHIPKILL or JUST ECC or NO ECC is operating | 2163 | * determine whether CHIPKILL or JUST ECC or NO ECC is operating |
| @@ -2247,6 +2174,7 @@ static int init_csrows(struct mem_ctl_info *mci) | |||
| 2247 | dimm->edac_mode = edac_mode; | 2174 | dimm->edac_mode = edac_mode; |
| 2248 | dimm->nr_pages = nr_pages; | 2175 | dimm->nr_pages = nr_pages; |
| 2249 | } | 2176 | } |
| 2177 | csrow->nr_pages = nr_pages; | ||
| 2250 | } | 2178 | } |
| 2251 | 2179 | ||
| 2252 | return empty; | 2180 | return empty; |
| @@ -2591,6 +2519,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) | |||
| 2591 | 2519 | ||
| 2592 | mci->pvt_info = pvt; | 2520 | mci->pvt_info = pvt; |
| 2593 | mci->pdev = &pvt->F2->dev; | 2521 | mci->pdev = &pvt->F2->dev; |
| 2522 | mci->csbased = 1; | ||
| 2594 | 2523 | ||
| 2595 | setup_mci_misc_attrs(mci, fam_type); | 2524 | setup_mci_misc_attrs(mci, fam_type); |
| 2596 | 2525 | ||
