Diffstat (limited to 'drivers/edac')
-rw-r--r--  drivers/edac/Kconfig            |   8
-rw-r--r--  drivers/edac/amd64_edac.c       | 297
-rw-r--r--  drivers/edac/amd64_edac.h       |  59
-rw-r--r--  drivers/edac/amd64_edac_inj.c   | 128
-rw-r--r--  drivers/edac/edac_mc.c          |  65
-rw-r--r--  drivers/edac/edac_mc_sysfs.c    |  39
-rw-r--r--  drivers/edac/edac_module.c      |  27
-rw-r--r--  drivers/edac/edac_pci.c         |   3
-rw-r--r--  drivers/edac/edac_pci_sysfs.c   |  12
-rw-r--r--  drivers/edac/highbank_mc_edac.c |   8
-rw-r--r--  drivers/edac/i7300_edac.c       |   8
-rw-r--r--  drivers/edac/i7core_edac.c      |   6
-rw-r--r--  drivers/edac/i82975x_edac.c     |  11
-rw-r--r--  drivers/edac/mce_amd.c          | 254
-rw-r--r--  drivers/edac/mce_amd.h          |  11
15 files changed, 473 insertions, 463 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index a9db20815a39..4c6c876d9dc3 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -45,10 +45,10 @@ config EDAC_LEGACY_SYSFS
 config EDAC_DEBUG
 	bool "Debugging"
 	help
-	  This turns on debugging information for the entire EDAC
-	  sub-system. You can insert module with "debug_level=x", current
-	  there're four debug levels (x=0,1,2,3 from low to high).
-	  Usually you should select 'N'.
+	  This turns on debugging information for the entire EDAC subsystem.
+	  You do so by inserting edac_module with "edac_debug_level=x." Valid
+	  levels are 0-4 (from low to high) and by default it is set to 2.
+	  Usually you should select 'N' here.
 
 config EDAC_DECODE_MCE
 	tristate "Decode MCEs in human-readable form (only on AMD for now)"
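A note on the help text above: the level gate it describes is the EDAC debug macros comparing a message's level against edac_debug_level. A simplified reconstruction of that gate (an illustration, not the verbatim kernel header):

/*
 * Sketch of how an edac_dbg()-style macro gates on edac_debug_level.
 * Simplified reconstruction -- the real macro lives in the EDAC core
 * headers and routes through edac_printk().
 */
extern int edac_debug_level;

#define edac_dbg_sketch(level, fmt, ...)				\
do {									\
	if ((level) <= edac_debug_level)				\
		printk(KERN_DEBUG "EDAC DEBUG: " fmt, ##__VA_ARGS__);	\
} while (0)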
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index cc8e7c78a23c..f74a684269ff 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -60,8 +60,8 @@ struct scrubrate {
 	{ 0x00, 0UL},        /* scrubbing off */
 };
 
-static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
-				      u32 *val, const char *func)
+int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
+			       u32 *val, const char *func)
 {
 	int err = 0;
 
@@ -423,7 +423,6 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
 			     u64 *hole_offset, u64 *hole_size)
 {
 	struct amd64_pvt *pvt = mci->pvt_info;
-	u64 base;
 
 	/* only revE and later have the DRAM Hole Address Register */
 	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
@@ -462,10 +461,8 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
 	 * addresses in the hole so that they start at 0x100000000.
 	 */
 
-	base = dhar_base(pvt);
-
-	*hole_base = base;
-	*hole_size = (0x1ull << 32) - base;
+	*hole_base = dhar_base(pvt);
+	*hole_size = (1ULL << 32) - *hole_base;
 
 	if (boot_cpu_data.x86 > 0xf)
 		*hole_offset = f10_dhar_offset(pvt);
@@ -513,15 +510,15 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
 {
 	struct amd64_pvt *pvt = mci->pvt_info;
 	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
-	int ret = 0;
+	int ret;
 
 	dram_base = get_dram_base(pvt, pvt->mc_node_id);
 
 	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
 				      &hole_size);
 	if (!ret) {
-		if ((sys_addr >= (1ull << 32)) &&
-		    (sys_addr < ((1ull << 32) + hole_size))) {
+		if ((sys_addr >= (1ULL << 32)) &&
+		    (sys_addr < ((1ULL << 32) + hole_size))) {
 			/* use DHAR to translate SysAddr to DramAddr */
 			dram_addr = sys_addr - hole_offset;
 
@@ -712,10 +709,10 @@ static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
 
 /* Map the Error address to a PAGE and PAGE OFFSET. */
 static inline void error_address_to_page_and_offset(u64 error_address,
-						    u32 *page, u32 *offset)
+						    struct err_info *err)
 {
-	*page = (u32) (error_address >> PAGE_SHIFT);
-	*offset = ((u32) error_address) & ~PAGE_MASK;
+	err->page = (u32) (error_address >> PAGE_SHIFT);
+	err->offset = ((u32) error_address) & ~PAGE_MASK;
 }
 
 /*
@@ -1026,59 +1023,44 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
 }
 
 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
-				    u16 syndrome)
+				    struct err_info *err)
 {
-	struct mem_ctl_info *src_mci;
 	struct amd64_pvt *pvt = mci->pvt_info;
-	int channel, csrow;
-	u32 page, offset;
 
-	error_address_to_page_and_offset(sys_addr, &page, &offset);
+	error_address_to_page_and_offset(sys_addr, err);
 
 	/*
 	 * Find out which node the error address belongs to. This may be
 	 * different from the node that detected the error.
 	 */
-	src_mci = find_mc_by_sys_addr(mci, sys_addr);
-	if (!src_mci) {
+	err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
+	if (!err->src_mci) {
 		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
 			     (unsigned long)sys_addr);
-		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
-				     page, offset, syndrome,
-				     -1, -1, -1,
-				     "failed to map error addr to a node",
-				     "");
+		err->err_code = ERR_NODE;
 		return;
 	}
 
 	/* Now map the sys_addr to a CSROW */
-	csrow = sys_addr_to_csrow(src_mci, sys_addr);
-	if (csrow < 0) {
-		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
-				     page, offset, syndrome,
-				     -1, -1, -1,
-				     "failed to map error addr to a csrow",
-				     "");
+	err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
+	if (err->csrow < 0) {
+		err->err_code = ERR_CSROW;
 		return;
 	}
 
 	/* CHIPKILL enabled */
 	if (pvt->nbcfg & NBCFG_CHIPKILL) {
-		channel = get_channel_from_ecc_syndrome(mci, syndrome);
-		if (channel < 0) {
+		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
+		if (err->channel < 0) {
 			/*
 			 * Syndrome didn't map, so we don't know which of the
 			 * 2 DIMMs is in error. So we need to ID 'both' of them
 			 * as suspect.
 			 */
-			amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
+			amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
 				      "possible error reporting race\n",
-				      syndrome);
-			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
-					     page, offset, syndrome,
-					     csrow, -1, -1,
-					     "unknown syndrome - possible error reporting race",
-					     "");
+				      err->syndrome);
+			err->err_code = ERR_CHANNEL;
 			return;
 		}
 	} else {
@@ -1090,13 +1072,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
 		 * was obtained from email communication with someone at AMD.
 		 * (Wish the email was placed in this comment - norsk)
 		 */
-		channel = ((sys_addr & BIT(3)) != 0);
+		err->channel = ((sys_addr & BIT(3)) != 0);
 	}
-
-	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1,
-			     page, offset, syndrome,
-			     csrow, channel, -1,
-			     "", "");
 }
 
 static int ddr2_cs_size(unsigned i, bool dct_width)
@@ -1482,7 +1459,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
 
 /* For a given @dram_range, check if @sys_addr falls within it. */
 static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
-				  u64 sys_addr, int *nid, int *chan_sel)
+				  u64 sys_addr, int *chan_sel)
 {
 	int cs_found = -EINVAL;
 	u64 chan_addr;
@@ -1555,15 +1532,14 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
 
 	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
 
-	if (cs_found >= 0) {
-		*nid = node_id;
+	if (cs_found >= 0)
 		*chan_sel = channel;
-	}
+
 	return cs_found;
 }
 
 static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
-				       int *node, int *chan_sel)
+				       int *chan_sel)
 {
 	int cs_found = -EINVAL;
 	unsigned range;
@@ -1577,8 +1553,7 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
 		    (get_dram_limit(pvt, range) >= sys_addr)) {
 
 			cs_found = f1x_match_to_this_node(pvt, range,
-							  sys_addr, node,
-							  chan_sel);
+							  sys_addr, chan_sel);
 			if (cs_found >= 0)
 				break;
 		}
@@ -1594,22 +1569,15 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
  * (MCX_ADDR).
  */
 static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
-				     u16 syndrome)
+				     struct err_info *err)
 {
 	struct amd64_pvt *pvt = mci->pvt_info;
-	u32 page, offset;
-	int nid, csrow, chan = 0;
 
-	error_address_to_page_and_offset(sys_addr, &page, &offset);
+	error_address_to_page_and_offset(sys_addr, err);
 
-	csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
-
-	if (csrow < 0) {
-		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
-				     page, offset, syndrome,
-				     -1, -1, -1,
-				     "failed to map error addr to a csrow",
-				     "");
+	err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
+	if (err->csrow < 0) {
+		err->err_code = ERR_CSROW;
 		return;
 	}
 
@@ -1619,12 +1587,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
 	 * this point.
 	 */
 	if (dct_ganging_enabled(pvt))
-		chan = get_channel_from_ecc_syndrome(mci, syndrome);
-
-	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
-			     page, offset, syndrome,
-			     csrow, chan, -1,
-			     "", "");
+		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
 }
 
 /*
@@ -1633,14 +1596,11 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
  */
 static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
 {
-	int dimm, size0, size1, factor = 0;
+	int dimm, size0, size1;
 	u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
 	u32 dbam  = ctrl ? pvt->dbam1 : pvt->dbam0;
 
 	if (boot_cpu_data.x86 == 0xf) {
-		if (pvt->dclr0 & WIDTH_128)
-			factor = 1;
-
 		/* K8 families < revF not supported yet */
 		if (pvt->ext_model < K8_REV_F)
 			return;
@@ -1671,8 +1631,8 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
 			       DBAM_DIMM(dimm, dbam));
 
 		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
-			   dimm * 2, size0 << factor,
-			   dimm * 2 + 1, size1 << factor);
+			   dimm * 2, size0,
+			   dimm * 2 + 1, size1);
 	}
 }
 
@@ -1893,101 +1853,56 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
 	return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
 }
 
-/*
- * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
- * ADDRESS and process.
- */
-static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
-{
-	struct amd64_pvt *pvt = mci->pvt_info;
-	u64 sys_addr;
-	u16 syndrome;
-
-	/* Ensure that the Error Address is VALID */
-	if (!(m->status & MCI_STATUS_ADDRV)) {
-		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
-		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
-				     0, 0, 0,
-				     -1, -1, -1,
-				     "HW has no ERROR_ADDRESS available",
-				     "");
-		return;
-	}
-
-	sys_addr = get_error_address(m);
-	syndrome = extract_syndrome(m->status);
-
-	amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
-
-	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome);
-}
-
-/* Handle any Un-correctable Errors (UEs) */
-static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
-{
-	struct mem_ctl_info *log_mci, *src_mci = NULL;
-	int csrow;
-	u64 sys_addr;
-	u32 page, offset;
-
-	log_mci = mci;
-
-	if (!(m->status & MCI_STATUS_ADDRV)) {
-		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
-		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
-				     0, 0, 0,
-				     -1, -1, -1,
-				     "HW has no ERROR_ADDRESS available",
-				     "");
-		return;
-	}
-
-	sys_addr = get_error_address(m);
-	error_address_to_page_and_offset(sys_addr, &page, &offset);
-
-	/*
-	 * Find out which node the error address belongs to. This may be
-	 * different from the node that detected the error.
-	 */
-	src_mci = find_mc_by_sys_addr(mci, sys_addr);
-	if (!src_mci) {
-		amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
-			     (unsigned long)sys_addr);
-		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
-				     page, offset, 0,
-				     -1, -1, -1,
-				     "ERROR ADDRESS NOT mapped to a MC",
-				     "");
-		return;
-	}
-
-	log_mci = src_mci;
-
-	csrow = sys_addr_to_csrow(log_mci, sys_addr);
-	if (csrow < 0) {
-		amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
-			     (unsigned long)sys_addr);
-		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
-				     page, offset, 0,
-				     -1, -1, -1,
-				     "ERROR ADDRESS NOT mapped to CS",
-				     "");
-	} else {
-		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
-				     page, offset, 0,
-				     csrow, -1, -1,
-				     "", "");
-	}
+static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
+			    u8 ecc_type)
+{
+	enum hw_event_mc_err_type err_type;
+	const char *string;
+
+	if (ecc_type == 2)
+		err_type = HW_EVENT_ERR_CORRECTED;
+	else if (ecc_type == 1)
+		err_type = HW_EVENT_ERR_UNCORRECTED;
+	else {
+		WARN(1, "Something is rotten in the state of Denmark.\n");
+		return;
+	}
+
+	switch (err->err_code) {
+	case DECODE_OK:
+		string = "";
+		break;
+	case ERR_NODE:
+		string = "Failed to map error addr to a node";
+		break;
+	case ERR_CSROW:
+		string = "Failed to map error addr to a csrow";
+		break;
+	case ERR_CHANNEL:
+		string = "unknown syndrome - possible error reporting race";
+		break;
+	default:
+		string = "WTF error";
+		break;
+	}
+
+	edac_mc_handle_error(err_type, mci, 1,
+			     err->page, err->offset, err->syndrome,
+			     err->csrow, err->channel, -1,
+			     string, "");
 }
 
 static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
 					    struct mce *m)
 {
-	u16 ec = EC(m->status);
-	u8 xec = XEC(m->status, 0x1f);
+	struct amd64_pvt *pvt = mci->pvt_info;
 	u8 ecc_type = (m->status >> 45) & 0x3;
+	u8 xec = XEC(m->status, 0x1f);
+	u16 ec = EC(m->status);
+	u64 sys_addr;
+	struct err_info err;
 
-	/* Bail early out if this was an 'observed' error */
+	/* Bail out early if this was an 'observed' error */
 	if (PP(ec) == NBSL_PP_OBS)
 		return;
 
@@ -1995,10 +1910,16 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
 	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
 		return;
 
+	memset(&err, 0, sizeof(err));
+
+	sys_addr = get_error_address(m);
+
 	if (ecc_type == 2)
-		amd64_handle_ce(mci, m);
-	else if (ecc_type == 1)
-		amd64_handle_ue(mci, m);
+		err.syndrome = extract_syndrome(m->status);
+
+	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
+
+	__log_bus_error(mci, &err, ecc_type);
 }
 
 void amd64_decode_bus_error(int node_id, struct mce *m)
@@ -2166,6 +2087,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
 	u32 cs_mode, nr_pages;
 	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
 
+
 	/*
 	 * The math on this doesn't look right on the surface because x/2*4 can
 	 * be simplified to x*2 but this expression makes use of the fact that
@@ -2173,13 +2095,13 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
 	 * number of bits to shift the DBAM register to extract the proper CSROW
 	 * field.
 	 */
-	cs_mode = (dbam >> ((csrow_nr / 2) * 4)) & 0xF;
+	cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
 
 	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
 
-	edac_dbg(0, "  (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
-	edac_dbg(0, "  nr_pages/channel= %u  channel-count = %d\n",
-		 nr_pages, pvt->channel_count);
+	edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
+		 csrow_nr, dct, cs_mode);
+	edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
 
 	return nr_pages;
 }
@@ -2190,15 +2112,14 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
  */
 static int init_csrows(struct mem_ctl_info *mci)
 {
+	struct amd64_pvt *pvt = mci->pvt_info;
 	struct csrow_info *csrow;
 	struct dimm_info *dimm;
-	struct amd64_pvt *pvt = mci->pvt_info;
-	u64 base, mask;
-	u32 val;
-	int i, j, empty = 1;
-	enum mem_type mtype;
 	enum edac_type edac_mode;
+	enum mem_type mtype;
+	int i, j, empty = 1;
 	int nr_pages = 0;
+	u32 val;
 
 	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
 
@@ -2208,29 +2129,35 @@ static int init_csrows(struct mem_ctl_info *mci)
 		 pvt->mc_node_id, val,
 		 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
 
+	/*
+	 * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
+	 */
 	for_each_chip_select(i, 0, pvt) {
-		csrow = mci->csrows[i];
+		bool row_dct0 = !!csrow_enabled(i, 0, pvt);
+		bool row_dct1 = false;
+
+		if (boot_cpu_data.x86 != 0xf)
+			row_dct1 = !!csrow_enabled(i, 1, pvt);
 
-		if (!csrow_enabled(i, 0, pvt) && !csrow_enabled(i, 1, pvt)) {
-			edac_dbg(1, "----CSROW %d VALID for MC node %d\n",
-				 i, pvt->mc_node_id);
+		if (!row_dct0 && !row_dct1)
 			continue;
-		}
 
+		csrow = mci->csrows[i];
 		empty = 0;
-		if (csrow_enabled(i, 0, pvt))
+
+		edac_dbg(1, "MC node: %d, csrow: %d\n",
+			 pvt->mc_node_id, i);
+
+		if (row_dct0)
 			nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
-		if (csrow_enabled(i, 1, pvt))
-			nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
 
-		get_cs_base_and_mask(pvt, i, 0, &base, &mask);
-		/* 8 bytes of resolution */
+		/* K8 has only one DCT */
+		if (boot_cpu_data.x86 != 0xf && row_dct1)
+			nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
 
 		mtype = amd64_determine_memory_type(pvt, i);
 
-		edac_dbg(1, "	for MC node %d csrow %d:\n", pvt->mc_node_id, i);
-		edac_dbg(1, "	nr_pages: %u\n",
-			 nr_pages * pvt->channel_count);
+		edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
 
 		/*
 		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
@@ -2247,6 +2174,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 			dimm->edac_mode = edac_mode;
 			dimm->nr_pages = nr_pages;
 		}
+		csrow->nr_pages = nr_pages;
 	}
 
 	return empty;
@@ -2591,6 +2519,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
 
 	mci->pvt_info = pvt;
 	mci->pdev = &pvt->F2->dev;
+	mci->csbased = 1;
 
 	setup_mci_misc_attrs(mci, fam_type);
 
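The DRAM-hole arithmetic in amd64_get_dram_hole_info() above is easy to sanity-check in isolation; a small standalone sketch with a hypothetical DHAR base value:

/* Worked example of the "*hole_size = (1ULL << 32) - *hole_base" math.
 * The base value below is an illustration, not read from real hardware. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t hole_base = 0xE0000000ULL;	/* hypothetical dhar_base() */
	uint64_t hole_size = (1ULL << 32) - hole_base;

	/* 0x20000000 -> a 512 MB hole ending exactly at the 4 GB boundary */
	printf("hole_size = %#llx (%llu MB)\n",
	       (unsigned long long)hole_size,
	       (unsigned long long)(hole_size >> 20));
	return 0;
}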
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 8c4139647efc..e864f407806c 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -219,7 +219,7 @@
 #define DBAM1			0x180
 
 /* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */
-#define DBAM_DIMM(i, reg)	((((reg) >> (4*i))) & 0xF)
+#define DBAM_DIMM(i, reg)	((((reg) >> (4*(i)))) & 0xF)
 
 #define DBAM_MAX_VALUE		11
 
@@ -267,18 +267,20 @@
 #define online_spare_bad_dramcs(pvt, c)	(((pvt)->online_spare >> (4 + 4 * (c))) & 0x7)
 
 #define F10_NB_ARRAY_ADDR	0xB8
-#define F10_NB_ARRAY_DRAM_ECC	BIT(31)
+#define F10_NB_ARRAY_DRAM	BIT(31)
 
 /* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */
-#define SET_NB_ARRAY_ADDRESS(section)	(((section) & 0x3) << 1)
+#define SET_NB_ARRAY_ADDR(section)	(((section) & 0x3) << 1)
 
 #define F10_NB_ARRAY_DATA	0xBC
-#define SET_NB_DRAM_INJECTION_WRITE(word, bits)	\
-					(BIT(((word) & 0xF) + 20) | \
-					BIT(17) | bits)
-#define SET_NB_DRAM_INJECTION_READ(word, bits)	\
-					(BIT(((word) & 0xF) + 20) | \
-					BIT(16) | bits)
+#define F10_NB_ARR_ECC_WR_REQ	BIT(17)
+#define SET_NB_DRAM_INJECTION_WRITE(inj)	\
+					(BIT(((inj.word) & 0xF) + 20) | \
+					F10_NB_ARR_ECC_WR_REQ | inj.bit_map)
+#define SET_NB_DRAM_INJECTION_READ(inj)		\
+					(BIT(((inj.word) & 0xF) + 20) | \
+					BIT(16) | inj.bit_map)
+
 
 #define NBCAP			0xE8
 #define NBCAP_CHIPKILL		BIT(4)
@@ -305,9 +307,9 @@ enum amd_families {
 
 /* Error injection control structure */
 struct error_injection {
-	u32	 section;
-	u32	 word;
-	u32	 bit_map;
+	u32	section;
+	u32	word;
+	u32	bit_map;
 };
 
 /* low and high part of PCI config space regs */
@@ -374,6 +376,23 @@ struct amd64_pvt {
 	struct error_injection injection;
 };
 
+enum err_codes {
+	DECODE_OK	=  0,
+	ERR_NODE	= -1,
+	ERR_CSROW	= -2,
+	ERR_CHANNEL	= -3,
+};
+
+struct err_info {
+	int err_code;
+	struct mem_ctl_info *src_mci;
+	int csrow;
+	int channel;
+	u16 syndrome;
+	u32 page;
+	u32 offset;
+};
+
 static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i)
 {
 	u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
@@ -447,7 +466,7 @@ static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
 struct low_ops {
 	int (*early_channel_count)	(struct amd64_pvt *pvt);
 	void (*map_sysaddr_to_csrow)	(struct mem_ctl_info *mci, u64 sys_addr,
-					 u16 syndrome);
+					 struct err_info *);
 	int (*dbam_to_cs)		(struct amd64_pvt *pvt, u8 dct, unsigned cs_mode);
 	int (*read_dct_pci_cfg)		(struct amd64_pvt *pvt, int offset,
 					 u32 *val, const char *func);
@@ -459,6 +478,8 @@ struct amd64_family_type {
 	struct low_ops ops;
 };
 
+int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
+			       u32 *val, const char *func);
 int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
 				u32 val, const char *func);
 
@@ -475,3 +496,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
 			     u64 *hole_offset, u64 *hole_size);
 
 #define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
+/* Injection helpers */
+static inline void disable_caches(void *dummy)
+{
+	write_cr0(read_cr0() | X86_CR0_CD);
+	wbinvd();
+}
+
+static inline void enable_caches(void *dummy)
+{
+	write_cr0(read_cr0() & ~X86_CR0_CD);
+}
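The extra parentheses added to DBAM_DIMM() matter whenever the macro is invoked with an expression rather than a plain variable; a standalone userspace illustration (the DBAM value is made up):

#include <stdio.h>

#define DBAM_DIMM_OLD(i, reg)	((((reg) >> (4*i))) & 0xF)
#define DBAM_DIMM_NEW(i, reg)	((((reg) >> (4*(i)))) & 0xF)

int main(void)
{
	unsigned int dbam = 0xBA98;	/* fields, low to high: 8, 9, 0xA, 0xB */

	/* with the argument "0 + 1", "4*0 + 1" shifts by 1 instead of 4 */
	printf("old: %#x\n", DBAM_DIMM_OLD(0 + 1, dbam));	/* 0xc (wrong)  */
	printf("new: %#x\n", DBAM_DIMM_NEW(0 + 1, dbam));	/* 0x9 (right) */
	return 0;
}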
diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c
index 53d972e00dfb..8c171fa1cb9b 100644
--- a/drivers/edac/amd64_edac_inj.c
+++ b/drivers/edac/amd64_edac_inj.c
@@ -22,20 +22,19 @@ static ssize_t amd64_inject_section_store(struct device *dev,
 	struct mem_ctl_info *mci = to_mci(dev);
 	struct amd64_pvt *pvt = mci->pvt_info;
 	unsigned long value;
-	int ret = 0;
+	int ret;
 
 	ret = strict_strtoul(data, 10, &value);
-	if (ret != -EINVAL) {
+	if (ret < 0)
+		return ret;
 
-		if (value > 3) {
-			amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
-			return -EINVAL;
-		}
-
-		pvt->injection.section = (u32) value;
-		return count;
+	if (value > 3) {
+		amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
+		return -EINVAL;
 	}
-	return ret;
+
+	pvt->injection.section = (u32) value;
+	return count;
 }
 
 static ssize_t amd64_inject_word_show(struct device *dev,
@@ -60,20 +59,19 @@ static ssize_t amd64_inject_word_store(struct device *dev,
 	struct mem_ctl_info *mci = to_mci(dev);
 	struct amd64_pvt *pvt = mci->pvt_info;
 	unsigned long value;
-	int ret = 0;
+	int ret;
 
 	ret = strict_strtoul(data, 10, &value);
-	if (ret != -EINVAL) {
+	if (ret < 0)
+		return ret;
 
-		if (value > 8) {
-			amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
-			return -EINVAL;
-		}
-
-		pvt->injection.word = (u32) value;
-		return count;
+	if (value > 8) {
+		amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
+		return -EINVAL;
 	}
-	return ret;
+
+	pvt->injection.word = (u32) value;
+	return count;
 }
 
 static ssize_t amd64_inject_ecc_vector_show(struct device *dev,
@@ -97,21 +95,19 @@ static ssize_t amd64_inject_ecc_vector_store(struct device *dev,
 	struct mem_ctl_info *mci = to_mci(dev);
 	struct amd64_pvt *pvt = mci->pvt_info;
 	unsigned long value;
-	int ret = 0;
+	int ret;
 
 	ret = strict_strtoul(data, 16, &value);
-	if (ret != -EINVAL) {
+	if (ret < 0)
+		return ret;
 
-		if (value & 0xFFFF0000) {
-			amd64_warn("%s: invalid EccVector: 0x%lx\n",
-				   __func__, value);
-			return -EINVAL;
-		}
-
-		pvt->injection.bit_map = (u32) value;
-		return count;
+	if (value & 0xFFFF0000) {
+		amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value);
+		return -EINVAL;
 	}
-	return ret;
+
+	pvt->injection.bit_map = (u32) value;
+	return count;
 }
 
 /*
@@ -126,28 +122,25 @@ static ssize_t amd64_inject_read_store(struct device *dev,
 	struct amd64_pvt *pvt = mci->pvt_info;
 	unsigned long value;
 	u32 section, word_bits;
-	int ret = 0;
+	int ret;
 
 	ret = strict_strtoul(data, 10, &value);
-	if (ret != -EINVAL) {
+	if (ret < 0)
+		return ret;
 
-		/* Form value to choose 16-byte section of cacheline */
-		section = F10_NB_ARRAY_DRAM_ECC |
-				SET_NB_ARRAY_ADDRESS(pvt->injection.section);
-		amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
+	/* Form value to choose 16-byte section of cacheline */
+	section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
 
-		word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word,
-						pvt->injection.bit_map);
+	amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
 
-		/* Issue 'word' and 'bit' along with the READ request */
-		amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
+	word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection);
 
-		edac_dbg(0, "section=0x%x word_bits=0x%x\n",
-			 section, word_bits);
+	/* Issue 'word' and 'bit' along with the READ request */
+	amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
 
-		return count;
-	}
-	return ret;
+	edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
+
+	return count;
 }
 
 /*
@@ -160,30 +153,43 @@ static ssize_t amd64_inject_write_store(struct device *dev,
 {
 	struct mem_ctl_info *mci = to_mci(dev);
 	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 section, word_bits, tmp;
 	unsigned long value;
-	u32 section, word_bits;
-	int ret = 0;
+	int ret;
 
 	ret = strict_strtoul(data, 10, &value);
-	if (ret != -EINVAL) {
+	if (ret < 0)
+		return ret;
+
+	/* Form value to choose 16-byte section of cacheline */
+	section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
+
+	amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
 
-		/* Form value to choose 16-byte section of cacheline */
-		section = F10_NB_ARRAY_DRAM_ECC |
-				SET_NB_ARRAY_ADDRESS(pvt->injection.section);
-		amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
+	word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
 
-		word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word,
-						pvt->injection.bit_map);
+	pr_notice_once("Don't forget to decrease MCE polling interval in\n"
+			"/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
+			"so that you can get the error report faster.\n");
 
-		/* Issue 'word' and 'bit' along with the READ request */
-		amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
+	on_each_cpu(disable_caches, NULL, 1);
 
-		edac_dbg(0, "section=0x%x word_bits=0x%x\n",
-			 section, word_bits);
+	/* Issue 'word' and 'bit' along with the READ request */
+	amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
 
-		return count;
+ retry:
+	/* wait until injection happens */
+	amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
+	if (tmp & F10_NB_ARR_ECC_WR_REQ) {
+		cpu_relax();
+		goto retry;
 	}
-	return ret;
+
+	on_each_cpu(enable_caches, NULL, 1);
+
+	edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
+
+	return count;
 }
 
 /*
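For reference, the injection attributes above are driven entirely from sysfs. A hypothetical userspace sequence is sketched below; the mc0 paths are assumptions based on the usual EDAC sysfs layout and should be verified on the target system:

#include <stdio.h>

static int wr(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%s", val);
	return fclose(f);
}

int main(void)
{
	/* assumed paths -- adjust to where the mci device attributes live */
	wr("/sys/devices/system/edac/mc/mc0/inject_section", "2");
	wr("/sys/devices/system/edac/mc/mc0/inject_word", "0");
	wr("/sys/devices/system/edac/mc/mc0/inject_ecc_vector", "1");
	wr("/sys/devices/system/edac/mc/mc0/inject_write", "1");	/* trigger */
	return 0;
}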
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 90f0b730e9bb..281f566a5513 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -416,10 +416,18 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 		dimm->cschannel = chn;
 
 		/* Increment csrow location */
-		row++;
-		if (row == tot_csrows) {
-			row = 0;
+		if (layers[0].is_virt_csrow) {
 			chn++;
+			if (chn == tot_channels) {
+				chn = 0;
+				row++;
+			}
+		} else {
+			row++;
+			if (row == tot_csrows) {
+				row = 0;
+				chn++;
+			}
 		}
 
 		/* Increment dimm location */
@@ -966,20 +974,22 @@ static void edac_ce_error(struct mem_ctl_info *mci,
 			  long grain)
 {
 	unsigned long remapped_page;
+	char *msg_aux = "";
+
+	if (*msg)
+		msg_aux = " ";
 
 	if (edac_mc_get_log_ce()) {
 		if (other_detail && *other_detail)
 			edac_mc_printk(mci, KERN_WARNING,
-				       "%d CE %s on %s (%s %s - %s)\n",
-				       error_count,
-				       msg, label, location,
-				       detail, other_detail);
+				       "%d CE %s%son %s (%s %s - %s)\n",
+				       error_count, msg, msg_aux, label,
+				       location, detail, other_detail);
 		else
 			edac_mc_printk(mci, KERN_WARNING,
-				       "%d CE %s on %s (%s %s)\n",
-				       error_count,
-				       msg, label, location,
-				       detail);
+				       "%d CE %s%son %s (%s %s)\n",
+				       error_count, msg, msg_aux, label,
+				       location, detail);
 	}
 	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
 
@@ -1014,27 +1024,31 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 			  const char *other_detail,
 			  const bool enable_per_layer_report)
 {
+	char *msg_aux = "";
+
+	if (*msg)
+		msg_aux = " ";
+
 	if (edac_mc_get_log_ue()) {
 		if (other_detail && *other_detail)
 			edac_mc_printk(mci, KERN_WARNING,
-				       "%d UE %s on %s (%s %s - %s)\n",
-				       error_count,
-				       msg, label, location, detail,
-				       other_detail);
+				       "%d UE %s%son %s (%s %s - %s)\n",
+				       error_count, msg, msg_aux, label,
+				       location, detail, other_detail);
 		else
 			edac_mc_printk(mci, KERN_WARNING,
-				       "%d UE %s on %s (%s %s)\n",
-				       error_count,
-				       msg, label, location, detail);
+				       "%d UE %s%son %s (%s %s)\n",
+				       error_count, msg, msg_aux, label,
+				       location, detail);
 	}
 
 	if (edac_mc_get_panic_on_ue()) {
 		if (other_detail && *other_detail)
-			panic("UE %s on %s (%s%s - %s)\n",
-			      msg, label, location, detail, other_detail);
+			panic("UE %s%son %s (%s%s - %s)\n",
+			      msg, msg_aux, label, location, detail, other_detail);
 		else
-			panic("UE %s on %s (%s%s)\n",
-			      msg, label, location, detail);
+			panic("UE %s%son %s (%s%s)\n",
+			      msg, msg_aux, label, location, detail);
 	}
 
 	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
@@ -1093,10 +1107,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	 */
 	for (i = 0; i < mci->n_layers; i++) {
 		if (pos[i] >= (int)mci->layers[i].size) {
-			if (type == HW_EVENT_ERR_CORRECTED)
-				p = "CE";
-			else
-				p = "UE";
 
 			edac_mc_printk(mci, KERN_ERR,
 				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
@@ -1128,6 +1138,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	grain = 0;
 	p = label;
 	*p = '\0';
+
 	for (i = 0; i < mci->tot_dimms; i++) {
 		struct dimm_info *dimm = mci->dimms[i];
 
@@ -1195,6 +1206,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 
 	/* Fill the RAM location data */
 	p = location;
+
 	for (i = 0; i < mci->n_layers; i++) {
 		if (pos[i] < 0)
 			continue;
@@ -1207,7 +1219,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 		*(p - 1) = '\0';
 
 	/* Report the error via the trace interface */
-
 	grain_bits = fls_long(grain) + 1;
 	trace_mc_event(type, msg, label, error_count,
 		       mci->mc_idx, top_layer, mid_layer, low_layer,
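The msg_aux change in edac_ce_error()/edac_ue_error() above exists so an empty msg does not leave a double space in the log line. The same trick in a standalone demo:

#include <stdio.h>

static void log_ce(int count, const char *msg, const char *label)
{
	const char *msg_aux = *msg ? " " : "";

	printf("%d CE %s%son %s\n", count, msg, msg_aux, label);
}

int main(void)
{
	log_ce(1, "", "mc#0csrow#2channel#0");		/* "1 CE on ..."            */
	log_ce(1, "read error", "mc#0csrow#2channel#0");	/* "1 CE read error on ..." */
	return 0;
}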
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index ed0bc07b8503..de2df92f9c77 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -180,6 +180,9 @@ static ssize_t csrow_size_show(struct device *dev,
 	int i;
 	u32 nr_pages = 0;
 
+	if (csrow->mci->csbased)
+		return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages));
+
 	for (i = 0; i < csrow->nr_channels; i++)
 		nr_pages += csrow->channels[i]->dimm->nr_pages;
 	return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages));
@@ -373,6 +376,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
 	csrow->dev.bus = &mci->bus;
 	device_initialize(&csrow->dev);
 	csrow->dev.parent = &mci->dev;
+	csrow->mci = mci;
 	dev_set_name(&csrow->dev, "csrow%d", index);
 	dev_set_drvdata(&csrow->dev, csrow);
 
@@ -777,10 +781,14 @@ static ssize_t mci_size_mb_show(struct device *dev,
 	for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) {
 		struct csrow_info *csrow = mci->csrows[csrow_idx];
 
-		for (j = 0; j < csrow->nr_channels; j++) {
-			struct dimm_info *dimm = csrow->channels[j]->dimm;
+		if (csrow->mci->csbased) {
+			total_pages += csrow->nr_pages;
+		} else {
+			for (j = 0; j < csrow->nr_channels; j++) {
+				struct dimm_info *dimm = csrow->channels[j]->dimm;
 
-			total_pages += dimm->nr_pages;
+				total_pages += dimm->nr_pages;
+			}
 		}
 	}
 
@@ -838,14 +846,8 @@ static ssize_t edac_fake_inject_write(struct file *file,
 	return count;
 }
 
-static int debugfs_open(struct inode *inode, struct file *file)
-{
-	file->private_data = inode->i_private;
-	return 0;
-}
-
 static const struct file_operations debug_fake_inject_fops = {
-	.open = debugfs_open,
+	.open = simple_open,
 	.write = edac_fake_inject_write,
 	.llseek = generic_file_llseek,
 };
@@ -1124,10 +1126,15 @@ int __init edac_mc_sysfs_init(void)
 	edac_subsys = edac_get_sysfs_subsys();
 	if (edac_subsys == NULL) {
 		edac_dbg(1, "no edac_subsys\n");
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 
 	mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL);
+	if (!mci_pdev) {
+		err = -ENOMEM;
+		goto out_put_sysfs;
+	}
 
 	mci_pdev->bus = edac_subsys;
 	mci_pdev->type = &mc_attr_type;
@@ -1136,11 +1143,18 @@ int __init edac_mc_sysfs_init(void)
 
 	err = device_add(mci_pdev);
 	if (err < 0)
-		return err;
+		goto out_dev_free;
 
 	edac_dbg(0, "device %s created\n", dev_name(mci_pdev));
 
 	return 0;
+
+ out_dev_free:
+	kfree(mci_pdev);
+ out_put_sysfs:
+	edac_put_sysfs_subsys();
+ out:
+	return err;
 }
1145 1159
1146void __exit edac_mc_sysfs_exit(void) 1160void __exit edac_mc_sysfs_exit(void)
@@ -1148,4 +1162,5 @@ void __exit edac_mc_sysfs_exit(void)
1148 put_device(mci_pdev); 1162 put_device(mci_pdev);
1149 device_del(mci_pdev); 1163 device_del(mci_pdev);
1150 edac_put_sysfs_subsys(); 1164 edac_put_sysfs_subsys();
1165 kfree(mci_pdev);
1151} 1166}
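The reworked edac_mc_sysfs_init() above follows the usual kernel unwind pattern: one goto label per acquired resource, released in reverse order. A reduced standalone skeleton of the same shape (malloc/free standing in for the EDAC calls):

#include <stdio.h>
#include <stdlib.h>

static void *subsys, *pdev;

static int example_init(void)
{
	int err = 0;

	subsys = malloc(32);		/* stands in for edac_get_sysfs_subsys() */
	if (!subsys) {
		err = -1;
		goto out;
	}

	pdev = malloc(32);		/* stands in for kzalloc() of mci_pdev */
	if (!pdev) {
		err = -2;
		goto out_put_subsys;
	}

	return 0;			/* success: keep both resources */

 out_put_subsys:
	free(subsys);			/* undo only what succeeded */
 out:
	return err;
}

int main(void)
{
	printf("init: %d\n", example_init());
	return 0;
}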
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 58a28d838f37..12c951a2c33d 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -18,9 +18,29 @@
 #define EDAC_VERSION "Ver: 3.0.0"
 
 #ifdef CONFIG_EDAC_DEBUG
+
+static int edac_set_debug_level(const char *buf, struct kernel_param *kp)
+{
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val < 0 || val > 4)
+		return -EINVAL;
+
+	return param_set_int(buf, kp);
+}
+
 /* Values of 0 to 4 will generate output */
 int edac_debug_level = 2;
 EXPORT_SYMBOL_GPL(edac_debug_level);
+
+module_param_call(edac_debug_level, edac_set_debug_level, param_get_int,
+		  &edac_debug_level, 0644);
+MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2");
 #endif
 
 /* scope is to module level only */
@@ -132,10 +152,3 @@ module_exit(edac_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al");
 MODULE_DESCRIPTION("Core library routines for EDAC reporting");
-
-/* refer to *_sysfs.c files for parameters that are exported via sysfs */
-
-#ifdef CONFIG_EDAC_DEBUG
-module_param(edac_debug_level, int, 0644);
-MODULE_PARM_DESC(edac_debug_level, "Debug level");
-#endif
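One nit worth noting in the new edac_set_debug_level(): val is an unsigned long, so the "val < 0" half of the range check can never fire (the "val > 4" half still rejects anything out of range, including negative inputs that wrapped around, so behavior is correct). A two-line demo:

#include <stdio.h>

int main(void)
{
	unsigned long val = (unsigned long)-1;

	/* "val < 0" on an unsigned type is always false */
	printf("%d %d\n", val < 0, val > 4);	/* prints: 0 1 */
	return 0;
}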
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index ee87ef972ead..dd370f92ace3 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -470,7 +470,8 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
 
 	pci->mod_name = mod_name;
 	pci->ctl_name = EDAC_PCI_GENCTL_NAME;
-	pci->edac_check = edac_pci_generic_check;
+	if (edac_op_state == EDAC_OPSTATE_POLL)
+		pci->edac_check = edac_pci_generic_check;
 
 	pdata->edac_idx = edac_pci_idx++;
 
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index e164c555a337..dc6e905ee1a5 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -645,20 +645,16 @@ typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
 
 /*
  * pci_dev parity list iterator
- *	Scan the PCI device list for one pass, looking for SERRORs
- *	Master Parity ERRORS or Parity ERRORs on primary or secondary devices
+ *
+ *	Scan the PCI device list looking for SERRORs, Master Parity ERRORS or
+ *	Parity ERRORs on primary or secondary devices.
  */
 static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
 {
 	struct pci_dev *dev = NULL;
 
-	/* request for kernel access to the next PCI device, if any,
-	 * and while we are looking at it have its reference count
-	 * bumped until we are done with it
-	 */
-	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+	for_each_pci_dev(dev)
 		fn(dev);
-	}
 }
 
 /*
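The for_each_pci_dev() conversion is behavior-preserving: the macro (from include/linux/pci.h) expands to the same pci_get_device() walk the deleted comment described, with the same reference-count handling:

/* include/linux/pci.h */
#define for_each_pci_dev(d) \
	while ((d = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, d)) != NULL)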
diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c
index c769f477fd22..7ea4cc2e8bd2 100644
--- a/drivers/edac/highbank_mc_edac.c
+++ b/drivers/edac/highbank_mc_edac.c
@@ -113,14 +113,8 @@ static ssize_t highbank_mc_err_inject_write(struct file *file,
 	return count;
 }
 
-static int debugfs_open(struct inode *inode, struct file *file)
-{
-	file->private_data = inode->i_private;
-	return 0;
-}
-
 static const struct file_operations highbank_mc_debug_inject_fops = {
-	.open = debugfs_open,
+	.open = simple_open,
 	.write = highbank_mc_err_inject_write,
 	.llseek = generic_file_llseek,
 };
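simple_open() is the stock helper from fs/libfs.c that both this driver and edac_mc_sysfs.c switch to; it does essentially what the removed local debugfs_open() did, with an added NULL check:

/* fs/libfs.c */
int simple_open(struct inode *inode, struct file *file)
{
	if (inode->i_private)
		file->private_data = inode->i_private;
	return 0;
}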
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index a09d0667f72a..9d669cd43618 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -197,8 +197,8 @@ static const char *ferr_fat_fbd_name[] = {
 	[0]  = "Memory Write error on non-redundant retry or "
 	       "FBD configuration Write error on retry",
 };
-#define GET_FBD_FAT_IDX(fbderr)	(fbderr & (3 << 28))
-#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
+#define GET_FBD_FAT_IDX(fbderr)	(((fbderr) >> 28) & 3)
+#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 22))
 
 #define FERR_NF_FBD	0xa0
 static const char *ferr_nf_fbd_name[] = {
@@ -225,7 +225,7 @@ static const char *ferr_nf_fbd_name[] = {
 	[1]  = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
 	[0]  = "Uncorrectable Data ECC on Replay",
 };
-#define GET_FBD_NF_IDX(fbderr)	(fbderr & (3 << 28))
+#define GET_FBD_NF_IDX(fbderr)	(((fbderr) >> 28) & 3)
 #define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\
 			      (1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\
 			      (1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\
@@ -464,7 +464,7 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
 		errnum = find_first_bit(&errors,
 					ARRAY_SIZE(ferr_nf_fbd_name));
 		specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
-		branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
+		branch = (GET_FBD_NF_IDX(error_reg) == 2) ? 1 : 0;
 
 		pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
 				REDMEMA, &syndrome);
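The GET_FBD_*_IDX() fix above is a classic mask-without-shift bug: the old macro returned the field still in place at bits [29:28], so a comparison like "== 2" could never be true. A standalone demonstration:

#include <stdio.h>

#define GET_FBD_IDX_OLD(fbderr)	(fbderr & (3 << 28))
#define GET_FBD_IDX_NEW(fbderr)	(((fbderr) >> 28) & 3)

int main(void)
{
	unsigned int reg = 0x20000000;	/* index field holds the value 2 */

	printf("old: %#x  new: %u\n",
	       GET_FBD_IDX_OLD(reg), GET_FBD_IDX_NEW(reg));
	/* prints: old: 0x20000000  new: 2 */
	return 0;
}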
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 3672101023bd..10c8c00d6469 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -816,7 +816,7 @@ static ssize_t i7core_inject_store_##param( \
 				       struct device_attribute *mattr,	\
 				       const char *data, size_t count)	\
 {									\
-	struct mem_ctl_info *mci = to_mci(dev);				\
+	struct mem_ctl_info *mci = dev_get_drvdata(dev);		\
 	struct i7core_pvt *pvt;						\
 	long value;							\
 	int rc;								\
@@ -845,7 +845,7 @@ static ssize_t i7core_inject_show_##param( \
 				      struct device_attribute *mattr,	\
 				      char *data)			\
 {									\
-	struct mem_ctl_info *mci = to_mci(dev);				\
+	struct mem_ctl_info *mci = dev_get_drvdata(dev);		\
 	struct i7core_pvt *pvt;						\
 									\
 	pvt = mci->pvt_info;						\
@@ -1052,7 +1052,7 @@ static ssize_t i7core_show_counter_##param( \
 					    struct device_attribute *mattr, \
 					    char *data)			\
 {									\
-	struct mem_ctl_info *mci = to_mci(dev);				\
+	struct mem_ctl_info *mci = dev_get_drvdata(dev);		\
 	struct i7core_pvt *pvt = mci->pvt_info;				\
 									\
 	edac_dbg(1, "\n");						\
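The to_mci() to dev_get_drvdata() change matters because container_of() is only valid when the struct device actually sits inside a mem_ctl_info; these attributes hang off auxiliary devices that merely carry the mci pointer in drvdata. A self-contained illustration of the two lookups (simplified structs, not the kernel's):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct device { void *driver_data; };
struct mem_ctl_info { int idx; struct device dev; };

int main(void)
{
	struct mem_ctl_info mci = { .idx = 7 };
	struct device other = { .driver_data = &mci };	/* auxiliary device */

	/* container_of() works only for the device embedded in the mci: */
	printf("%d\n", container_of(&mci.dev, struct mem_ctl_info, dev)->idx);

	/* for any other device, the mci must come from drvdata: */
	printf("%d\n", ((struct mem_ctl_info *)other.driver_data)->idx);
	return 0;
}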
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index 069e26c11c4f..a98020409fa9 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -370,10 +370,6 @@ static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank)
 static void i82975x_init_csrows(struct mem_ctl_info *mci,
 		struct pci_dev *pdev, void __iomem *mch_window)
 {
-	static const char *labels[4] = {
-			"DIMM A1", "DIMM A2",
-			"DIMM B1", "DIMM B2"
-	};
 	struct csrow_info *csrow;
 	unsigned long last_cumul_size;
 	u8 value;
@@ -423,9 +419,10 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
 			dimm = mci->csrows[index]->channels[chan]->dimm;
 
 			dimm->nr_pages = nr_pages / csrow->nr_channels;
-			strncpy(csrow->channels[chan]->dimm->label,
-					labels[(index >> 1) + (chan * 2)],
-					EDAC_MC_LABEL_LEN);
+
+			snprintf(csrow->channels[chan]->dimm->label, EDAC_MC_LABEL_LEN, "DIMM %c%d",
+				 (chan == 0) ? 'A' : 'B',
+				 index);
 			dimm->grain = 1 << 7;	/* 128Byte cache-line resolution */
 			dimm->dtype = i82975x_dram_type(mch_window, index);
 			dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */
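With the snprintf() form, the DIMM labels are generated from channel and csrow index instead of a fixed four-entry table. A quick standalone check of what it produces (the EDAC_MC_LABEL_LEN value is an assumption):

#include <stdio.h>

#define EDAC_MC_LABEL_LEN 31	/* assumed to match the kernel's value */

int main(void)
{
	char label[EDAC_MC_LABEL_LEN + 1];
	int index, chan;

	for (index = 0; index < 2; index++)
		for (chan = 0; chan < 2; chan++) {
			snprintf(label, EDAC_MC_LABEL_LEN, "DIMM %c%d",
				 (chan == 0) ? 'A' : 'B', index);
			printf("csrow %d chan %d -> %s\n", index, chan, label);
		}
	return 0;	/* DIMM A0, DIMM B0, DIMM A1, DIMM B1 */
}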
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index d0c372e30de4..ad637572d8c7 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -64,7 +64,7 @@ EXPORT_SYMBOL_GPL(to_msgs);
 const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
 EXPORT_SYMBOL_GPL(ii_msgs);
 
-static const char * const f15h_ic_mce_desc[] = {
+static const char * const f15h_mc1_mce_desc[] = {
 	"UC during a demand linefill from L2",
 	"Parity error during data load from IC",
 	"Parity error for IC valid bit",
@@ -84,7 +84,7 @@ static const char * const f15h_ic_mce_desc[] = {
84 "fetch address FIFO" 84 "fetch address FIFO"
85}; 85};
86 86
87static const char * const f15h_cu_mce_desc[] = { 87static const char * const f15h_mc2_mce_desc[] = {
88 "Fill ECC error on data fills", /* xec = 0x4 */ 88 "Fill ECC error on data fills", /* xec = 0x4 */
89 "Fill parity error on insn fills", 89 "Fill parity error on insn fills",
90 "Prefetcher request FIFO parity error", 90 "Prefetcher request FIFO parity error",
@@ -101,7 +101,7 @@ static const char * const f15h_cu_mce_desc[] = {
101 "PRB address parity error" 101 "PRB address parity error"
102}; 102};
103 103
104static const char * const nb_mce_desc[] = { 104static const char * const mc4_mce_desc[] = {
105 "DRAM ECC error detected on the NB", 105 "DRAM ECC error detected on the NB",
106 "CRC error detected on HT link", 106 "CRC error detected on HT link",
107 "Link-defined sync error packets detected on HT link", 107 "Link-defined sync error packets detected on HT link",
@@ -123,7 +123,7 @@ static const char * const nb_mce_desc[] = {
123 "ECC Error in the Probe Filter directory" 123 "ECC Error in the Probe Filter directory"
124}; 124};
125 125
126static const char * const fr_ex_mce_desc[] = { 126static const char * const mc5_mce_desc[] = {
127 "CPU Watchdog timer expire", 127 "CPU Watchdog timer expire",
128 "Wakeup array dest tag", 128 "Wakeup array dest tag",
129 "AG payload array", 129 "AG payload array",
@@ -139,7 +139,7 @@ static const char * const fr_ex_mce_desc[] = {
139 "DE error occurred" 139 "DE error occurred"
140}; 140};
141 141
142static bool f12h_dc_mce(u16 ec, u8 xec) 142static bool f12h_mc0_mce(u16 ec, u8 xec)
143{ 143{
144 bool ret = false; 144 bool ret = false;
145 145
@@ -157,26 +157,26 @@ static bool f12h_dc_mce(u16 ec, u8 xec)
157 return ret; 157 return ret;
158} 158}
159 159
160static bool f10h_dc_mce(u16 ec, u8 xec) 160static bool f10h_mc0_mce(u16 ec, u8 xec)
161{ 161{
162 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { 162 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
163 pr_cont("during data scrub.\n"); 163 pr_cont("during data scrub.\n");
164 return true; 164 return true;
165 } 165 }
166 return f12h_dc_mce(ec, xec); 166 return f12h_mc0_mce(ec, xec);
167} 167}
168 168
169static bool k8_dc_mce(u16 ec, u8 xec) 169static bool k8_mc0_mce(u16 ec, u8 xec)
170{ 170{
171 if (BUS_ERROR(ec)) { 171 if (BUS_ERROR(ec)) {
172 pr_cont("during system linefill.\n"); 172 pr_cont("during system linefill.\n");
173 return true; 173 return true;
174 } 174 }
175 175
176 return f10h_dc_mce(ec, xec); 176 return f10h_mc0_mce(ec, xec);
177} 177}
178 178
179static bool f14h_dc_mce(u16 ec, u8 xec) 179static bool f14h_mc0_mce(u16 ec, u8 xec)
180{ 180{
181 u8 r4 = R4(ec); 181 u8 r4 = R4(ec);
182 bool ret = true; 182 bool ret = true;
@@ -228,7 +228,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec)
228 return ret; 228 return ret;
229} 229}
230 230
231static bool f15h_dc_mce(u16 ec, u8 xec) 231static bool f15h_mc0_mce(u16 ec, u8 xec)
232{ 232{
233 bool ret = true; 233 bool ret = true;
234 234
@@ -275,12 +275,12 @@ static bool f15h_dc_mce(u16 ec, u8 xec)
275 return ret; 275 return ret;
276} 276}
277 277
278static void amd_decode_dc_mce(struct mce *m) 278static void decode_mc0_mce(struct mce *m)
279{ 279{
280 u16 ec = EC(m->status); 280 u16 ec = EC(m->status);
281 u8 xec = XEC(m->status, xec_mask); 281 u8 xec = XEC(m->status, xec_mask);
282 282
283 pr_emerg(HW_ERR "Data Cache Error: "); 283 pr_emerg(HW_ERR "MC0 Error: ");
284 284
285 /* TLB error signatures are the same across families */ 285 /* TLB error signatures are the same across families */
286 if (TLB_ERROR(ec)) { 286 if (TLB_ERROR(ec)) {
@@ -290,13 +290,13 @@ static void amd_decode_dc_mce(struct mce *m)
290 : (xec ? "multimatch" : "parity"))); 290 : (xec ? "multimatch" : "parity")));
291 return; 291 return;
292 } 292 }
293 } else if (fam_ops->dc_mce(ec, xec)) 293 } else if (fam_ops->mc0_mce(ec, xec))
294 ; 294 ;
295 else 295 else
296 pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); 296 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
297} 297}
298 298
299static bool k8_ic_mce(u16 ec, u8 xec) 299static bool k8_mc1_mce(u16 ec, u8 xec)
300{ 300{
301 u8 ll = LL(ec); 301 u8 ll = LL(ec);
302 bool ret = true; 302 bool ret = true;
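
For readers following the rename from *_dc_mce/*_ic_mce to the bank-numbered *_mc0_mce/*_mc1_mce hooks: every decoder above works on two fields carved out of MCi_STATUS. A rough sketch of that extraction, assuming the usual EC()/XEC() layout from mce_amd.h (low 16 bits are the architectural error code, the extended code starts at bit 16 and is masked per family):

	#include <stdint.h>
	#include <stdio.h>

	/* Assumed field layout, modelled on the EC()/XEC() macros:
	 * bits [15:0] - architectural error code (ec)
	 * bits [16+]  - extended error code (xec), width per family */
	#define EC(status)        ((uint16_t)((status) & 0xffff))
	#define XEC(status, mask) ((uint8_t)(((status) >> 16) & (mask)))

	int main(void)
	{
		uint64_t status = 0x9c08400040080a13ULL; /* made-up sample */
		uint8_t xec_mask = 0x1f;	/* F15h uses 5 xec bits */

		printf("ec  = 0x%04x\n", (unsigned)EC(status));
		printf("xec = 0x%02x\n", (unsigned)XEC(status, xec_mask));
		return 0;
	}
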
@@ -330,7 +330,7 @@ static bool k8_ic_mce(u16 ec, u8 xec)
330 return ret; 330 return ret;
331} 331}
332 332
333static bool f14h_ic_mce(u16 ec, u8 xec) 333static bool f14h_mc1_mce(u16 ec, u8 xec)
334{ 334{
335 u8 r4 = R4(ec); 335 u8 r4 = R4(ec);
336 bool ret = true; 336 bool ret = true;
@@ -349,7 +349,7 @@ static bool f14h_ic_mce(u16 ec, u8 xec)
349 return ret; 349 return ret;
350} 350}
351 351
352static bool f15h_ic_mce(u16 ec, u8 xec) 352static bool f15h_mc1_mce(u16 ec, u8 xec)
353{ 353{
354 bool ret = true; 354 bool ret = true;
355 355
@@ -358,19 +358,19 @@ static bool f15h_ic_mce(u16 ec, u8 xec)
358 358
359 switch (xec) { 359 switch (xec) {
360 case 0x0 ... 0xa: 360 case 0x0 ... 0xa:
361 pr_cont("%s.\n", f15h_ic_mce_desc[xec]); 361 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
362 break; 362 break;
363 363
364 case 0xd: 364 case 0xd:
365 pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); 365 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
366 break; 366 break;
367 367
368 case 0x10: 368 case 0x10:
369 pr_cont("%s.\n", f15h_ic_mce_desc[xec-4]); 369 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
370 break; 370 break;
371 371
372 case 0x11 ... 0x14: 372 case 0x11 ... 0x14:
373 pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); 373 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
374 break; 374 break;
375 375
376 default: 376 default:
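
The xec-2/xec-4 arithmetic in the hunk above is gap compression: f15h_mc1_mce_desc[] stores only the defined extended error codes, so holes in the code space are subtracted away before indexing. A sketch of the resulting mapping (the skipped ranges are inferred from the offsets in the switch, not from documentation):

	#include <stdio.h>

	/* Hypothetical helper mirroring f15h_mc1_mce(): map an F15h MC1
	 * extended error code to its slot in f15h_mc1_mce_desc[].
	 * Codes 0xb-0xc and 0xe-0xf are inferred to be reserved. */
	static int f15h_mc1_desc_index(unsigned int xec)
	{
		if (xec <= 0xa)
			return xec;		/* 1:1 for 0x0..0xa */
		if (xec == 0xd)
			return xec - 2;		/* hole at 0xb, 0xc */
		if (xec >= 0x10 && xec <= 0x14)
			return xec - 4;		/* also skip 0xe, 0xf */
		return -1;			/* unknown/reserved code */
	}

	int main(void)
	{
		unsigned int xec;

		for (xec = 0; xec <= 0x14; xec++)
			printf("xec 0x%02x -> desc[%d]\n", xec,
			       f15h_mc1_desc_index(xec));
		return 0;
	}
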
@@ -379,12 +379,12 @@ static bool f15h_ic_mce(u16 ec, u8 xec)
379 return ret; 379 return ret;
380} 380}
381 381
382static void amd_decode_ic_mce(struct mce *m) 382static void decode_mc1_mce(struct mce *m)
383{ 383{
384 u16 ec = EC(m->status); 384 u16 ec = EC(m->status);
385 u8 xec = XEC(m->status, xec_mask); 385 u8 xec = XEC(m->status, xec_mask);
386 386
387 pr_emerg(HW_ERR "Instruction Cache Error: "); 387 pr_emerg(HW_ERR "MC1 Error: ");
388 388
389 if (TLB_ERROR(ec)) 389 if (TLB_ERROR(ec))
390 pr_cont("%s TLB %s.\n", LL_MSG(ec), 390 pr_cont("%s TLB %s.\n", LL_MSG(ec),
@@ -393,18 +393,18 @@ static void amd_decode_ic_mce(struct mce *m)
393 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); 393 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
394 394
395 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); 395 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
396 } else if (fam_ops->ic_mce(ec, xec)) 396 } else if (fam_ops->mc1_mce(ec, xec))
397 ; 397 ;
398 else 398 else
399 pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); 399 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
400} 400}
401 401
402static void amd_decode_bu_mce(struct mce *m) 402static void decode_mc2_mce(struct mce *m)
403{ 403{
404 u16 ec = EC(m->status); 404 u16 ec = EC(m->status);
405 u8 xec = XEC(m->status, xec_mask); 405 u8 xec = XEC(m->status, xec_mask);
406 406
407 pr_emerg(HW_ERR "Bus Unit Error"); 407 pr_emerg(HW_ERR "MC2 Error");
408 408
409 if (xec == 0x1) 409 if (xec == 0x1)
410 pr_cont(" in the write data buffers.\n"); 410 pr_cont(" in the write data buffers.\n");
@@ -429,24 +429,24 @@ static void amd_decode_bu_mce(struct mce *m)
429 pr_cont(": %s parity/ECC error during data " 429 pr_cont(": %s parity/ECC error during data "
430 "access from L2.\n", R4_MSG(ec)); 430 "access from L2.\n", R4_MSG(ec));
431 else 431 else
432 goto wrong_bu_mce; 432 goto wrong_mc2_mce;
433 } else 433 } else
434 goto wrong_bu_mce; 434 goto wrong_mc2_mce;
435 } else 435 } else
436 goto wrong_bu_mce; 436 goto wrong_mc2_mce;
437 437
438 return; 438 return;
439 439
440wrong_bu_mce: 440 wrong_mc2_mce:
441 pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); 441 pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
442} 442}
443 443
444static void amd_decode_cu_mce(struct mce *m) 444static void decode_f15_mc2_mce(struct mce *m)
445{ 445{
446 u16 ec = EC(m->status); 446 u16 ec = EC(m->status);
447 u8 xec = XEC(m->status, xec_mask); 447 u8 xec = XEC(m->status, xec_mask);
448 448
449 pr_emerg(HW_ERR "Combined Unit Error: "); 449 pr_emerg(HW_ERR "MC2 Error: ");
450 450
451 if (TLB_ERROR(ec)) { 451 if (TLB_ERROR(ec)) {
452 if (xec == 0x0) 452 if (xec == 0x0)
@@ -454,63 +454,63 @@ static void amd_decode_cu_mce(struct mce *m)
454 else if (xec == 0x1) 454 else if (xec == 0x1)
455 pr_cont("Poison data provided for TLB fill.\n"); 455 pr_cont("Poison data provided for TLB fill.\n");
456 else 456 else
457 goto wrong_cu_mce; 457 goto wrong_f15_mc2_mce;
458 } else if (BUS_ERROR(ec)) { 458 } else if (BUS_ERROR(ec)) {
459 if (xec > 2) 459 if (xec > 2)
460 goto wrong_cu_mce; 460 goto wrong_f15_mc2_mce;
461 461
462 pr_cont("Error during attempted NB data read.\n"); 462 pr_cont("Error during attempted NB data read.\n");
463 } else if (MEM_ERROR(ec)) { 463 } else if (MEM_ERROR(ec)) {
464 switch (xec) { 464 switch (xec) {
465 case 0x4 ... 0xc: 465 case 0x4 ... 0xc:
466 pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]); 466 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
467 break; 467 break;
468 468
469 case 0x10 ... 0x14: 469 case 0x10 ... 0x14:
470 pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]); 470 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
471 break; 471 break;
472 472
473 default: 473 default:
474 goto wrong_cu_mce; 474 goto wrong_f15_mc2_mce;
475 } 475 }
476 } 476 }
477 477
478 return; 478 return;
479 479
480wrong_cu_mce: 480 wrong_f15_mc2_mce:
481 pr_emerg(HW_ERR "Corrupted CU MCE info?\n"); 481 pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
482} 482}
483 483
484static void amd_decode_ls_mce(struct mce *m) 484static void decode_mc3_mce(struct mce *m)
485{ 485{
486 u16 ec = EC(m->status); 486 u16 ec = EC(m->status);
487 u8 xec = XEC(m->status, xec_mask); 487 u8 xec = XEC(m->status, xec_mask);
488 488
489 if (boot_cpu_data.x86 >= 0x14) { 489 if (boot_cpu_data.x86 >= 0x14) {
490 pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," 490 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
491 " please report on LKML.\n"); 491 " please report on LKML.\n");
492 return; 492 return;
493 } 493 }
494 494
495 pr_emerg(HW_ERR "Load Store Error"); 495 pr_emerg(HW_ERR "MC3 Error");
496 496
497 if (xec == 0x0) { 497 if (xec == 0x0) {
498 u8 r4 = R4(ec); 498 u8 r4 = R4(ec);
499 499
500 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) 500 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
501 goto wrong_ls_mce; 501 goto wrong_mc3_mce;
502 502
503 pr_cont(" during %s.\n", R4_MSG(ec)); 503 pr_cont(" during %s.\n", R4_MSG(ec));
504 } else 504 } else
505 goto wrong_ls_mce; 505 goto wrong_mc3_mce;
506 506
507 return; 507 return;
508 508
509wrong_ls_mce: 509 wrong_mc3_mce:
510 pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); 510 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
511} 511}
512 512
513void amd_decode_nb_mce(struct mce *m) 513static void decode_mc4_mce(struct mce *m)
514{ 514{
515 struct cpuinfo_x86 *c = &boot_cpu_data; 515 struct cpuinfo_x86 *c = &boot_cpu_data;
516 int node_id = amd_get_nb_id(m->extcpu); 516 int node_id = amd_get_nb_id(m->extcpu);
@@ -518,7 +518,7 @@ void amd_decode_nb_mce(struct mce *m)
518 u8 xec = XEC(m->status, 0x1f); 518 u8 xec = XEC(m->status, 0x1f);
519 u8 offset = 0; 519 u8 offset = 0;
520 520
521 pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id); 521 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
522 522
523 switch (xec) { 523 switch (xec) {
524 case 0x0 ... 0xe: 524 case 0x0 ... 0xe:
@@ -527,9 +527,9 @@ void amd_decode_nb_mce(struct mce *m)
527 if (xec == 0x0 || xec == 0x8) { 527 if (xec == 0x0 || xec == 0x8) {
528 /* no ECCs on F11h */ 528 /* no ECCs on F11h */
529 if (c->x86 == 0x11) 529 if (c->x86 == 0x11)
530 goto wrong_nb_mce; 530 goto wrong_mc4_mce;
531 531
532 pr_cont("%s.\n", nb_mce_desc[xec]); 532 pr_cont("%s.\n", mc4_mce_desc[xec]);
533 533
534 if (nb_bus_decoder) 534 if (nb_bus_decoder)
535 nb_bus_decoder(node_id, m); 535 nb_bus_decoder(node_id, m);
@@ -543,14 +543,14 @@ void amd_decode_nb_mce(struct mce *m)
543 else if (BUS_ERROR(ec)) 543 else if (BUS_ERROR(ec))
544 pr_cont("DMA Exclusion Vector Table Walk error.\n"); 544 pr_cont("DMA Exclusion Vector Table Walk error.\n");
545 else 545 else
546 goto wrong_nb_mce; 546 goto wrong_mc4_mce;
547 return; 547 return;
548 548
549 case 0x19: 549 case 0x19:
550 if (boot_cpu_data.x86 == 0x15) 550 if (boot_cpu_data.x86 == 0x15)
551 pr_cont("Compute Unit Data Error.\n"); 551 pr_cont("Compute Unit Data Error.\n");
552 else 552 else
553 goto wrong_nb_mce; 553 goto wrong_mc4_mce;
554 return; 554 return;
555 555
556 case 0x1c ... 0x1f: 556 case 0x1c ... 0x1f:
@@ -558,46 +558,44 @@ void amd_decode_nb_mce(struct mce *m)
558 break; 558 break;
559 559
560 default: 560 default:
561 goto wrong_nb_mce; 561 goto wrong_mc4_mce;
562 } 562 }
563 563
564 pr_cont("%s.\n", nb_mce_desc[xec - offset]); 564 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
565 return; 565 return;
566 566
567wrong_nb_mce: 567 wrong_mc4_mce:
568 pr_emerg(HW_ERR "Corrupted NB MCE info?\n"); 568 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
569} 569}
570EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
571 570
572static void amd_decode_fr_mce(struct mce *m) 571static void decode_mc5_mce(struct mce *m)
573{ 572{
574 struct cpuinfo_x86 *c = &boot_cpu_data; 573 struct cpuinfo_x86 *c = &boot_cpu_data;
575 u8 xec = XEC(m->status, xec_mask); 574 u8 xec = XEC(m->status, xec_mask);
576 575
577 if (c->x86 == 0xf || c->x86 == 0x11) 576 if (c->x86 == 0xf || c->x86 == 0x11)
578 goto wrong_fr_mce; 577 goto wrong_mc5_mce;
579 578
580 pr_emerg(HW_ERR "%s Error: ", 579 pr_emerg(HW_ERR "MC5 Error: ");
581 (c->x86 == 0x15 ? "Execution Unit" : "FIROB"));
582 580
583 if (xec == 0x0 || xec == 0xc) 581 if (xec == 0x0 || xec == 0xc)
584 pr_cont("%s.\n", fr_ex_mce_desc[xec]); 582 pr_cont("%s.\n", mc5_mce_desc[xec]);
585 else if (xec < 0xd) 583 else if (xec < 0xd)
586 pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]); 584 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
587 else 585 else
588 goto wrong_fr_mce; 586 goto wrong_mc5_mce;
589 587
590 return; 588 return;
591 589
592wrong_fr_mce: 590 wrong_mc5_mce:
593 pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); 591 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
594} 592}
595 593
596static void amd_decode_fp_mce(struct mce *m) 594static void decode_mc6_mce(struct mce *m)
597{ 595{
598 u8 xec = XEC(m->status, xec_mask); 596 u8 xec = XEC(m->status, xec_mask);
599 597
600 pr_emerg(HW_ERR "Floating Point Unit Error: "); 598 pr_emerg(HW_ERR "MC6 Error: ");
601 599
602 switch (xec) { 600 switch (xec) {
603 case 0x1: 601 case 0x1:
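
The MC5 decoder above keeps a small three-way dispatch on the extended error code: 0x0 and 0xc print their mc5_mce_desc[] entry as-is, everything else below 0xd is reported as a parity error in the named array, and the rest is rejected. A dispatch-only sketch (table contents omitted):

	#include <stdio.h>

	/* Sketch of the classification in decode_mc5_mce() above. */
	static void classify_mc5(unsigned int xec)
	{
		if (xec == 0x0 || xec == 0xc)
			printf("xec 0x%02x: mc5_mce_desc[xec] as-is\n", xec);
		else if (xec < 0xd)
			printf("xec 0x%02x: '... parity error'\n", xec);
		else
			printf("xec 0x%02x: Corrupted MC5 MCE info?\n", xec);
	}

	int main(void)
	{
		unsigned int xec;

		for (xec = 0x0; xec <= 0xd; xec++)
			classify_mc5(xec);
		return 0;
	}
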
@@ -621,7 +619,7 @@ static void amd_decode_fp_mce(struct mce *m)
621 break; 619 break;
622 620
623 default: 621 default:
624 goto wrong_fp_mce; 622 goto wrong_mc6_mce;
625 break; 623 break;
626 } 624 }
627 625
@@ -629,8 +627,8 @@ static void amd_decode_fp_mce(struct mce *m)
629 627
630 return; 628 return;
631 629
632wrong_fp_mce: 630 wrong_mc6_mce:
633 pr_emerg(HW_ERR "Corrupted FP MCE info?\n"); 631 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
634} 632}
635 633
636static inline void amd_decode_err_code(u16 ec) 634static inline void amd_decode_err_code(u16 ec)
@@ -669,74 +667,94 @@ static bool amd_filter_mce(struct mce *m)
669 return false; 667 return false;
670} 668}
671 669
670static const char *decode_error_status(struct mce *m)
671{
672 if (m->status & MCI_STATUS_UC) {
673 if (m->status & MCI_STATUS_PCC)
674 return "System Fatal error.";
675 if (m->mcgstatus & MCG_STATUS_RIPV)
676 return "Uncorrected, software restartable error.";
677 return "Uncorrected, software containable error.";
678 }
679
680 if (m->status & MCI_STATUS_DEFERRED)
681 return "Deferred error.";
682
683 return "Corrected error, no action required.";
684}
685
672int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) 686int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
673{ 687{
674 struct mce *m = (struct mce *)data; 688 struct mce *m = (struct mce *)data;
675 struct cpuinfo_x86 *c = &boot_cpu_data; 689 struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
676 int ecc; 690 int ecc;
677 691
678 if (amd_filter_mce(m)) 692 if (amd_filter_mce(m))
679 return NOTIFY_STOP; 693 return NOTIFY_STOP;
680 694
681 pr_emerg(HW_ERR "CPU:%d\tMC%d_STATUS[%s|%s|%s|%s|%s",
682 m->extcpu, m->bank,
683 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
684 ((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
685 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
686 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
687 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
688
689 if (c->x86 == 0x15)
690 pr_cont("|%s|%s",
691 ((m->status & BIT_64(44)) ? "Deferred" : "-"),
692 ((m->status & BIT_64(43)) ? "Poison" : "-"));
693
694 /* do the two bits[14:13] together */
695 ecc = (m->status >> 45) & 0x3;
696 if (ecc)
697 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
698
699 pr_cont("]: 0x%016llx\n", m->status);
700
701 if (m->status & MCI_STATUS_ADDRV)
702 pr_emerg(HW_ERR "\tMC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
703
704 switch (m->bank) { 695 switch (m->bank) {
705 case 0: 696 case 0:
706 amd_decode_dc_mce(m); 697 decode_mc0_mce(m);
707 break; 698 break;
708 699
709 case 1: 700 case 1:
710 amd_decode_ic_mce(m); 701 decode_mc1_mce(m);
711 break; 702 break;
712 703
713 case 2: 704 case 2:
714 if (c->x86 == 0x15) 705 if (c->x86 == 0x15)
715 amd_decode_cu_mce(m); 706 decode_f15_mc2_mce(m);
716 else 707 else
717 amd_decode_bu_mce(m); 708 decode_mc2_mce(m);
718 break; 709 break;
719 710
720 case 3: 711 case 3:
721 amd_decode_ls_mce(m); 712 decode_mc3_mce(m);
722 break; 713 break;
723 714
724 case 4: 715 case 4:
725 amd_decode_nb_mce(m); 716 decode_mc4_mce(m);
726 break; 717 break;
727 718
728 case 5: 719 case 5:
729 amd_decode_fr_mce(m); 720 decode_mc5_mce(m);
730 break; 721 break;
731 722
732 case 6: 723 case 6:
733 amd_decode_fp_mce(m); 724 decode_mc6_mce(m);
734 break; 725 break;
735 726
736 default: 727 default:
737 break; 728 break;
738 } 729 }
739 730
731 pr_emerg(HW_ERR "Error Status: %s\n", decode_error_status(m));
732
733 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
734 m->extcpu,
735 c->x86, c->x86_model, c->x86_mask,
736 m->bank,
737 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
738 ((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
739 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
740 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
741 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
742
743 if (c->x86 == 0x15)
744 pr_cont("|%s|%s",
745 ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
746 ((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
747
748 /* do the two bits[14:13] together */
749 ecc = (m->status >> 45) & 0x3;
750 if (ecc)
751 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
752
753 pr_cont("]: 0x%016llx\n", m->status);
754
755 if (m->status & MCI_STATUS_ADDRV)
756 pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
757
740 amd_decode_err_code(m->status & 0xffff); 758 amd_decode_err_code(m->status & 0xffff);
741 759
742 return NOTIFY_STOP; 760 return NOTIFY_STOP;
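
The reworked amd_decode_mce() above now leads the per-bank decode with a human-readable severity line from the new decode_error_status(). A user-space sketch of that classification, using the status-bit positions this patch works with (the bit numbers below are assumptions matching the MCI_STATUS_* definitions, including the new DEFERRED bit added to mce_amd.h further down):

	#include <stdint.h>
	#include <stdio.h>

	#define BIT_64(n)		(1ULL << (n))
	/* Assumed bit positions: */
	#define MCI_STATUS_UC		BIT_64(61)
	#define MCI_STATUS_PCC		BIT_64(57)
	#define MCI_STATUS_DEFERRED	BIT_64(44)
	#define MCG_STATUS_RIPV		BIT_64(0)

	/* Mirrors decode_error_status() from the hunk above. */
	static const char *decode_error_status(uint64_t status,
					       uint64_t mcgstatus)
	{
		if (status & MCI_STATUS_UC) {
			if (status & MCI_STATUS_PCC)
				return "System Fatal error.";
			if (mcgstatus & MCG_STATUS_RIPV)
				return "Uncorrected, software restartable error.";
			return "Uncorrected, software containable error.";
		}
		if (status & MCI_STATUS_DEFERRED)
			return "Deferred error.";
		return "Corrected error, no action required.";
	}

	int main(void)
	{
		printf("%s\n", decode_error_status(MCI_STATUS_UC |
						   MCI_STATUS_PCC, 0));
		printf("%s\n", decode_error_status(MCI_STATUS_UC,
						   MCG_STATUS_RIPV));
		printf("%s\n", decode_error_status(MCI_STATUS_DEFERRED, 0));
		printf("%s\n", decode_error_status(0, 0));
		return 0;
	}
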
@@ -763,35 +781,35 @@ static int __init mce_amd_init(void)
763 781
764 switch (c->x86) { 782 switch (c->x86) {
765 case 0xf: 783 case 0xf:
766 fam_ops->dc_mce = k8_dc_mce; 784 fam_ops->mc0_mce = k8_mc0_mce;
767 fam_ops->ic_mce = k8_ic_mce; 785 fam_ops->mc1_mce = k8_mc1_mce;
768 break; 786 break;
769 787
770 case 0x10: 788 case 0x10:
771 fam_ops->dc_mce = f10h_dc_mce; 789 fam_ops->mc0_mce = f10h_mc0_mce;
772 fam_ops->ic_mce = k8_ic_mce; 790 fam_ops->mc1_mce = k8_mc1_mce;
773 break; 791 break;
774 792
775 case 0x11: 793 case 0x11:
776 fam_ops->dc_mce = k8_dc_mce; 794 fam_ops->mc0_mce = k8_mc0_mce;
777 fam_ops->ic_mce = k8_ic_mce; 795 fam_ops->mc1_mce = k8_mc1_mce;
778 break; 796 break;
779 797
780 case 0x12: 798 case 0x12:
781 fam_ops->dc_mce = f12h_dc_mce; 799 fam_ops->mc0_mce = f12h_mc0_mce;
782 fam_ops->ic_mce = k8_ic_mce; 800 fam_ops->mc1_mce = k8_mc1_mce;
783 break; 801 break;
784 802
785 case 0x14: 803 case 0x14:
786 nb_err_cpumask = 0x3; 804 nb_err_cpumask = 0x3;
787 fam_ops->dc_mce = f14h_dc_mce; 805 fam_ops->mc0_mce = f14h_mc0_mce;
788 fam_ops->ic_mce = f14h_ic_mce; 806 fam_ops->mc1_mce = f14h_mc1_mce;
789 break; 807 break;
790 808
791 case 0x15: 809 case 0x15:
792 xec_mask = 0x1f; 810 xec_mask = 0x1f;
793 fam_ops->dc_mce = f15h_dc_mce; 811 fam_ops->mc0_mce = f15h_mc0_mce;
794 fam_ops->ic_mce = f15h_ic_mce; 812 fam_ops->mc1_mce = f15h_mc1_mce;
795 break; 813 break;
796 814
797 default: 815 default:
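
The init switch above fills in the renamed per-family hooks; banks 0 and 1 are the only decoders that differ by family, so they go through function pointers while MC2-MC6 are dispatched directly in amd_decode_mce(). A condensed, user-space sketch of that shape (families and behaviour reduced to stubs):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Condensed from struct amd_decoder_ops: per-family hooks only. */
	struct amd_decoder_ops {
		bool (*mc0_mce)(uint16_t ec, uint8_t xec);
		bool (*mc1_mce)(uint16_t ec, uint8_t xec);
	};

	static bool k8_mc0_mce(uint16_t ec, uint8_t xec)
	{
		printf("K8 MC0: ec=0x%04x xec=0x%02x\n",
		       (unsigned)ec, (unsigned)xec);
		return true;
	}

	static bool k8_mc1_mce(uint16_t ec, uint8_t xec)
	{
		printf("K8 MC1: ec=0x%04x xec=0x%02x\n",
		       (unsigned)ec, (unsigned)xec);
		return true;
	}

	static struct amd_decoder_ops ops;

	int main(void)
	{
		uint8_t family = 0xf;	/* stand-in for boot_cpu_data.x86 */

		switch (family) {
		case 0xf:
		case 0x11:	/* same hooks as K8 in the hunk above */
			ops.mc0_mce = k8_mc0_mce;
			ops.mc1_mce = k8_mc1_mce;
			break;
		default:
			return 1;	/* unsupported family */
		}

		/* decode_mc0_mce()/decode_mc1_mce() call through these */
		ops.mc0_mce(0x0010, 0x1);
		ops.mc1_mce(0x0010, 0x1);
		return 0;
	}
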
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 8c87a5e87057..679679951e23 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -29,10 +29,8 @@
29#define R4(x) (((x) >> 4) & 0xf) 29#define R4(x) (((x) >> 4) & 0xf)
30#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") 30#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!")
31 31
32/* 32#define MCI_STATUS_DEFERRED BIT_64(44)
33 * F3x4C bits (MCi_STATUS' high half) 33#define MCI_STATUS_POISON BIT_64(43)
34 */
35#define NBSH_ERR_CPU_VAL BIT(24)
36 34
37enum tt_ids { 35enum tt_ids {
38 TT_INSTR = 0, 36 TT_INSTR = 0,
@@ -78,14 +76,13 @@ extern const char * const ii_msgs[];
78 * per-family decoder ops 76 * per-family decoder ops
79 */ 77 */
80struct amd_decoder_ops { 78struct amd_decoder_ops {
81 bool (*dc_mce)(u16, u8); 79 bool (*mc0_mce)(u16, u8);
82 bool (*ic_mce)(u16, u8); 80 bool (*mc1_mce)(u16, u8);
83}; 81};
84 82
85void amd_report_gart_errors(bool); 83void amd_report_gart_errors(bool);
86void amd_register_ecc_decoder(void (*f)(int, struct mce *)); 84void amd_register_ecc_decoder(void (*f)(int, struct mce *));
87void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)); 85void amd_unregister_ecc_decoder(void (*f)(int, struct mce *));
88void amd_decode_nb_mce(struct mce *);
89int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); 86int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
90 87
91#endif /* _EDAC_MCE_AMD_H */ 88#endif /* _EDAC_MCE_AMD_H */
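
For context on how amd_decode_mce() gets invoked at all: in this era of the tree it sits on the x86 MCE decode notifier chain, registered from mce_amd_init(). A kernel-context sketch, recalled from the surrounding mce_amd.c rather than taken from this diff, so treat the exact plumbing as an assumption:

	#include <linux/init.h>
	#include <linux/notifier.h>
	#include <asm/mce.h>
	#include "mce_amd.h"

	/* Assumed wiring: the decoder is a notifier on the MCE decode
	 * chain, attached once the per-family fam_ops are set up. */
	static struct notifier_block amd_mce_dec_nb = {
		.notifier_call	= amd_decode_mce,
	};

	static int __init mce_amd_init_sketch(void)
	{
		/* ... family checks and fam_ops setup as in the hunks
		 * above ... */
		mce_register_decode_chain(&amd_mce_dec_nb);
		return 0;
	}
	early_initcall(mce_amd_init_sketch);
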