aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: Borislav Petkov <borislav.petkov@amd.com> 2009-11-13 09:10:43 -0500
committer: Borislav Petkov <borislav.petkov@amd.com> 2009-12-08 07:38:12 -0500
commit: bdc30a0c8c7427a1c1d2e4d149d372d4d77781ee (patch)
tree: a990bedfacf041586af4d6fdf59389c975114a5d /drivers
parent: bfc04aec7d687282b5e7adb26799d3eb50d05f01 (diff)
amd64_edac: correct sys address to chip select mapping
The routine does the reverse mapping of the error address of a CECC (correctable ECC error) back to the node ID, DRAM controller and chip select of the DIMM which caused the error. We should look up the channel using the syndromes _only_ when the DCTs are ganged, so fix that. Also, add an early exit when there's an error while scanning for the csrow, thus decreasing indentation levels for better readability. Finally, fix up comments. Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/edac/amd64_edac.c | 58
1 files changed, 27 insertions, 31 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 0969a404f84f..533f5ff2ec33 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1645,10 +1645,11 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
1645} 1645}
1646 1646
1647/* 1647/*
1648 * This the F10h reference code from AMD to map a @sys_addr to NodeID, 1648 * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
1649 * CSROW, Channel. 1649 * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
1650 * 1650 *
1651 * The @sys_addr is usually an error address received from the hardware. 1651 * The @sys_addr is usually an error address received from the hardware
1652 * (MCX_ADDR).
1652 */ 1653 */
1653static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, 1654static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
1654 struct err_regs *info, 1655 struct err_regs *info,
@@ -1661,39 +1662,34 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
1661 1662
1662 csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); 1663 csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
1663 1664
1664 if (csrow >= 0) { 1665 if (csrow < 0) {
1665 error_address_to_page_and_offset(sys_addr, &page, &offset); 1666 edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
1667 return;
1668 }
1669
1670 error_address_to_page_and_offset(sys_addr, &page, &offset);
1666 1671
1667 syndrome = HIGH_SYNDROME(info->nbsl) << 8; 1672 syndrome = HIGH_SYNDROME(info->nbsl) << 8;
1668 syndrome |= LOW_SYNDROME(info->nbsh); 1673 syndrome |= LOW_SYNDROME(info->nbsh);
1674
1675 /*
1676 * We need the syndromes for channel detection only when we're
1677 * ganged. Otherwise @chan should already contain the channel at
1678 * this point.
1679 */
1680 if (dct_ganging_enabled(pvt) && pvt->nbcfg & K8_NBCFG_CHIPKILL)
1681 chan = get_channel_from_ecc_syndrome(mci, syndrome);
1669 1682
1683 if (chan >= 0)
1684 edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan,
1685 EDAC_MOD_STR);
1686 else
1670 /* 1687 /*
1671 * Is CHIPKILL on? If so, then we can attempt to use the 1688 * Channel unknown, report all channels on this CSROW as failed.
1672 * syndrome to isolate which channel the error was on.
1673 */ 1689 */
1674 if (pvt->nbcfg & K8_NBCFG_CHIPKILL) 1690 for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++)
1675 chan = get_channel_from_ecc_syndrome(mci, syndrome);
1676
1677 if (chan >= 0) {
1678 edac_mc_handle_ce(mci, page, offset, syndrome, 1691 edac_mc_handle_ce(mci, page, offset, syndrome,
1679 csrow, chan, EDAC_MOD_STR); 1692 csrow, chan, EDAC_MOD_STR);
1680 } else {
1681 /*
1682 * Channel unknown, report all channels on this
1683 * CSROW as failed.
1684 */
1685 for (chan = 0; chan < mci->csrows[csrow].nr_channels;
1686 chan++) {
1687 edac_mc_handle_ce(mci, page, offset,
1688 syndrome,
1689 csrow, chan,
1690 EDAC_MOD_STR);
1691 }
1692 }
1693
1694 } else {
1695 edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
1696 }
1697} 1693}
1698 1694
1699/* 1695/*