author     Linus Torvalds <torvalds@linux-foundation.org>  2016-12-13 12:03:52 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-12-13 12:03:52 -0500
commit     daf34710a9e8849e04867d206692dc42d6d22263 (patch)
tree       c15e27dc0921d5545aab24673130f27ed5c4b569
parent     9346116d148595a28fe3521f81ac8e14d93239c3 (diff)
parent     0de2788447b67891a31a156c0206fd159e4a8981 (diff)
Merge tag 'edac_for_4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
Pull EDAC updates from Borislav Petkov:

 - add KNM support to sb_edac (Piotr Luc)

 - add AMD Zen support to amd64_edac (Yazen Ghannam)

 - misc small cleanups, improvements and fixes (Colin Ian King, Dave
   Hansen, Pan Bian, Thor Thayer, Wei Yongjun, Yanjiang Jin, yours truly)

* tag 'edac_for_4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (26 commits)
  EDAC, amd64: Fix improper return value
  EDAC, amd64: Improve amd64-specific printing macros
  EDAC, amd64: Autoload amd64_edac_mod on Fam17h systems
  EDAC, amd64: Define and register UMC error decode function
  EDAC, amd64: Determine EDAC capabilities on Fam17h systems
  EDAC, amd64: Determine EDAC MC capabilities on Fam17h
  EDAC, amd64: Add Fam17h debug output
  EDAC, amd64: Add Fam17h scrubber support
  EDAC, mce_amd: Don't report poison bit on Fam15h, bank 4
  EDAC, amd64: Read MC registers on AMD Fam17h
  EDAC, amd64: Reserve correct PCI devices on AMD Fam17h
  EDAC, amd64: Add AMD Fam17h family type and ops
  EDAC, amd64: Extend ecc_enabled() to Fam17h
  EDAC, amd64: Don't force-enable ECC checking on newer systems
  EDAC, amd64: Add Deferred Error type
  EDAC, amd64: Rename __log_bus_error() to be more specific
  EDAC, amd64: Change target of pci_name from F2 to F3
  EDAC, mce_amd: Rename nb_bus_decoder to dram_ecc_decoder
  EDAC: Add LRDDR4 DRAM type
  EDAC, mpc85xx: Implement remove method for the platform driver
  ...
-rw-r--r--  drivers/edac/altera_edac.c     4
-rw-r--r--  drivers/edac/amd64_edac.c    692
-rw-r--r--  drivers/edac/amd64_edac.h     56
-rw-r--r--  drivers/edac/edac_mc.c        49
-rw-r--r--  drivers/edac/mce_amd.c        32
-rw-r--r--  drivers/edac/mpc85xx_edac.c   17
-rw-r--r--  drivers/edac/sb_edac.c        14
-rw-r--r--  drivers/edac/skx_edac.c        7
-rw-r--r--  drivers/edac/xgene_edac.c      6
-rw-r--r--  include/linux/edac.h           8
10 files changed, 727 insertions(+), 158 deletions(-)
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 58d3e2b39b5b..6421cc3c7dc1 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -153,13 +153,17 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file,
         if (count == 3) {
                 edac_printk(KERN_ALERT, EDAC_MC,
                             "Inject Double bit error\n");
+                local_irq_disable();
                 regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset,
                              (read_reg | priv->ue_set_mask));
+                local_irq_enable();
         } else {
                 edac_printk(KERN_ALERT, EDAC_MC,
                             "Inject Single bit error\n");
+                local_irq_disable();
                 regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset,
                              (read_reg | priv->ce_set_mask));
+                local_irq_enable();
         }

         ptemp[0] = 0x5A5A5A5A;
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index ee181c53626f..260251177830 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -164,8 +164,23 @@ static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct,
  * other archs, we might not have access to the caches directly.
  */

+static inline void __f17h_set_scrubval(struct amd64_pvt *pvt, u32 scrubval)
+{
+        /*
+         * Fam17h supports scrub values between 0x5 and 0x14. Also, the values
+         * are shifted down by 0x5, so scrubval 0x5 is written to the register
+         * as 0x0, scrubval 0x6 as 0x1, etc.
+         */
+        if (scrubval >= 0x5 && scrubval <= 0x14) {
+                scrubval -= 0x5;
+                pci_write_bits32(pvt->F6, F17H_SCR_LIMIT_ADDR, scrubval, 0xF);
+                pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 1, 0x1);
+        } else {
+                pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 0, 0x1);
+        }
+}
 /*
- * scan the scrub rate mapping table for a close or matching bandwidth value to
+ * Scan the scrub rate mapping table for a close or matching bandwidth value to
  * issue. If requested is too big, then use last maximum value found.
  */
 static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
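The encoding done by __f17h_set_scrubval() above round-trips with the decode added to get_scrub_rate() in the next hunks. A minimal, compilable userspace sketch of that arithmetic (illustrative only, not part of the patch):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Encode a Fam17h scrubval (0x5..0x14) into the 4-bit register field,
     * returning -1 when out of range (scrubbing disabled). */
    static int f17h_encode_scrubval(uint32_t scrubval)
    {
            if (scrubval >= 0x5 && scrubval <= 0x14)
                    return (int)(scrubval - 0x5) & 0xF;
            return -1;
    }

    /* Decode the field back, mirroring get_scrub_rate(): the raw 4-bit
     * value is shifted back up by 0x5 when scrubbing is enabled. */
    static uint32_t f17h_decode_scrubval(int reg_field)
    {
            return reg_field < 0 ? 0 : (uint32_t)reg_field + 0x5;
    }

    int main(void)
    {
            for (uint32_t v = 0x5; v <= 0x14; v++)
                    assert(f17h_decode_scrubval(f17h_encode_scrubval(v)) == v);

            printf("0x5 -> 0x%x, 0x14 -> 0x%x\n",
                   f17h_encode_scrubval(0x5), f17h_encode_scrubval(0x14));
            return 0;
    }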
@@ -196,7 +211,9 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)

         scrubval = scrubrates[i].scrubval;

-        if (pvt->fam == 0x15 && pvt->model == 0x60) {
+        if (pvt->fam == 0x17) {
+                __f17h_set_scrubval(pvt, scrubval);
+        } else if (pvt->fam == 0x15 && pvt->model == 0x60) {
                 f15h_select_dct(pvt, 0);
                 pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
                 f15h_select_dct(pvt, 1);
@@ -233,18 +250,34 @@ static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
 static int get_scrub_rate(struct mem_ctl_info *mci)
 {
         struct amd64_pvt *pvt = mci->pvt_info;
-        u32 scrubval = 0;
         int i, retval = -EINVAL;
+        u32 scrubval = 0;

-        if (pvt->fam == 0x15) {
+        switch (pvt->fam) {
+        case 0x15:
                 /* Erratum #505 */
                 if (pvt->model < 0x10)
                         f15h_select_dct(pvt, 0);

                 if (pvt->model == 0x60)
                         amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
-        } else
+                break;
+
+        case 0x17:
+                amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
+                if (scrubval & BIT(0)) {
+                        amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
+                        scrubval &= 0xF;
+                        scrubval += 0x5;
+                } else {
+                        scrubval = 0;
+                }
+                break;
+
+        default:
                 amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
+                break;
+        }

         scrubval = scrubval & 0x001F;

@@ -682,15 +715,33 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
  */
 static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
 {
-        u8 bit;
         unsigned long edac_cap = EDAC_FLAG_NONE;
+        u8 bit;

-        bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
-                ? 19
-                : 17;
+        if (pvt->umc) {
+                u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;

-        if (pvt->dclr0 & BIT(bit))
-                edac_cap = EDAC_FLAG_SECDED;
+                for (i = 0; i < NUM_UMCS; i++) {
+                        if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
+                                continue;
+
+                        umc_en_mask |= BIT(i);
+
+                        /* UMC Configuration bit 12 (DimmEccEn) */
+                        if (pvt->umc[i].umc_cfg & BIT(12))
+                                dimm_ecc_en_mask |= BIT(i);
+                }
+
+                if (umc_en_mask == dimm_ecc_en_mask)
+                        edac_cap = EDAC_FLAG_SECDED;
+        } else {
+                bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
+                        ? 19
+                        : 17;
+
+                if (pvt->dclr0 & BIT(bit))
+                        edac_cap = EDAC_FLAG_SECDED;
+        }

         return edac_cap;
 }
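The UMC branch above claims SECDED capability only when the set of initialized UMCs exactly equals the set whose DIMMs have ECC enabled. A small standalone model of that mask comparison, using hypothetical register values (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define NUM_UMCS 2

    /* Mirror of the comparison in determine_edac_cap(): ECC is claimed
     * only if every enabled UMC also has its DimmEccEn bit set. */
    static int umc_ecc_capable(const uint32_t *sdp_ctrl, const uint32_t *umc_cfg)
    {
            uint8_t umc_en_mask = 0, dimm_ecc_en_mask = 0;

            for (int i = 0; i < NUM_UMCS; i++) {
                    if (!(sdp_ctrl[i] & (1u << 31)))        /* UMC_SDP_INIT */
                            continue;
                    umc_en_mask |= 1u << i;
                    if (umc_cfg[i] & (1u << 12))            /* DimmEccEn */
                            dimm_ecc_en_mask |= 1u << i;
            }
            return umc_en_mask == dimm_ecc_en_mask;
    }

    int main(void)
    {
            uint32_t sdp[NUM_UMCS]       = { 1u << 31, 1u << 31 };
            uint32_t cfg_ok[NUM_UMCS]    = { 1u << 12, 1u << 12 };
            uint32_t cfg_mixed[NUM_UMCS] = { 1u << 12, 0 };

            printf("both ECC: %d, mixed: %d\n",
                   umc_ecc_capable(sdp, cfg_ok), umc_ecc_capable(sdp, cfg_mixed));
            return 0;
    }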
@@ -729,8 +780,75 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
                  (dclr & BIT(15)) ?  "yes" : "no");
 }

+static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
+{
+        u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
+        int dimm, size0, size1;
+
+        edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
+
+        for (dimm = 0; dimm < 4; dimm++) {
+                size0 = 0;
+
+                if (dcsb[dimm*2] & DCSB_CS_ENABLE)
+                        size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm);
+
+                size1 = 0;
+                if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
+                        size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, dimm);
+
+                amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
+                           dimm * 2, size0,
+                           dimm * 2 + 1, size1);
+        }
+}
+
+static void __dump_misc_regs_df(struct amd64_pvt *pvt)
+{
+        struct amd64_umc *umc;
+        u32 i, tmp, umc_base;
+
+        for (i = 0; i < NUM_UMCS; i++) {
+                umc_base = get_umc_base(i);
+                umc = &pvt->umc[i];
+
+                edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg);
+                edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
+                edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
+                edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
+
+                amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ECC_BAD_SYMBOL, &tmp);
+                edac_dbg(1, "UMC%d ECC bad symbol: 0x%x\n", i, tmp);
+
+                amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_UMC_CAP, &tmp);
+                edac_dbg(1, "UMC%d UMC cap: 0x%x\n", i, tmp);
+                edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi);
+
+                edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n",
+                         i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no",
+                            (umc->umc_cap_hi & BIT(31)) ? "yes" : "no");
+                edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n",
+                         i, (umc->umc_cfg & BIT(12)) ? "yes" : "no");
+                edac_dbg(1, "UMC%d x4 DIMMs present: %s\n",
+                         i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no");
+                edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
+                         i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
+
+                if (pvt->dram_type == MEM_LRDDR4) {
+                        amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
+                        edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
+                                 i, 1 << ((tmp >> 4) & 0x3));
+                }
+
+                debug_display_dimm_sizes_df(pvt, i);
+        }
+
+        edac_dbg(1, "F0x104 (DRAM Hole Address): 0x%08x, base: 0x%08x\n",
+                 pvt->dhar, dhar_base(pvt));
+}
+
 /* Display and decode various NB registers for debug purposes. */
-static void dump_misc_regs(struct amd64_pvt *pvt)
+static void __dump_misc_regs(struct amd64_pvt *pvt)
 {
         edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);

@@ -750,8 +868,6 @@ static void dump_misc_regs(struct amd64_pvt *pvt)
                  (pvt->fam == 0xf) ? k8_dhar_offset(pvt)
                                    : f10_dhar_offset(pvt));

-        edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
-
         debug_display_dimm_sizes(pvt, 0);

         /* everything below this point is Fam10h and above */
@@ -760,13 +876,25 @@ static void dump_misc_regs(struct amd64_pvt *pvt)

         debug_display_dimm_sizes(pvt, 1);

-        amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
-
         /* Only if NOT ganged does dclr1 have valid info */
         if (!dct_ganging_enabled(pvt))
                 debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
 }

+/* Display and decode various NB registers for debug purposes. */
+static void dump_misc_regs(struct amd64_pvt *pvt)
+{
+        if (pvt->umc)
+                __dump_misc_regs_df(pvt);
+        else
+                __dump_misc_regs(pvt);
+
+        edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
+
+        amd64_info("using %s syndromes.\n",
+                   ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
+}
+
 /*
  * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
  */
@@ -789,46 +917,78 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
  */
 static void read_dct_base_mask(struct amd64_pvt *pvt)
 {
-        int cs;
+        int base_reg0, base_reg1, mask_reg0, mask_reg1, cs;

         prep_chip_selects(pvt);

+        if (pvt->umc) {
+                base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR;
+                base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR;
+                mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK;
+                mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK;
+        } else {
+                base_reg0 = DCSB0;
+                base_reg1 = DCSB1;
+                mask_reg0 = DCSM0;
+                mask_reg1 = DCSM1;
+        }
+
         for_each_chip_select(cs, 0, pvt) {
-                int reg0   = DCSB0 + (cs * 4);
-                int reg1   = DCSB1 + (cs * 4);
+                int reg0   = base_reg0 + (cs * 4);
+                int reg1   = base_reg1 + (cs * 4);
                 u32 *base0 = &pvt->csels[0].csbases[cs];
                 u32 *base1 = &pvt->csels[1].csbases[cs];

-                if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
-                        edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
-                                 cs, *base0, reg0);
+                if (pvt->umc) {
+                        if (!amd_smn_read(pvt->mc_node_id, reg0, base0))
+                                edac_dbg(0, "  DCSB0[%d]=0x%08x reg: 0x%x\n",
+                                         cs, *base0, reg0);

-                if (pvt->fam == 0xf)
-                        continue;
+                        if (!amd_smn_read(pvt->mc_node_id, reg1, base1))
+                                edac_dbg(0, "  DCSB1[%d]=0x%08x reg: 0x%x\n",
+                                         cs, *base1, reg1);
+                } else {
+                        if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
+                                edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
+                                         cs, *base0, reg0);
+
+                        if (pvt->fam == 0xf)
+                                continue;

                         if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
                                 edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
                                          cs, *base1, (pvt->fam == 0x10) ? reg1
                                                                         : reg0);
+                }
         }

         for_each_chip_select_mask(cs, 0, pvt) {
-                int reg0   = DCSM0 + (cs * 4);
-                int reg1   = DCSM1 + (cs * 4);
+                int reg0   = mask_reg0 + (cs * 4);
+                int reg1   = mask_reg1 + (cs * 4);
                 u32 *mask0 = &pvt->csels[0].csmasks[cs];
                 u32 *mask1 = &pvt->csels[1].csmasks[cs];

-                if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
-                        edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
-                                 cs, *mask0, reg0);
+                if (pvt->umc) {
+                        if (!amd_smn_read(pvt->mc_node_id, reg0, mask0))
+                                edac_dbg(0, "    DCSM0[%d]=0x%08x reg: 0x%x\n",
+                                         cs, *mask0, reg0);

-                if (pvt->fam == 0xf)
-                        continue;
+                        if (!amd_smn_read(pvt->mc_node_id, reg1, mask1))
+                                edac_dbg(0, "    DCSM1[%d]=0x%08x reg: 0x%x\n",
+                                         cs, *mask1, reg1);
+                } else {
+                        if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
+                                edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
+                                         cs, *mask0, reg0);
+
+                        if (pvt->fam == 0xf)
+                                continue;

                         if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
                                 edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
                                          cs, *mask1, (pvt->fam == 0x10) ? reg1
                                                                         : reg0);
+                }
         }
 }

@@ -881,6 +1041,15 @@ static void determine_memory_type(struct amd64_pvt *pvt)
         case 0x16:
                 goto ddr3;

+        case 0x17:
+                if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
+                        pvt->dram_type = MEM_LRDDR4;
+                else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
+                        pvt->dram_type = MEM_RDDR4;
+                else
+                        pvt->dram_type = MEM_DDR4;
+                return;
+
         default:
                 WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
                 pvt->dram_type = MEM_EMPTY;
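The Fam17h case above derives the DIMM type from the OR of both UMCs' DIMM configuration registers, bit 5 flagging LRDIMMs and bit 4 RDIMMs. A compilable sketch of that selection (hypothetical register values, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirror of the Fam17h branch of determine_memory_type(). */
    static const char *f17_dram_type(uint32_t dimm_cfg0, uint32_t dimm_cfg1)
    {
            uint32_t cfg = dimm_cfg0 | dimm_cfg1;

            if (cfg & (1u << 5))
                    return "MEM_LRDDR4";
            if (cfg & (1u << 4))
                    return "MEM_RDDR4";
            return "MEM_DDR4";
    }

    int main(void)
    {
            printf("%s\n", f17_dram_type(1u << 4, 0));  /* registered DIMMs */
            return 0;
    }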
@@ -1210,6 +1379,19 @@ static int f1x_early_channel_count(struct amd64_pvt *pvt)
         return channels;
 }

+static int f17_early_channel_count(struct amd64_pvt *pvt)
+{
+        int i, channels = 0;
+
+        /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
+        for (i = 0; i < NUM_UMCS; i++)
+                channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
+
+        amd64_info("MCT channel count: %d\n", channels);
+
+        return channels;
+}
+
 static int ddr3_cs_size(unsigned i, bool dct_width)
 {
         unsigned shift = 0;
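Counting channels via the SdpInit bit, as f17_early_channel_count() does above, reduces to summing a flag per UMC. A tiny standalone model (assumed register values, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define UMC_SDP_INIT (1u << 31)
    #define NUM_UMCS 2

    /* Mirror of f17_early_channel_count(): each UMC with SdpInit set
     * counts as one populated memory channel. */
    static int f17_early_channel_count(const uint32_t *sdp_ctrl)
    {
            int channels = 0;

            for (int i = 0; i < NUM_UMCS; i++)
                    channels += !!(sdp_ctrl[i] & UMC_SDP_INIT);

            return channels;
    }

    int main(void)
    {
            uint32_t sdp[NUM_UMCS] = { UMC_SDP_INIT, 0 };

            printf("channels: %d\n", f17_early_channel_count(sdp));
            return 0;
    }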
@@ -1337,6 +1519,23 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
         return ddr3_cs_size(cs_mode, false);
 }

+static int f17_base_addr_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+                                    unsigned int cs_mode, int csrow_nr)
+{
+        u32 base_addr = pvt->csels[umc].csbases[csrow_nr];
+
+        /* Each mask is used for every two base addresses. */
+        u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr >> 1];
+
+        /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+        u32 size = ((addr_mask >> 1) - (base_addr >> 1) + 1) >> 1;
+
+        edac_dbg(1, "BaseAddr: 0x%x, AddrMask: 0x%x\n", base_addr, addr_mask);
+
+        /* Return size in MBs. */
+        return size >> 10;
+}
+
 static void read_dram_ctl_register(struct amd64_pvt *pvt)
 {

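The size arithmetic in f17_base_addr_to_cs_size() is easy to sanity-check in isolation: register bits [31:1] map to address bits [39:9], so the mask/base difference is in 512-byte units, one extra shift yields kB, and ten more yield MB. A sketch with made-up register values (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirror of f17_base_addr_to_cs_size()'s computation. */
    static uint32_t f17_cs_size_mb(uint32_t base_addr, uint32_t addr_mask)
    {
            uint32_t size_kb = ((addr_mask >> 1) - (base_addr >> 1) + 1) >> 1;

            return size_kb >> 10;   /* kB -> MB */
    }

    int main(void)
    {
            /* Hypothetical chip select: base 0, mask 0x00ffffff -> 4096 MB. */
            printf("cs size: %u MB\n", f17_cs_size_mb(0x0, 0x00ffffff));
            return 0;
    }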
@@ -1897,8 +2096,9 @@ static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)

                 size0 = 0;
                 if (dcsb[dimm*2] & DCSB_CS_ENABLE)
-                        /* For f15m60h, need multiplier for LRDIMM cs_size
-                         * calculation. We pass 'dimm' value to the dbam_to_cs
+                        /*
+                         * For F15m60h, we need multiplier for LRDIMM cs_size
+                         * calculation. We pass dimm value to the dbam_to_cs
                          * mapper so we can find the multiplier from the
                          * corresponding DCSM.
                          */
@@ -1989,6 +2189,15 @@ static struct amd64_family_type family_types[] = {
                         .dbam_to_cs = f16_dbam_to_chip_select,
                 }
         },
+        [F17_CPUS] = {
+                .ctl_name = "F17h",
+                .f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
+                .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
+                .ops = {
+                        .early_channel_count    = f17_early_channel_count,
+                        .dbam_to_cs             = f17_base_addr_to_cs_size,
+                }
+        },
 };

 /*
@@ -2155,7 +2364,7 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
         return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
 }

-static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
+static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
                             u8 ecc_type)
 {
         enum hw_event_mc_err_type err_type;
@@ -2165,6 +2374,8 @@ static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
                 err_type = HW_EVENT_ERR_CORRECTED;
         else if (ecc_type == 1)
                 err_type = HW_EVENT_ERR_UNCORRECTED;
+        else if (ecc_type == 3)
+                err_type = HW_EVENT_ERR_DEFERRED;
         else {
                 WARN(1, "Something is rotten in the state of Denmark.\n");
                 return;
@@ -2181,7 +2392,13 @@ static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
                 string = "Failed to map error addr to a csrow";
                 break;
         case ERR_CHANNEL:
-                string = "unknown syndrome - possible error reporting race";
+                string = "Unknown syndrome - possible error reporting race";
+                break;
+        case ERR_SYND:
+                string = "MCA_SYND not valid - unknown syndrome and csrow";
+                break;
+        case ERR_NORM_ADDR:
+                string = "Cannot decode normalized address";
                 break;
         default:
                 string = "WTF error";
@@ -2227,36 +2444,127 @@ static inline void decode_bus_error(int node_id, struct mce *m)

         pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);

-        __log_bus_error(mci, &err, ecc_type);
+        __log_ecc_error(mci, &err, ecc_type);
+}
+
+/*
+ * To find the UMC channel represented by this bank we need to match on its
+ * instance_id. The instance_id of a bank is held in the lower 32 bits of its
+ * IPID.
+ */
+static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
+{
+        u32 umc_instance_id[] = {0x50f00, 0x150f00};
+        u32 instance_id = m->ipid & GENMASK(31, 0);
+        int i, channel = -1;
+
+        for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
+                if (umc_instance_id[i] == instance_id)
+                        channel = i;
+
+        return channel;
+}
+
+static void decode_umc_error(int node_id, struct mce *m)
+{
+        u8 ecc_type = (m->status >> 45) & 0x3;
+        struct mem_ctl_info *mci;
+        struct amd64_pvt *pvt;
+        struct err_info err;
+        u64 sys_addr;
+
+        mci = edac_mc_find(node_id);
+        if (!mci)
+                return;
+
+        pvt = mci->pvt_info;
+
+        memset(&err, 0, sizeof(err));
+
+        if (m->status & MCI_STATUS_DEFERRED)
+                ecc_type = 3;
+
+        err.channel = find_umc_channel(pvt, m);
+        if (err.channel < 0) {
+                err.err_code = ERR_CHANNEL;
+                goto log_error;
+        }
+
+        if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
+                err.err_code = ERR_NORM_ADDR;
+                goto log_error;
+        }
+
+        error_address_to_page_and_offset(sys_addr, &err);
+
+        if (!(m->status & MCI_STATUS_SYNDV)) {
+                err.err_code = ERR_SYND;
+                goto log_error;
+        }
+
+        if (ecc_type == 2) {
+                u8 length = (m->synd >> 18) & 0x3f;
+
+                if (length)
+                        err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0);
+                else
+                        err.err_code = ERR_CHANNEL;
+        }
+
+        err.csrow = m->synd & 0x7;
+
+log_error:
+        __log_ecc_error(mci, &err, ecc_type);
 }

 /*
  * Use pvt->F3 which contains the F3 CPU PCI device to get the related
  * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
+ * Reserve F0 and F6 on systems with a UMC.
  */
-static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f2_id)
-{
+static int
+reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2)
+{
+        if (pvt->umc) {
+                pvt->F0 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
+                if (!pvt->F0) {
+                        amd64_err("F0 not found, device 0x%x (broken BIOS?)\n", pci_id1);
+                        return -ENODEV;
+                }
+
+                pvt->F6 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3);
+                if (!pvt->F6) {
+                        pci_dev_put(pvt->F0);
+                        pvt->F0 = NULL;
+
+                        amd64_err("F6 not found: device 0x%x (broken BIOS?)\n", pci_id2);
+                        return -ENODEV;
+                }
+
+                edac_dbg(1, "F0: %s\n", pci_name(pvt->F0));
+                edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
+                edac_dbg(1, "F6: %s\n", pci_name(pvt->F6));
+
+                return 0;
+        }
+
         /* Reserve the ADDRESS MAP Device */
-        pvt->F1 = pci_get_related_function(pvt->F3->vendor, f1_id, pvt->F3);
+        pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
         if (!pvt->F1) {
-                amd64_err("error address map device not found: "
-                          "vendor %x device 0x%x (broken BIOS?)\n",
-                          PCI_VENDOR_ID_AMD, f1_id);
+                amd64_err("F1 not found: device 0x%x (broken BIOS?)\n", pci_id1);
                 return -ENODEV;
         }

         /* Reserve the DCT Device */
-        pvt->F2 = pci_get_related_function(pvt->F3->vendor, f2_id, pvt->F3);
+        pvt->F2 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3);
         if (!pvt->F2) {
                 pci_dev_put(pvt->F1);
                 pvt->F1 = NULL;

-                amd64_err("error F2 device not found: "
-                          "vendor %x device 0x%x (broken BIOS?)\n",
-                          PCI_VENDOR_ID_AMD, f2_id);
-
+                amd64_err("F2 not found: device 0x%x (broken BIOS?)\n", pci_id2);
                 return -ENODEV;
         }
+
         edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
         edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
         edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
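Two pieces of the new decode path above are pure bit manipulation and can be checked standalone: matching the UMC channel on the low 32 bits of the bank's IPID, and pulling a variable-length syndrome out of MCA_SYND. A compilable sketch (instance IDs taken from the patch, sample values invented):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirror of find_umc_channel(): match the low 32 bits of IPID. */
    static int find_umc_channel(uint64_t ipid)
    {
            const uint32_t umc_instance_id[] = { 0x50f00, 0x150f00 };
            uint32_t instance_id = (uint32_t)(ipid & 0xffffffffu);

            for (unsigned i = 0; i < 2; i++)
                    if (umc_instance_id[i] == instance_id)
                            return (int)i;
            return -1;
    }

    /* Mirror of the syndrome extraction in decode_umc_error(): bits
     * [23:18] give the syndrome length, bits [63:32] hold the syndrome. */
    static uint64_t extract_syndrome(uint64_t synd)
    {
            uint8_t length = (synd >> 18) & 0x3f;

            if (!length)
                    return 0;
            return (synd >> 32) & ((1ull << length) - 1);
    }

    int main(void)
    {
            printf("channel: %d\n", find_umc_channel(0x150f00ull));
            printf("syndrome: 0x%llx\n", (unsigned long long)
                   extract_syndrome((0xabcdull << 32) | (16ull << 18)));
            return 0;
    }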
@@ -2266,8 +2574,69 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f2_id)

 static void free_mc_sibling_devs(struct amd64_pvt *pvt)
 {
-        pci_dev_put(pvt->F1);
-        pci_dev_put(pvt->F2);
+        if (pvt->umc) {
+                pci_dev_put(pvt->F0);
+                pci_dev_put(pvt->F6);
+        } else {
+                pci_dev_put(pvt->F1);
+                pci_dev_put(pvt->F2);
+        }
+}
+
+static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
+{
+        pvt->ecc_sym_sz = 4;
+
+        if (pvt->umc) {
+                u8 i;
+
+                for (i = 0; i < NUM_UMCS; i++) {
+                        /* Check enabled channels only: */
+                        if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
+                            (pvt->umc[i].ecc_ctrl & BIT(7))) {
+                                pvt->ecc_sym_sz = 8;
+                                break;
+                        }
+                }
+
+                return;
+        }
+
+        if (pvt->fam >= 0x10) {
+                u32 tmp;
+
+                amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
+                /* F16h has only DCT0, so no need to read dbam1. */
+                if (pvt->fam != 0x16)
+                        amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1);
+
+                /* F10h, revD and later can do x8 ECC too. */
+                if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25))
+                        pvt->ecc_sym_sz = 8;
+        }
+}
+
+/*
+ * Retrieve the hardware registers of the memory controller.
+ */
+static void __read_mc_regs_df(struct amd64_pvt *pvt)
+{
+        u8 nid = pvt->mc_node_id;
+        struct amd64_umc *umc;
+        u32 i, umc_base;
+
+        /* Read registers from each UMC */
+        for (i = 0; i < NUM_UMCS; i++) {
+
+                umc_base = get_umc_base(i);
+                umc = &pvt->umc[i];
+
+                amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
+                amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
+                amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
+                amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
+                amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi);
+        }
 }

 /*
@@ -2276,24 +2645,31 @@ static void free_mc_sibling_devs(struct amd64_pvt *pvt)
  */
 static void read_mc_regs(struct amd64_pvt *pvt)
 {
-        unsigned range;
+        unsigned int range;
         u64 msr_val;
-        u32 tmp;

         /*
          * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
-         * those are Read-As-Zero
+         * those are Read-As-Zero.
          */
         rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
         edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);

-        /* check first whether TOP_MEM2 is enabled */
+        /* Check first whether TOP_MEM2 is enabled: */
         rdmsrl(MSR_K8_SYSCFG, msr_val);
-        if (msr_val & (1U << 21)) {
+        if (msr_val & BIT(21)) {
                 rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
                 edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
-        } else
+        } else {
                 edac_dbg(0, "  TOP_MEM2 disabled\n");
+        }
+
+        if (pvt->umc) {
+                __read_mc_regs_df(pvt);
+                amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar);
+
+                goto skip;
+        }

         amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);

@@ -2322,8 +2698,6 @@ static void read_mc_regs(struct amd64_pvt *pvt)
                          dram_dst_node(pvt, range));
         }

-        read_dct_base_mask(pvt);
-
         amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
         amd64_read_dct_pci_cfg(pvt, 0, DBAM0, &pvt->dbam0);

@@ -2337,20 +2711,14 @@ static void read_mc_regs(struct amd64_pvt *pvt)
                 amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1);
         }

-        pvt->ecc_sym_sz = 4;
+skip:
+        read_dct_base_mask(pvt);
+
         determine_memory_type(pvt);
         edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);

-        if (pvt->fam >= 0x10) {
-                amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
-                /* F16h has only DCT0, so no need to read dbam1 */
-                if (pvt->fam != 0x16)
-                        amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1);
+        determine_ecc_sym_sz(pvt);

-                /* F10h, revD and later can do x8 ECC too */
-                if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25))
-                        pvt->ecc_sym_sz = 8;
-        }
         dump_misc_regs(pvt);
 }

@@ -2420,20 +2788,22 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
 static int init_csrows(struct mem_ctl_info *mci)
 {
         struct amd64_pvt *pvt = mci->pvt_info;
+        enum edac_type edac_mode = EDAC_NONE;
         struct csrow_info *csrow;
         struct dimm_info *dimm;
-        enum edac_type edac_mode;
         int i, j, empty = 1;
         int nr_pages = 0;
         u32 val;

-        amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
+        if (!pvt->umc) {
+                amd64_read_pci_cfg(pvt->F3, NBCFG, &val);

                 pvt->nbcfg = val;

                 edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
                          pvt->mc_node_id, val,
                          !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
+        }

         /*
          * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
@@ -2469,14 +2839,18 @@ static int init_csrows(struct mem_ctl_info *mci)

                 edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);

-                /*
-                 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
-                 */
-                if (pvt->nbcfg & NBCFG_ECC_ENABLE)
-                        edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
-                                    EDAC_S4ECD4ED : EDAC_SECDED;
-                else
-                        edac_mode = EDAC_NONE;
+                /* Determine DIMM ECC mode: */
+                if (pvt->umc) {
+                        if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED)
+                                edac_mode = EDAC_S4ECD4ED;
+                        else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED)
+                                edac_mode = EDAC_SECDED;
+
+                } else if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
+                        edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL)
+                                        ? EDAC_S4ECD4ED
+                                        : EDAC_SECDED;
+                }

                 for (j = 0; j < pvt->channel_count; j++) {
                         dimm = csrow->channels[j]->dimm;
@@ -2539,7 +2913,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)

         if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
                 amd64_warn("%s: error allocating mask\n", __func__);
-                return false;
+                return -ENOMEM;
         }

         get_cpus_on_this_dct_cpumask(cmask, nid);
@@ -2627,7 +3001,6 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
 {
         u32 value, mask = 0x3;          /* UECC/CECC enable */

-
         if (!s->nbctl_valid)
                 return;

@@ -2663,20 +3036,50 @@ static const char *ecc_msg =

 static bool ecc_enabled(struct pci_dev *F3, u16 nid)
 {
-        u32 value;
-        u8 ecc_en = 0;
         bool nb_mce_en = false;
+        u8 ecc_en = 0, i;
+        u32 value;

-        amd64_read_pci_cfg(F3, NBCFG, &value);
+        if (boot_cpu_data.x86 >= 0x17) {
+                u8 umc_en_mask = 0, ecc_en_mask = 0;

-        ecc_en = !!(value & NBCFG_ECC_ENABLE);
-        amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
+                for (i = 0; i < NUM_UMCS; i++) {
+                        u32 base = get_umc_base(i);
+
+                        /* Only check enabled UMCs. */
+                        if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value))
+                                continue;
+
+                        if (!(value & UMC_SDP_INIT))
+                                continue;
+
+                        umc_en_mask |= BIT(i);

-        nb_mce_en = nb_mce_bank_enabled_on_node(nid);
-        if (!nb_mce_en)
-                amd64_notice("NB MCE bank disabled, set MSR "
-                             "0x%08x[4] on node %d to enable.\n",
-                             MSR_IA32_MCG_CTL, nid);
+                        if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value))
+                                continue;
+
+                        if (value & UMC_ECC_ENABLED)
+                                ecc_en_mask |= BIT(i);
+                }
+
+                /* Check whether at least one UMC is enabled: */
+                if (umc_en_mask)
+                        ecc_en = umc_en_mask == ecc_en_mask;
+
+                /* Assume UMC MCA banks are enabled. */
+                nb_mce_en = true;
+        } else {
+                amd64_read_pci_cfg(F3, NBCFG, &value);
+
+                ecc_en = !!(value & NBCFG_ECC_ENABLE);
+
+                nb_mce_en = nb_mce_bank_enabled_on_node(nid);
+                if (!nb_mce_en)
+                        amd64_notice("NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n",
+                                     MSR_IA32_MCG_CTL, nid);
+        }
+
+        amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));

         if (!ecc_en || !nb_mce_en) {
                 amd64_notice("%s", ecc_msg);
@@ -2685,6 +3088,27 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
         return true;
 }

+static inline void
+f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
+{
+        u8 i, ecc_en = 1, cpk_en = 1;
+
+        for (i = 0; i < NUM_UMCS; i++) {
+                if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
+                        ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
+                        cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
+                }
+        }
+
+        /* Set chipkill only if ECC is enabled: */
+        if (ecc_en) {
+                mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
+
+                if (cpk_en)
+                        mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
+        }
+}
+
 static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
                                  struct amd64_family_type *fam)
 {
@@ -2693,17 +3117,21 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
         mci->mtype_cap          = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
         mci->edac_ctl_cap       = EDAC_FLAG_NONE;

-        if (pvt->nbcap & NBCAP_SECDED)
-                mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
+        if (pvt->umc) {
+                f17h_determine_edac_ctl_cap(mci, pvt);
+        } else {
+                if (pvt->nbcap & NBCAP_SECDED)
+                        mci->edac_ctl_cap |= EDAC_FLAG_SECDED;

                 if (pvt->nbcap & NBCAP_CHIPKILL)
                         mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
+        }

         mci->edac_cap           = determine_edac_cap(pvt);
         mci->mod_name           = EDAC_MOD_STR;
         mci->mod_ver            = EDAC_AMD64_VERSION;
         mci->ctl_name           = fam->ctl_name;
-        mci->dev_name           = pci_name(pvt->F2);
+        mci->dev_name           = pci_name(pvt->F3);
         mci->ctl_page_to_phys   = NULL;

         /* memory scrubber interface */
@@ -2759,6 +3187,11 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
                 pvt->ops = &family_types[F16_CPUS].ops;
                 break;

+        case 0x17:
+                fam_type = &family_types[F17_CPUS];
+                pvt->ops = &family_types[F17_CPUS].ops;
+                break;
+
         default:
                 amd64_err("Unsupported family!\n");
                 return NULL;
@@ -2789,6 +3222,7 @@ static int init_one_instance(unsigned int nid)
         struct mem_ctl_info *mci = NULL;
         struct edac_mc_layer layers[2];
         struct amd64_pvt *pvt = NULL;
+        u16 pci_id1, pci_id2;
         int err = 0, ret;

         ret = -ENOMEM;
@@ -2804,10 +3238,23 @@ static int init_one_instance(unsigned int nid)
         if (!fam_type)
                 goto err_free;

-        ret = -ENODEV;
-        err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f2_id);
+        if (pvt->fam >= 0x17) {
+                pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
+                if (!pvt->umc) {
+                        ret = -ENOMEM;
+                        goto err_free;
+                }
+
+                pci_id1 = fam_type->f0_id;
+                pci_id2 = fam_type->f6_id;
+        } else {
+                pci_id1 = fam_type->f1_id;
+                pci_id2 = fam_type->f2_id;
+        }
+
+        err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
         if (err)
-                goto err_free;
+                goto err_post_init;

         read_mc_regs(pvt);

@@ -2857,7 +3304,10 @@ static int init_one_instance(unsigned int nid)
         if (report_gart_errors)
                 amd_report_gart_errors(true);

-        amd_register_ecc_decoder(decode_bus_error);
+        if (pvt->umc)
+                amd_register_ecc_decoder(decode_umc_error);
+        else
+                amd_register_ecc_decoder(decode_bus_error);

         return 0;

@@ -2867,6 +3317,10 @@ err_add_mc:
 err_siblings:
         free_mc_sibling_devs(pvt);

+err_post_init:
+        if (pvt->fam >= 0x17)
+                kfree(pvt->umc);
+
 err_free:
         kfree(pvt);

@@ -2893,7 +3347,11 @@ static int probe_one_instance(unsigned int nid)
                 if (!ecc_enable_override)
                         goto err_enable;

-                amd64_warn("Forcing ECC on!\n");
+                if (boot_cpu_data.x86 >= 0x17) {
+                        amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS.");
+                        goto err_enable;
+                } else
+                        amd64_warn("Forcing ECC on!\n");

                 if (!enable_ecc_error_reporting(s, nid, F3))
                         goto err_enable;
@@ -2902,7 +3360,9 @@ static int probe_one_instance(unsigned int nid)
         ret = init_one_instance(nid);
         if (ret < 0) {
                 amd64_err("Error probing instance: %d\n", nid);
-                restore_ecc_error_reporting(s, nid, F3);
+
+                if (boot_cpu_data.x86 < 0x17)
+                        restore_ecc_error_reporting(s, nid, F3);
         }

         return ret;
@@ -2938,7 +3398,11 @@ static void remove_one_instance(unsigned int nid)

         /* unregister from EDAC MCE */
         amd_report_gart_errors(false);
-        amd_unregister_ecc_decoder(decode_bus_error);
+
+        if (pvt->umc)
+                amd_unregister_ecc_decoder(decode_umc_error);
+        else
+                amd_unregister_ecc_decoder(decode_bus_error);

         kfree(ecc_stngs[nid]);
         ecc_stngs[nid] = NULL;
@@ -2963,7 +3427,10 @@ static void setup_pci_device(void)
                 return;

         pvt = mci->pvt_info;
-        pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
+        if (pvt->umc)
+                pci_ctl = edac_pci_create_generic_ctl(&pvt->F0->dev, EDAC_MOD_STR);
+        else
+                pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
         if (!pci_ctl) {
                 pr_warn("%s(): Unable to create PCI control\n", __func__);
                 pr_warn("%s(): PCI error report via EDAC not set\n", __func__);
@@ -2975,6 +3442,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
         { X86_VENDOR_AMD, 0x10, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
         { X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
         { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
+        { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
         { }
 };
 MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index c08870479054..f14c24d5b140 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -30,10 +30,10 @@
         edac_printk(KERN_NOTICE, "amd64", fmt, ##arg)

 #define amd64_warn(fmt, arg...) \
-        edac_printk(KERN_WARNING, "amd64", fmt, ##arg)
+        edac_printk(KERN_WARNING, "amd64", "Warning: " fmt, ##arg)

 #define amd64_err(fmt, arg...) \
-        edac_printk(KERN_ERR, "amd64", fmt, ##arg)
+        edac_printk(KERN_ERR, "amd64", "Error: " fmt, ##arg)

 #define amd64_mc_warn(mci, fmt, arg...) \
         edac_mc_chipset_printk(mci, KERN_WARNING, "amd64", fmt, ##arg)
@@ -118,6 +118,8 @@
 #define PCI_DEVICE_ID_AMD_16H_NB_F2      0x1532
 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581
 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582
+#define PCI_DEVICE_ID_AMD_17H_DF_F0      0x1460
+#define PCI_DEVICE_ID_AMD_17H_DF_F6      0x1466

 /*
  * Function 1 - Address Map
@@ -202,6 +204,8 @@
 #define DCT_SEL_HI                      0x114

 #define F15H_M60H_SCRCTRL               0x1C8
+#define F17H_SCR_BASE_ADDR              0x48
+#define F17H_SCR_LIMIT_ADDR             0x4C

 /*
  * Function 3 - Misc Control
@@ -248,6 +252,31 @@
 /* MSRs */
 #define MSR_MCGCTL_NBE                  BIT(4)

+/* F17h */
+
+/* F0: */
+#define DF_DHAR                         0x104
+
+/* UMC CH register offsets */
+#define UMCCH_BASE_ADDR                 0x0
+#define UMCCH_ADDR_MASK                 0x20
+#define UMCCH_ADDR_CFG                  0x30
+#define UMCCH_DIMM_CFG                  0x80
+#define UMCCH_UMC_CFG                   0x100
+#define UMCCH_SDP_CTRL                  0x104
+#define UMCCH_ECC_CTRL                  0x14C
+#define UMCCH_ECC_BAD_SYMBOL            0xD90
+#define UMCCH_UMC_CAP                   0xDF0
+#define UMCCH_UMC_CAP_HI                0xDF4
+
+/* UMC CH bitfields */
+#define UMC_ECC_CHIPKILL_CAP            BIT(31)
+#define UMC_ECC_ENABLED                 BIT(30)
+
+#define UMC_SDP_INIT                    BIT(31)
+
+#define NUM_UMCS                        2
+
 enum amd_families {
         K8_CPUS = 0,
         F10_CPUS,
@@ -256,6 +285,7 @@ enum amd_families {
         F15_M60H_CPUS,
         F16_CPUS,
         F16_M30H_CPUS,
+        F17_CPUS,
         NUM_FAMILIES,
 };

@@ -288,11 +318,19 @@ struct chip_select {
         u8 m_cnt;
 };

+struct amd64_umc {
+        u32 dimm_cfg;           /* DIMM Configuration reg */
+        u32 umc_cfg;            /* Configuration reg */
+        u32 sdp_ctrl;           /* SDP Control reg */
+        u32 ecc_ctrl;           /* DRAM ECC Control reg */
+        u32 umc_cap_hi;         /* Capabilities High reg */
+};
+
 struct amd64_pvt {
         struct low_ops *ops;

         /* pci_device handles which we utilize */
-        struct pci_dev *F1, *F2, *F3;
+        struct pci_dev *F0, *F1, *F2, *F3, *F6;

         u16 mc_node_id;         /* MC index of this MC node */
         u8 fam;                 /* CPU family */
@@ -335,6 +373,8 @@ struct amd64_pvt {

         /* cache the dram_type */
         enum mem_type dram_type;
+
+        struct amd64_umc *umc;  /* UMC registers */
 };

 enum err_codes {
@@ -342,6 +382,8 @@ enum err_codes {
         ERR_NODE        = -1,
         ERR_CSROW       = -2,
         ERR_CHANNEL     = -3,
+        ERR_SYND        = -4,
+        ERR_NORM_ADDR   = -5,
 };

 struct err_info {
@@ -354,6 +396,12 @@ struct err_info {
         u32 offset;
 };

+static inline u32 get_umc_base(u8 channel)
+{
+        /* ch0: 0x50000, ch1: 0x150000 */
+        return 0x50000 + (!!channel << 20);
+}
+
 static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i)
 {
         u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
@@ -422,7 +470,7 @@ struct low_ops {

 struct amd64_family_type {
         const char *ctl_name;
-        u16 f1_id, f2_id;
+        u16 f0_id, f1_id, f2_id, f6_id;
         struct low_ops ops;
 };

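The new get_umc_base() helper above compresses the per-channel register map into one expression: channel 1's block sits 1MB (bit 20) above channel 0's at 0x50000. A compilable check (not part of the patch):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same computation as get_umc_base() in the header above. */
    static uint32_t get_umc_base(uint8_t channel)
    {
            return 0x50000 + ((channel ? 1u : 0u) << 20);
    }

    int main(void)
    {
            assert(get_umc_base(0) == 0x50000);
            assert(get_umc_base(1) == 0x150000);

            /* Per-channel register addresses, e.g. UMCCH_SDP_CTRL at +0x104: */
            printf("ch0 SDP_CTRL: 0x%x, ch1 SDP_CTRL: 0x%x\n",
                   get_umc_base(0) + 0x104, get_umc_base(1) + 0x104);
            return 0;
    }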
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index c3ee3ad98a63..d2ea9c4f1824 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -482,15 +482,8 @@ void edac_mc_free(struct mem_ctl_info *mci)
 }
 EXPORT_SYMBOL_GPL(edac_mc_free);

-
-/**
- * find_mci_by_dev
- *
- * scan list of controllers looking for the one that manages
- * the 'dev' device
- * @dev: pointer to a struct device related with the MCI
- */
-struct mem_ctl_info *find_mci_by_dev(struct device *dev)
+/* Caller must hold mem_ctls_mutex */
+static struct mem_ctl_info *__find_mci_by_dev(struct device *dev)
 {
         struct mem_ctl_info *mci;
         struct list_head *item;
@@ -506,6 +499,24 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev)

         return NULL;
 }
+
+/**
+ * find_mci_by_dev
+ *
+ * scan list of controllers looking for the one that manages
+ * the 'dev' device
+ * @dev: pointer to a struct device related with the MCI
+ */
+struct mem_ctl_info *find_mci_by_dev(struct device *dev)
+{
+        struct mem_ctl_info *ret;
+
+        mutex_lock(&mem_ctls_mutex);
+        ret = __find_mci_by_dev(dev);
+        mutex_unlock(&mem_ctls_mutex);
+
+        return ret;
+}
 EXPORT_SYMBOL_GPL(find_mci_by_dev);

 /*
@@ -588,7 +599,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)

         insert_before = &mc_devices;

-        p = find_mci_by_dev(mci->pdev);
+        p = __find_mci_by_dev(mci->pdev);
         if (unlikely(p != NULL))
                 goto fail0;

@@ -640,26 +651,28 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci)
  *
  * If found, return a pointer to the structure.
  * Else return NULL.
- *
- * Caller must hold mem_ctls_mutex.
  */
 struct mem_ctl_info *edac_mc_find(int idx)
 {
+        struct mem_ctl_info *mci = NULL;
         struct list_head *item;
-        struct mem_ctl_info *mci;
+
+        mutex_lock(&mem_ctls_mutex);

         list_for_each(item, &mc_devices) {
                 mci = list_entry(item, struct mem_ctl_info, link);

                 if (mci->mc_idx >= idx) {
-                        if (mci->mc_idx == idx)
-                                return mci;
-
+                        if (mci->mc_idx == idx) {
+                                goto unlock;
+                        }
                         break;
                 }
         }

-        return NULL;
+unlock:
+        mutex_unlock(&mem_ctls_mutex);
+        return mci;
 }
 EXPORT_SYMBOL(edac_mc_find);

@@ -779,7 +792,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
         mutex_lock(&mem_ctls_mutex);

         /* find the requested mci struct in the global list */
-        mci = find_mci_by_dev(dev);
+        mci = __find_mci_by_dev(dev);
         if (mci == NULL) {
                 mutex_unlock(&mem_ctls_mutex);
                 return NULL;
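The edac_mc.c changes above follow a standard kernel locking split: the exported function takes mem_ctls_mutex itself, while a double-underscore helper assumes the lock is already held, so internal callers that hold it (add_mc_to_global_list, edac_mc_del_mc) call the helper directly. A toy userspace rendering of the pattern with pthreads (illustrative only, not the kernel API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int the_list[4] = { 10, 20, 30, 40 };

    /* Caller must hold list_mutex. */
    static int __find(int key)
    {
            for (unsigned i = 0; i < 4; i++)
                    if (the_list[i] == key)
                            return (int)i;
            return -1;
    }

    /* Public entry point: takes the lock around the lookup. */
    static int find(int key)
    {
            pthread_mutex_lock(&list_mutex);
            int ret = __find(key);
            pthread_mutex_unlock(&list_mutex);
            return ret;
    }

    int main(void)
    {
            printf("found 30 at index %d\n", find(30));
            return 0;
    }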
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 80762acd8cc8..34208f38c5b1 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -8,7 +8,7 @@ static struct amd_decoder_ops *fam_ops;
 static u8 xec_mask = 0xf;

 static bool report_gart_errors;
-static void (*nb_bus_decoder)(int node_id, struct mce *m);
+static void (*decode_dram_ecc)(int node_id, struct mce *m);

 void amd_report_gart_errors(bool v)
 {
@@ -18,16 +18,16 @@ EXPORT_SYMBOL_GPL(amd_report_gart_errors);

 void amd_register_ecc_decoder(void (*f)(int, struct mce *))
 {
-        nb_bus_decoder = f;
+        decode_dram_ecc = f;
 }
 EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);

 void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
 {
-        if (nb_bus_decoder) {
-                WARN_ON(nb_bus_decoder != f);
+        if (decode_dram_ecc) {
+                WARN_ON(decode_dram_ecc != f);

-                nb_bus_decoder = NULL;
+                decode_dram_ecc = NULL;
         }
 }
 EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
@@ -763,8 +763,8 @@ static void decode_mc4_mce(struct mce *m)

                         pr_cont("%s.\n", mc4_mce_desc[xec]);

-                        if (nb_bus_decoder)
-                                nb_bus_decoder(node_id, m);
+                        if (decode_dram_ecc)
+                                decode_dram_ecc(node_id, m);
                         return;
                 }
                 break;
@@ -877,6 +877,13 @@ static void decode_smca_errors(struct mce *m)
                 pr_emerg(HW_ERR "%s Error: ", ip_name);
                 pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
         }
+
+        /*
+         * amd_get_nb_id() returns the last level cache id.
+         * The last level cache on Fam17h is 1 level below the node.
+         */
+        if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
+                decode_dram_ecc(amd_get_nb_id(m->extcpu) >> 1, m);
 }

 static inline void amd_decode_err_code(u16 ec)
@@ -957,10 +964,13 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
                 ((m->status & MCI_STATUS_PCC)   ? "PCC"   : "-"),
                 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));

-        if (c->x86 >= 0x15)
-                pr_cont("|%s|%s",
-                        ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
-                        ((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
+        if (c->x86 >= 0x15) {
+                pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
+
+                /* F15h, bank4, bit 43 is part of McaStatSubCache. */
+                if (c->x86 != 0x15 || m->bank != 4)
+                        pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
+        }

         if (boot_cpu_has(X86_FEATURE_SMCA)) {
                 u32 low, high;
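The Poison-bit fix above hinges on one special case: on F15h bank 4, status bit 43 belongs to McaStatSubCache rather than Poison, so printing it would be misleading. A small standalone model of the decision (bit positions as defined in the kernel's asm/mce.h; sample status invented):

    #include <stdint.h>
    #include <stdio.h>

    #define MCI_STATUS_DEFERRED (1ull << 44)
    #define MCI_STATUS_POISON   (1ull << 43)

    /* Mirror of the flag printing above: skip Poison on F15h bank 4. */
    static void print_flags(uint8_t family, int bank, uint64_t status)
    {
            if (family >= 0x15) {
                    printf("|%s", (status & MCI_STATUS_DEFERRED) ? "Deferred" : "-");

                    if (family != 0x15 || bank != 4)
                            printf("|%s", (status & MCI_STATUS_POISON) ? "Poison" : "-");
            }
            printf("\n");
    }

    int main(void)
    {
            uint64_t st = MCI_STATUS_DEFERRED | MCI_STATUS_POISON;

            print_flags(0x15, 4, st);       /* Poison suppressed */
            print_flags(0x17, 0, st);       /* both printed */
            return 0;
    }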
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index ff0567526ee3..c62602141f95 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -300,6 +300,22 @@ err:
         return res;
 }

+static int mpc85xx_pci_err_remove(struct platform_device *op)
+{
+        struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev);
+        struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
+
+        edac_dbg(0, "\n");
+
+        out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, orig_pci_err_cap_dr);
+        out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, orig_pci_err_en);
+
+        edac_pci_del_device(&op->dev);
+        edac_pci_free_ctl_info(pci);
+
+        return 0;
+}
+
 static const struct platform_device_id mpc85xx_pci_err_match[] = {
         {
                 .name = "mpc85xx-pci-edac"
@@ -309,6 +325,7 @@ static const struct platform_device_id mpc85xx_pci_err_match[] = {

 static struct platform_driver mpc85xx_pci_err_driver = {
         .probe = mpc85xx_pci_err_probe,
+        .remove = mpc85xx_pci_err_remove,
         .id_table = mpc85xx_pci_err_match,
         .driver = {
                 .name = "mpc85xx_pci_err",
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 54775221a01f..c1ad0eb7d5dd 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -23,6 +23,7 @@
 #include <linux/math64.h>
 #include <linux/mod_devicetable.h>
 #include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 #include <asm/processor.h>
 #include <asm/mce.h>

@@ -3365,12 +3366,13 @@ fail0:
         { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }

 static const struct x86_cpu_id sbridge_cpuids[] = {
-        ICPU(0x2d, pci_dev_descr_sbridge_table),   /* SANDY_BRIDGE */
-        ICPU(0x3e, pci_dev_descr_ibridge_table),   /* IVY_BRIDGE */
-        ICPU(0x3f, pci_dev_descr_haswell_table),   /* HASWELL */
-        ICPU(0x4f, pci_dev_descr_broadwell_table), /* BROADWELL */
-        ICPU(0x56, pci_dev_descr_broadwell_table), /* BROADWELL-DE */
-        ICPU(0x57, pci_dev_descr_knl_table),       /* KNIGHTS_LANDING */
+        ICPU(INTEL_FAM6_SANDYBRIDGE_X,    pci_dev_descr_sbridge_table),
+        ICPU(INTEL_FAM6_IVYBRIDGE_X,      pci_dev_descr_ibridge_table),
+        ICPU(INTEL_FAM6_HASWELL_X,        pci_dev_descr_haswell_table),
+        ICPU(INTEL_FAM6_BROADWELL_X,      pci_dev_descr_broadwell_table),
+        ICPU(INTEL_FAM6_BROADWELL_XEON_D, pci_dev_descr_broadwell_table),
+        ICPU(INTEL_FAM6_XEON_PHI_KNL,     pci_dev_descr_knl_table),
+        ICPU(INTEL_FAM6_XEON_PHI_KNM,     pci_dev_descr_knl_table),
         { }
 };
 MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
index 0ff4878c2aa1..9edcb29b3001 100644
--- a/drivers/edac/skx_edac.c
+++ b/drivers/edac/skx_edac.c
@@ -25,6 +25,7 @@
 #include <linux/math64.h>
 #include <linux/mod_devicetable.h>
 #include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 #include <asm/processor.h>
 #include <asm/mce.h>

@@ -262,8 +263,8 @@ fail:
         return -ENODEV;
 }

-const struct x86_cpu_id skx_cpuids[] = {
-        { X86_VENDOR_INTEL, 6, 0x55, 0, 0 },    /* Skylake */
+static const struct x86_cpu_id skx_cpuids[] = {
+        { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X, 0, 0 },
         { }
 };
 MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
@@ -1036,7 +1037,7 @@ static void skx_remove(void)
  * search for all the devices we need
  * check which DIMMs are present.
  */
-int __init skx_init(void)
+static int __init skx_init(void)
 {
         const struct x86_cpu_id *id;
         const struct munit *m;
diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c
index bf19b6e3bd12..5569391ea800 100644
--- a/drivers/edac/xgene_edac.c
+++ b/drivers/edac/xgene_edac.c
@@ -1602,16 +1602,16 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
                 dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
         if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
                 dev_err(edac_dev->dev,
-                        "Mutilple IOB PA read data RAM error\n");
+                        "Multiple IOB PA read data RAM error\n");
         if (reg & IOBPA_WDATA_CORRUPT_MASK)
                 dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
         if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
                 dev_err(edac_dev->dev,
-                        "Mutilple IOB PA write data RAM error\n");
+                        "Multiple IOB PA write data RAM error\n");
         if (reg & IOBPA_TRANS_CORRUPT_MASK)
                 dev_err(edac_dev->dev, "IOB PA transaction error\n");
         if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
-                dev_err(edac_dev->dev, "Mutilple IOB PA transaction error\n");
+                dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
         if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
                 dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
         if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 9e0d78966552..cb56dcba68c6 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -134,6 +134,7 @@ enum dev_type {
 enum hw_event_mc_err_type {
         HW_EVENT_ERR_CORRECTED,
         HW_EVENT_ERR_UNCORRECTED,
+        HW_EVENT_ERR_DEFERRED,
         HW_EVENT_ERR_FATAL,
         HW_EVENT_ERR_INFO,
 };
@@ -145,6 +146,8 @@ static inline char *mc_event_error_type(const unsigned int err_type)
                 return "Corrected";
         case HW_EVENT_ERR_UNCORRECTED:
                 return "Uncorrected";
+        case HW_EVENT_ERR_DEFERRED:
+                return "Deferred";
         case HW_EVENT_ERR_FATAL:
                 return "Fatal";
         default:
@@ -192,10 +195,11 @@ static inline char *mc_event_error_type(const unsigned int err_type)
  * @MEM_DDR3:           DDR3 RAM
  * @MEM_RDDR3:          Registered DDR3 RAM
  *                      This is a variant of the DDR3 memories.
- * @MEM_LRDDR3          Load-Reduced DDR3 memory.
+ * @MEM_LRDDR3:         Load-Reduced DDR3 memory.
  * @MEM_DDR4:           Unbuffered DDR4 RAM
  * @MEM_RDDR4:          Registered DDR4 RAM
  *                      This is a variant of the DDR4 memories.
+ * @MEM_LRDDR4:         Load-Reduced DDR4 memory.
  */
 enum mem_type {
         MEM_EMPTY = 0,
@@ -218,6 +222,7 @@ enum mem_type {
         MEM_LRDDR3,
         MEM_DDR4,
         MEM_RDDR4,
+        MEM_LRDDR4,
 };

 #define MEM_FLAG_EMPTY          BIT(MEM_EMPTY)
@@ -239,6 +244,7 @@ enum mem_type {
 #define MEM_FLAG_RDDR3          BIT(MEM_RDDR3)
 #define MEM_FLAG_DDR4           BIT(MEM_DDR4)
 #define MEM_FLAG_RDDR4          BIT(MEM_RDDR4)
+#define MEM_FLAG_LRDDR4         BIT(MEM_LRDDR4)

 /**
  * enum edac-type - Error Detection and Correction capabilities and mode
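The edac.h additions slot a Deferred severity between Uncorrected and Fatal and give it a string in mc_event_error_type(). A reduced, compilable extract of that mapping, for quick reference (mirrors the enum above; not a substitute for the header):

    #include <stdio.h>

    enum hw_event_mc_err_type {
            HW_EVENT_ERR_CORRECTED,
            HW_EVENT_ERR_UNCORRECTED,
            HW_EVENT_ERR_DEFERRED,
            HW_EVENT_ERR_FATAL,
            HW_EVENT_ERR_INFO,
    };

    static const char *mc_event_error_type(unsigned int err_type)
    {
            switch (err_type) {
            case HW_EVENT_ERR_CORRECTED:    return "Corrected";
            case HW_EVENT_ERR_UNCORRECTED:  return "Uncorrected";
            case HW_EVENT_ERR_DEFERRED:     return "Deferred";
            case HW_EVENT_ERR_FATAL:        return "Fatal";
            default:                        return "Info";
            }
    }

    int main(void)
    {
            printf("%s\n", mc_event_error_type(HW_EVENT_ERR_DEFERRED));
            return 0;
    }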