about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-17 12:55:52 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-17 12:55:52 -0400
commit: de55a8958f6e3ef5ce5f0971b80bd44bfcac7cf1 (patch)
tree: 6a29f442764bb13dbd6c2abe60e70f9f2cb1961a
parent: 79b520e87e1214cfa107bdc8528b5d6c055a8b82 (diff)
parent: 06724535f8fa26e78238bf8adfc9c81650a665f7 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  amd64_edac: check NB MCE bank enable on the current node properly
  amd64_edac: Rewrite unganged mode code of f10_early_channel_count
  amd64_edac: cleanup amd64_check_ecc_enabled
  x86, EDAC: Provide function to return NodeId of a CPU
  amd64_edac: build driver only on AMD hardware
-rw-r--r--  arch/x86/include/asm/processor.h  |   2
-rw-r--r--  arch/x86/kernel/cpu/amd.c         |  10
-rw-r--r--  drivers/edac/Kconfig              |   2
-rw-r--r--  drivers/edac/amd64_edac.c         | 175
-rw-r--r--  drivers/edac/edac_mce_amd.c       |   2
5 files changed, 88 insertions, 103 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e08ea043e085..42a3f936dadc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -1020,4 +1020,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
1020extern int get_tsc_mode(unsigned long adr); 1020extern int get_tsc_mode(unsigned long adr);
1021extern int set_tsc_mode(unsigned int val); 1021extern int set_tsc_mode(unsigned int val);
1022 1022
1023extern int amd_get_nb_id(int cpu);
1024
1023#endif /* _ASM_X86_PROCESSOR_H */ 1025#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 22a47c82f3c0..f32fa71ccf97 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -333,6 +333,16 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
333#endif 333#endif
334} 334}
335 335
336int amd_get_nb_id(int cpu)
337{
338 int id = 0;
339#ifdef CONFIG_SMP
340 id = per_cpu(cpu_llc_id, cpu);
341#endif
342 return id;
343}
344EXPORT_SYMBOL_GPL(amd_get_nb_id);
345
336static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 346static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
337{ 347{
338#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 348#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 4339b1a879cd..a3ca18e2d7cf 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -59,7 +59,7 @@ config EDAC_MM_EDAC
59 59
60config EDAC_AMD64 60config EDAC_AMD64
61 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" 61 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
62 depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI 62 depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && CPU_SUP_AMD
63 help 63 help
64 Support for error detection and correction on the AMD 64 64 Support for error detection and correction on the AMD 64
65 Families of Memory Controllers (K8, F10h and F11h) 65 Families of Memory Controllers (K8, F10h and F11h)
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 173dc4a84166..4e551e63b6dc 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1255,7 +1255,9 @@ static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
1255 */ 1255 */
1256static int f10_early_channel_count(struct amd64_pvt *pvt) 1256static int f10_early_channel_count(struct amd64_pvt *pvt)
1257{ 1257{
1258 int dbams[] = { DBAM0, DBAM1 };
1258 int err = 0, channels = 0; 1259 int err = 0, channels = 0;
1260 int i, j;
1259 u32 dbam; 1261 u32 dbam;
1260 1262
1261 err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); 1263 err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
@@ -1288,46 +1290,19 @@ static int f10_early_channel_count(struct amd64_pvt *pvt)
1288 * is more than just one DIMM present in unganged mode. Need to check 1290 * is more than just one DIMM present in unganged mode. Need to check
1289 * both controllers since DIMMs can be placed in either one. 1291 * both controllers since DIMMs can be placed in either one.
1290 */ 1292 */
1291 channels = 0; 1293 for (i = 0; i < ARRAY_SIZE(dbams); i++) {
1292 err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM0, &dbam); 1294 err = pci_read_config_dword(pvt->dram_f2_ctl, dbams[i], &dbam);
1293 if (err)
1294 goto err_reg;
1295
1296 if (DBAM_DIMM(0, dbam) > 0)
1297 channels++;
1298 if (DBAM_DIMM(1, dbam) > 0)
1299 channels++;
1300 if (DBAM_DIMM(2, dbam) > 0)
1301 channels++;
1302 if (DBAM_DIMM(3, dbam) > 0)
1303 channels++;
1304
1305 /* If more than 2 DIMMs are present, then we have 2 channels */
1306 if (channels > 2)
1307 channels = 2;
1308 else if (channels == 0) {
1309 /* No DIMMs on DCT0, so look at DCT1 */
1310 err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM1, &dbam);
1311 if (err) 1295 if (err)
1312 goto err_reg; 1296 goto err_reg;
1313 1297
1314 if (DBAM_DIMM(0, dbam) > 0) 1298 for (j = 0; j < 4; j++) {
1315 channels++; 1299 if (DBAM_DIMM(j, dbam) > 0) {
1316 if (DBAM_DIMM(1, dbam) > 0) 1300 channels++;
1317 channels++; 1301 break;
1318 if (DBAM_DIMM(2, dbam) > 0) 1302 }
1319 channels++; 1303 }
1320 if (DBAM_DIMM(3, dbam) > 0)
1321 channels++;
1322
1323 if (channels > 2)
1324 channels = 2;
1325 } 1304 }
1326 1305
1327 /* If we found ALL 0 values, then assume just ONE DIMM-ONE Channel */
1328 if (channels == 0)
1329 channels = 1;
1330
1331 debugf0("MCT channel count: %d\n", channels); 1306 debugf0("MCT channel count: %d\n", channels);
1332 1307
1333 return channels; 1308 return channels;
@@ -2766,30 +2741,53 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
2766 wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); 2741 wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
2767} 2742}
2768 2743
2769static void check_mcg_ctl(void *ret) 2744/* get all cores on this DCT */
2745static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid)
2770{ 2746{
2771 u64 msr_val = 0; 2747 int cpu;
2772 u8 nbe;
2773
2774 rdmsrl(MSR_IA32_MCG_CTL, msr_val);
2775 nbe = msr_val & K8_MSR_MCGCTL_NBE;
2776
2777 debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2778 raw_smp_processor_id(), msr_val,
2779 (nbe ? "enabled" : "disabled"));
2780 2748
2781 if (!nbe) 2749 for_each_online_cpu(cpu)
2782 *(int *)ret = 0; 2750 if (amd_get_nb_id(cpu) == nid)
2751 cpumask_set_cpu(cpu, mask);
2783} 2752}
2784 2753
2785/* check MCG_CTL on all the cpus on this node */ 2754/* check MCG_CTL on all the cpus on this node */
2786static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask) 2755static bool amd64_nb_mce_bank_enabled_on_node(int nid)
2787{ 2756{
2788 int ret = 1; 2757 cpumask_t mask;
2789 preempt_disable(); 2758 struct msr *msrs;
2790 smp_call_function_many(mask, check_mcg_ctl, &ret, 1); 2759 int cpu, nbe, idx = 0;
2791 preempt_enable(); 2760 bool ret = false;
2792 2761
2762 cpumask_clear(&mask);
2763
2764 get_cpus_on_this_dct_cpumask(&mask, nid);
2765
2766 msrs = kzalloc(sizeof(struct msr) * cpumask_weight(&mask), GFP_KERNEL);
2767 if (!msrs) {
2768 amd64_printk(KERN_WARNING, "%s: error allocating msrs\n",
2769 __func__);
2770 return false;
2771 }
2772
2773 rdmsr_on_cpus(&mask, MSR_IA32_MCG_CTL, msrs);
2774
2775 for_each_cpu(cpu, &mask) {
2776 nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE;
2777
2778 debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2779 cpu, msrs[idx].q,
2780 (nbe ? "enabled" : "disabled"));
2781
2782 if (!nbe)
2783 goto out;
2784
2785 idx++;
2786 }
2787 ret = true;
2788
2789out:
2790 kfree(msrs);
2793 return ret; 2791 return ret;
2794} 2792}
2795 2793
@@ -2799,71 +2797,46 @@ static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
2799 * the memory system completely. A command line option allows to force-enable 2797 * the memory system completely. A command line option allows to force-enable
2800 * hardware ECC later in amd64_enable_ecc_error_reporting(). 2798 * hardware ECC later in amd64_enable_ecc_error_reporting().
2801 */ 2799 */
2800static const char *ecc_warning =
2801 "WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n"
2802 " Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n"
2803 " Also, use of the override can cause unknown side effects.\n";
2804
2802static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) 2805static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
2803{ 2806{
2804 u32 value; 2807 u32 value;
2805 int err = 0, ret = 0; 2808 int err = 0;
2806 u8 ecc_enabled = 0; 2809 u8 ecc_enabled = 0;
2810 bool nb_mce_en = false;
2807 2811
2808 err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); 2812 err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
2809 if (err) 2813 if (err)
2810 debugf0("Reading K8_NBCTL failed\n"); 2814 debugf0("Reading K8_NBCTL failed\n");
2811 2815
2812 ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); 2816 ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
2817 if (!ecc_enabled)
2818 amd64_printk(KERN_WARNING, "This node reports that Memory ECC "
2819 "is currently disabled, set F3x%x[22] (%s).\n",
2820 K8_NBCFG, pci_name(pvt->misc_f3_ctl));
2821 else
2822 amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n");
2813 2823
2814 ret = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id)); 2824 nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
2815 2825 if (!nb_mce_en)
2816 debugf0("K8_NBCFG=0x%x, DRAM ECC is %s\n", value, 2826 amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR "
2817 (value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled")); 2827 "0x%08x[4] on node %d to enable.\n",
2818 2828 MSR_IA32_MCG_CTL, pvt->mc_node_id);
2819 if (!ecc_enabled || !ret) {
2820 if (!ecc_enabled) {
2821 amd64_printk(KERN_WARNING, "This node reports that "
2822 "Memory ECC is currently "
2823 "disabled.\n");
2824 2829
2825 amd64_printk(KERN_WARNING, "bit 0x%lx in register " 2830 if (!ecc_enabled || !nb_mce_en) {
2826 "F3x%x of the MISC_CONTROL device (%s) "
2827 "should be enabled\n", K8_NBCFG_ECC_ENABLE,
2828 K8_NBCFG, pci_name(pvt->misc_f3_ctl));
2829 }
2830 if (!ret) {
2831 amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x "
2832 "of node %d should be enabled\n",
2833 K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL,
2834 pvt->mc_node_id);
2835 }
2836 if (!ecc_enable_override) { 2831 if (!ecc_enable_override) {
2837 amd64_printk(KERN_WARNING, "WARNING: ECC is NOT " 2832 amd64_printk(KERN_WARNING, "%s", ecc_warning);
2838 "currently enabled by the BIOS. Module " 2833 return -ENODEV;
2839 "will NOT be loaded.\n" 2834 }
2840 " Either Enable ECC in the BIOS, " 2835 } else
2841 "or use the 'ecc_enable_override' "
2842 "parameter.\n"
2843 " Might be a BIOS bug, if BIOS says "
2844 "ECC is enabled\n"
2845 " Use of the override can cause "
2846 "unknown side effects.\n");
2847 ret = -ENODEV;
2848 } else
2849 /*
2850 * enable further driver loading if ECC enable is
2851 * overridden.
2852 */
2853 ret = 0;
2854 } else {
2855 amd64_printk(KERN_INFO,
2856 "ECC is enabled by BIOS, Proceeding "
2857 "with EDAC module initialization\n");
2858
2859 /* Signal good ECC status */
2860 ret = 0;
2861
2862 /* CLEAR the override, since BIOS controlled it */ 2836 /* CLEAR the override, since BIOS controlled it */
2863 ecc_enable_override = 0; 2837 ecc_enable_override = 0;
2864 }
2865 2838
2866 return ret; 2839 return 0;
2867} 2840}
2868 2841
2869struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + 2842struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
index c8ca7136dacc..0c21c370c9dd 100644
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -405,7 +405,7 @@ void decode_mce(struct mce *m)
405 regs.nbsh = (u32)(m->status >> 32); 405 regs.nbsh = (u32)(m->status >> 32);
406 regs.nbeal = (u32) m->addr; 406 regs.nbeal = (u32) m->addr;
407 regs.nbeah = (u32)(m->addr >> 32); 407 regs.nbeah = (u32)(m->addr >> 32);
408 node = per_cpu(cpu_llc_id, m->extcpu); 408 node = amd_get_nb_id(m->extcpu);
409 409
410 amd_decode_nb_mce(node, &regs, 1); 410 amd_decode_nb_mce(node, &regs, 1);
411 break; 411 break;