diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-17 12:55:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-17 12:55:52 -0400 |
commit | de55a8958f6e3ef5ce5f0971b80bd44bfcac7cf1 (patch) | |
tree | 6a29f442764bb13dbd6c2abe60e70f9f2cb1961a | |
parent | 79b520e87e1214cfa107bdc8528b5d6c055a8b82 (diff) | |
parent | 06724535f8fa26e78238bf8adfc9c81650a665f7 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
amd64_edac: check NB MCE bank enable on the current node properly
amd64_edac: Rewrite unganged mode code of f10_early_channel_count
amd64_edac: cleanup amd64_check_ecc_enabled
x86, EDAC: Provide function to return NodeId of a CPU
amd64_edac: build driver only on AMD hardware
-rw-r--r-- | arch/x86/include/asm/processor.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 10 | ||||
-rw-r--r-- | drivers/edac/Kconfig | 2 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 175 | ||||
-rw-r--r-- | drivers/edac/edac_mce_amd.c | 2 |
5 files changed, 88 insertions, 103 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e08ea043e085..42a3f936dadc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -1020,4 +1020,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, | |||
1020 | extern int get_tsc_mode(unsigned long adr); | 1020 | extern int get_tsc_mode(unsigned long adr); |
1021 | extern int set_tsc_mode(unsigned int val); | 1021 | extern int set_tsc_mode(unsigned int val); |
1022 | 1022 | ||
1023 | extern int amd_get_nb_id(int cpu); | ||
1024 | |||
1023 | #endif /* _ASM_X86_PROCESSOR_H */ | 1025 | #endif /* _ASM_X86_PROCESSOR_H */ |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 22a47c82f3c0..f32fa71ccf97 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -333,6 +333,16 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
333 | #endif | 333 | #endif |
334 | } | 334 | } |
335 | 335 | ||
336 | int amd_get_nb_id(int cpu) | ||
337 | { | ||
338 | int id = 0; | ||
339 | #ifdef CONFIG_SMP | ||
340 | id = per_cpu(cpu_llc_id, cpu); | ||
341 | #endif | ||
342 | return id; | ||
343 | } | ||
344 | EXPORT_SYMBOL_GPL(amd_get_nb_id); | ||
345 | |||
336 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | 346 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) |
337 | { | 347 | { |
338 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 348 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 4339b1a879cd..a3ca18e2d7cf 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig | |||
@@ -59,7 +59,7 @@ config EDAC_MM_EDAC | |||
59 | 59 | ||
60 | config EDAC_AMD64 | 60 | config EDAC_AMD64 |
61 | tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" | 61 | tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" |
62 | depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI | 62 | depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && CPU_SUP_AMD |
63 | help | 63 | help |
64 | Support for error detection and correction on the AMD 64 | 64 | Support for error detection and correction on the AMD 64 |
65 | Families of Memory Controllers (K8, F10h and F11h) | 65 | Families of Memory Controllers (K8, F10h and F11h) |
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 173dc4a84166..4e551e63b6dc 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
@@ -1255,7 +1255,9 @@ static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map) | |||
1255 | */ | 1255 | */ |
1256 | static int f10_early_channel_count(struct amd64_pvt *pvt) | 1256 | static int f10_early_channel_count(struct amd64_pvt *pvt) |
1257 | { | 1257 | { |
1258 | int dbams[] = { DBAM0, DBAM1 }; | ||
1258 | int err = 0, channels = 0; | 1259 | int err = 0, channels = 0; |
1260 | int i, j; | ||
1259 | u32 dbam; | 1261 | u32 dbam; |
1260 | 1262 | ||
1261 | err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); | 1263 | err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); |
@@ -1288,46 +1290,19 @@ static int f10_early_channel_count(struct amd64_pvt *pvt) | |||
1288 | * is more than just one DIMM present in unganged mode. Need to check | 1290 | * is more than just one DIMM present in unganged mode. Need to check |
1289 | * both controllers since DIMMs can be placed in either one. | 1291 | * both controllers since DIMMs can be placed in either one. |
1290 | */ | 1292 | */ |
1291 | channels = 0; | 1293 | for (i = 0; i < ARRAY_SIZE(dbams); i++) { |
1292 | err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM0, &dbam); | 1294 | err = pci_read_config_dword(pvt->dram_f2_ctl, dbams[i], &dbam); |
1293 | if (err) | ||
1294 | goto err_reg; | ||
1295 | |||
1296 | if (DBAM_DIMM(0, dbam) > 0) | ||
1297 | channels++; | ||
1298 | if (DBAM_DIMM(1, dbam) > 0) | ||
1299 | channels++; | ||
1300 | if (DBAM_DIMM(2, dbam) > 0) | ||
1301 | channels++; | ||
1302 | if (DBAM_DIMM(3, dbam) > 0) | ||
1303 | channels++; | ||
1304 | |||
1305 | /* If more than 2 DIMMs are present, then we have 2 channels */ | ||
1306 | if (channels > 2) | ||
1307 | channels = 2; | ||
1308 | else if (channels == 0) { | ||
1309 | /* No DIMMs on DCT0, so look at DCT1 */ | ||
1310 | err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM1, &dbam); | ||
1311 | if (err) | 1295 | if (err) |
1312 | goto err_reg; | 1296 | goto err_reg; |
1313 | 1297 | ||
1314 | if (DBAM_DIMM(0, dbam) > 0) | 1298 | for (j = 0; j < 4; j++) { |
1315 | channels++; | 1299 | if (DBAM_DIMM(j, dbam) > 0) { |
1316 | if (DBAM_DIMM(1, dbam) > 0) | 1300 | channels++; |
1317 | channels++; | 1301 | break; |
1318 | if (DBAM_DIMM(2, dbam) > 0) | 1302 | } |
1319 | channels++; | 1303 | } |
1320 | if (DBAM_DIMM(3, dbam) > 0) | ||
1321 | channels++; | ||
1322 | |||
1323 | if (channels > 2) | ||
1324 | channels = 2; | ||
1325 | } | 1304 | } |
1326 | 1305 | ||
1327 | /* If we found ALL 0 values, then assume just ONE DIMM-ONE Channel */ | ||
1328 | if (channels == 0) | ||
1329 | channels = 1; | ||
1330 | |||
1331 | debugf0("MCT channel count: %d\n", channels); | 1306 | debugf0("MCT channel count: %d\n", channels); |
1332 | 1307 | ||
1333 | return channels; | 1308 | return channels; |
@@ -2766,30 +2741,53 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) | |||
2766 | wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); | 2741 | wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); |
2767 | } | 2742 | } |
2768 | 2743 | ||
2769 | static void check_mcg_ctl(void *ret) | 2744 | /* get all cores on this DCT */ |
2745 | static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid) | ||
2770 | { | 2746 | { |
2771 | u64 msr_val = 0; | 2747 | int cpu; |
2772 | u8 nbe; | ||
2773 | |||
2774 | rdmsrl(MSR_IA32_MCG_CTL, msr_val); | ||
2775 | nbe = msr_val & K8_MSR_MCGCTL_NBE; | ||
2776 | |||
2777 | debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", | ||
2778 | raw_smp_processor_id(), msr_val, | ||
2779 | (nbe ? "enabled" : "disabled")); | ||
2780 | 2748 | ||
2781 | if (!nbe) | 2749 | for_each_online_cpu(cpu) |
2782 | *(int *)ret = 0; | 2750 | if (amd_get_nb_id(cpu) == nid) |
2751 | cpumask_set_cpu(cpu, mask); | ||
2783 | } | 2752 | } |
2784 | 2753 | ||
2785 | /* check MCG_CTL on all the cpus on this node */ | 2754 | /* check MCG_CTL on all the cpus on this node */ |
2786 | static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask) | 2755 | static bool amd64_nb_mce_bank_enabled_on_node(int nid) |
2787 | { | 2756 | { |
2788 | int ret = 1; | 2757 | cpumask_t mask; |
2789 | preempt_disable(); | 2758 | struct msr *msrs; |
2790 | smp_call_function_many(mask, check_mcg_ctl, &ret, 1); | 2759 | int cpu, nbe, idx = 0; |
2791 | preempt_enable(); | 2760 | bool ret = false; |
2792 | 2761 | ||
2762 | cpumask_clear(&mask); | ||
2763 | |||
2764 | get_cpus_on_this_dct_cpumask(&mask, nid); | ||
2765 | |||
2766 | msrs = kzalloc(sizeof(struct msr) * cpumask_weight(&mask), GFP_KERNEL); | ||
2767 | if (!msrs) { | ||
2768 | amd64_printk(KERN_WARNING, "%s: error allocating msrs\n", | ||
2769 | __func__); | ||
2770 | return false; | ||
2771 | } | ||
2772 | |||
2773 | rdmsr_on_cpus(&mask, MSR_IA32_MCG_CTL, msrs); | ||
2774 | |||
2775 | for_each_cpu(cpu, &mask) { | ||
2776 | nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE; | ||
2777 | |||
2778 | debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", | ||
2779 | cpu, msrs[idx].q, | ||
2780 | (nbe ? "enabled" : "disabled")); | ||
2781 | |||
2782 | if (!nbe) | ||
2783 | goto out; | ||
2784 | |||
2785 | idx++; | ||
2786 | } | ||
2787 | ret = true; | ||
2788 | |||
2789 | out: | ||
2790 | kfree(msrs); | ||
2793 | return ret; | 2791 | return ret; |
2794 | } | 2792 | } |
2795 | 2793 | ||
@@ -2799,71 +2797,46 @@ static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask) | |||
2799 | * the memory system completely. A command line option allows to force-enable | 2797 | * the memory system completely. A command line option allows to force-enable |
2800 | * hardware ECC later in amd64_enable_ecc_error_reporting(). | 2798 | * hardware ECC later in amd64_enable_ecc_error_reporting(). |
2801 | */ | 2799 | */ |
2800 | static const char *ecc_warning = | ||
2801 | "WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n" | ||
2802 | " Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n" | ||
2803 | " Also, use of the override can cause unknown side effects.\n"; | ||
2804 | |||
2802 | static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) | 2805 | static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) |
2803 | { | 2806 | { |
2804 | u32 value; | 2807 | u32 value; |
2805 | int err = 0, ret = 0; | 2808 | int err = 0; |
2806 | u8 ecc_enabled = 0; | 2809 | u8 ecc_enabled = 0; |
2810 | bool nb_mce_en = false; | ||
2807 | 2811 | ||
2808 | err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); | 2812 | err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); |
2809 | if (err) | 2813 | if (err) |
2810 | debugf0("Reading K8_NBCTL failed\n"); | 2814 | debugf0("Reading K8_NBCTL failed\n"); |
2811 | 2815 | ||
2812 | ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); | 2816 | ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); |
2817 | if (!ecc_enabled) | ||
2818 | amd64_printk(KERN_WARNING, "This node reports that Memory ECC " | ||
2819 | "is currently disabled, set F3x%x[22] (%s).\n", | ||
2820 | K8_NBCFG, pci_name(pvt->misc_f3_ctl)); | ||
2821 | else | ||
2822 | amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n"); | ||
2813 | 2823 | ||
2814 | ret = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id)); | 2824 | nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id); |
2815 | 2825 | if (!nb_mce_en) | |
2816 | debugf0("K8_NBCFG=0x%x, DRAM ECC is %s\n", value, | 2826 | amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR " |
2817 | (value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled")); | 2827 | "0x%08x[4] on node %d to enable.\n", |
2818 | 2828 | MSR_IA32_MCG_CTL, pvt->mc_node_id); | |
2819 | if (!ecc_enabled || !ret) { | ||
2820 | if (!ecc_enabled) { | ||
2821 | amd64_printk(KERN_WARNING, "This node reports that " | ||
2822 | "Memory ECC is currently " | ||
2823 | "disabled.\n"); | ||
2824 | 2829 | ||
2825 | amd64_printk(KERN_WARNING, "bit 0x%lx in register " | 2830 | if (!ecc_enabled || !nb_mce_en) { |
2826 | "F3x%x of the MISC_CONTROL device (%s) " | ||
2827 | "should be enabled\n", K8_NBCFG_ECC_ENABLE, | ||
2828 | K8_NBCFG, pci_name(pvt->misc_f3_ctl)); | ||
2829 | } | ||
2830 | if (!ret) { | ||
2831 | amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x " | ||
2832 | "of node %d should be enabled\n", | ||
2833 | K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL, | ||
2834 | pvt->mc_node_id); | ||
2835 | } | ||
2836 | if (!ecc_enable_override) { | 2831 | if (!ecc_enable_override) { |
2837 | amd64_printk(KERN_WARNING, "WARNING: ECC is NOT " | 2832 | amd64_printk(KERN_WARNING, "%s", ecc_warning); |
2838 | "currently enabled by the BIOS. Module " | 2833 | return -ENODEV; |
2839 | "will NOT be loaded.\n" | 2834 | } |
2840 | " Either Enable ECC in the BIOS, " | 2835 | } else |
2841 | "or use the 'ecc_enable_override' " | ||
2842 | "parameter.\n" | ||
2843 | " Might be a BIOS bug, if BIOS says " | ||
2844 | "ECC is enabled\n" | ||
2845 | " Use of the override can cause " | ||
2846 | "unknown side effects.\n"); | ||
2847 | ret = -ENODEV; | ||
2848 | } else | ||
2849 | /* | ||
2850 | * enable further driver loading if ECC enable is | ||
2851 | * overridden. | ||
2852 | */ | ||
2853 | ret = 0; | ||
2854 | } else { | ||
2855 | amd64_printk(KERN_INFO, | ||
2856 | "ECC is enabled by BIOS, Proceeding " | ||
2857 | "with EDAC module initialization\n"); | ||
2858 | |||
2859 | /* Signal good ECC status */ | ||
2860 | ret = 0; | ||
2861 | |||
2862 | /* CLEAR the override, since BIOS controlled it */ | 2836 | /* CLEAR the override, since BIOS controlled it */ |
2863 | ecc_enable_override = 0; | 2837 | ecc_enable_override = 0; |
2864 | } | ||
2865 | 2838 | ||
2866 | return ret; | 2839 | return 0; |
2867 | } | 2840 | } |
2868 | 2841 | ||
2869 | struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + | 2842 | struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + |
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index c8ca7136dacc..0c21c370c9dd 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c | |||
@@ -405,7 +405,7 @@ void decode_mce(struct mce *m) | |||
405 | regs.nbsh = (u32)(m->status >> 32); | 405 | regs.nbsh = (u32)(m->status >> 32); |
406 | regs.nbeal = (u32) m->addr; | 406 | regs.nbeal = (u32) m->addr; |
407 | regs.nbeah = (u32)(m->addr >> 32); | 407 | regs.nbeah = (u32)(m->addr >> 32); |
408 | node = per_cpu(cpu_llc_id, m->extcpu); | 408 | node = amd_get_nb_id(m->extcpu); |
409 | 409 | ||
410 | amd_decode_nb_mce(node, &regs, 1); | 410 | amd_decode_nb_mce(node, &regs, 1); |
411 | break; | 411 | break; |