aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDoug Thompson <dougthompson@xmission.com>2009-04-27 13:46:08 -0400
committerBorislav Petkov <borislav.petkov@amd.com>2009-06-10 06:19:01 -0400
commitf9431992b6227069bc54800d55531c6f78d276a7 (patch)
treed9b210e69b8d80308d7a512f047e598babf30084
parent0ec449ee95b20245fef4aa9fa2486456f1540514 (diff)
amd64_edac: add ECC reporting initializers
Borislav:
- convert to the new {rd|wr}msr_on_cpus interfaces.
- convert pvt->old_mcgctl to a bitmask thus saving some bytes
- fix/cleanup comments
- fix function return value patterns
- add a proper bugfix found by Doug to amd64_check_ecc_enabled where we missed checking for the ECC enabled bit in NB CFG.
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
-rw-r--r--drivers/edac/amd64_edac.c207
-rw-r--r--drivers/edac/amd64_edac.h3
2 files changed, 209 insertions, 1 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5a6e714b115e..3b6c421531cf 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2771,3 +2771,210 @@ static int amd64_init_csrows(struct mem_ctl_info *mci)
2771 return empty; 2771 return empty;
2772} 2772}
2773 2773
2774/*
2775 * Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we"
2776 * enable it.
2777 */
2778static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
2779{
2780 struct amd64_pvt *pvt = mci->pvt_info;
2781 const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
2782 int cpu, idx = 0, err = 0;
2783 struct msr msrs[cpumask_weight(cpumask)];
2784 u32 value;
2785 u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
2786
2787 if (!ecc_enable_override)
2788 return;
2789
2790 memset(msrs, 0, sizeof(msrs));
2791
2792 amd64_printk(KERN_WARNING,
2793 "'ecc_enable_override' parameter is active, "
2794 "Enabling AMD ECC hardware now: CAUTION\n");
2795
2796 err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
2797 if (err)
2798 debugf0("Reading K8_NBCTL failed\n");
2799
2800 /* turn on UECCn and CECCEn bits */
2801 pvt->old_nbctl = value & mask;
2802 pvt->nbctl_mcgctl_saved = 1;
2803
2804 value |= mask;
2805 pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
2806
2807 rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
2808
2809 for_each_cpu(cpu, cpumask) {
2810 if (msrs[idx].l & K8_MSR_MCGCTL_NBE)
2811 set_bit(idx, &pvt->old_mcgctl);
2812
2813 msrs[idx].l |= K8_MSR_MCGCTL_NBE;
2814 idx++;
2815 }
2816 wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
2817
2818 err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
2819 if (err)
2820 debugf0("Reading K8_NBCFG failed\n");
2821
2822 debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
2823 (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
2824 (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
2825
2826 if (!(value & K8_NBCFG_ECC_ENABLE)) {
2827 amd64_printk(KERN_WARNING,
2828 "This node reports that DRAM ECC is "
2829 "currently Disabled; ENABLING now\n");
2830
2831 /* Attempt to turn on DRAM ECC Enable */
2832 value |= K8_NBCFG_ECC_ENABLE;
2833 pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
2834
2835 err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
2836 if (err)
2837 debugf0("Reading K8_NBCFG failed\n");
2838
2839 if (!(value & K8_NBCFG_ECC_ENABLE)) {
2840 amd64_printk(KERN_WARNING,
2841 "Hardware rejects Enabling DRAM ECC checking\n"
2842 "Check memory DIMM configuration\n");
2843 } else {
2844 amd64_printk(KERN_DEBUG,
2845 "Hardware accepted DRAM ECC Enable\n");
2846 }
2847 }
2848 debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
2849 (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
2850 (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
2851
2852 pvt->ctl_error_info.nbcfg = value;
2853}
2854
2855static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
2856{
2857 const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
2858 int cpu, idx = 0, err = 0;
2859 struct msr msrs[cpumask_weight(cpumask)];
2860 u32 value;
2861 u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
2862
2863 if (!pvt->nbctl_mcgctl_saved)
2864 return;
2865
2866 memset(msrs, 0, sizeof(msrs));
2867
2868 err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
2869 if (err)
2870 debugf0("Reading K8_NBCTL failed\n");
2871 value &= ~mask;
2872 value |= pvt->old_nbctl;
2873
2874 /* restore the NB Enable MCGCTL bit */
2875 pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
2876
2877 rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
2878
2879 for_each_cpu(cpu, cpumask) {
2880 msrs[idx].l &= ~K8_MSR_MCGCTL_NBE;
2881 msrs[idx].l |=
2882 test_bit(idx, &pvt->old_mcgctl) << K8_MSR_MCGCTL_NBE;
2883 idx++;
2884 }
2885
2886 wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
2887}
2888
2889static void check_mcg_ctl(void *ret)
2890{
2891 u64 msr_val = 0;
2892 u8 nbe;
2893
2894 rdmsrl(MSR_IA32_MCG_CTL, msr_val);
2895 nbe = msr_val & K8_MSR_MCGCTL_NBE;
2896
2897 debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2898 raw_smp_processor_id(), msr_val,
2899 (nbe ? "enabled" : "disabled"));
2900
2901 if (!nbe)
2902 *(int *)ret = 0;
2903}
2904
2905/* check MCG_CTL on all the cpus on this node */
2906static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
2907{
2908 int ret = 1;
2909 preempt_disable();
2910 smp_call_function_many(mask, check_mcg_ctl, &ret, 1);
2911 preempt_enable();
2912
2913 return ret;
2914}
2915
2916/*
2917 * EDAC requires that the BIOS have ECC enabled before taking over the
2918 * processing of ECC errors. This is because the BIOS can properly initialize
2919 * the memory system completely. A command line option allows to force-enable
2920 * hardware ECC later in amd64_enable_ecc_error_reporting().
2921 */
2922static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
2923{
2924 u32 value;
2925 int err = 0, ret = 0;
2926 u8 ecc_enabled = 0;
2927
2928 err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
2929 if (err)
2930 debugf0("Reading K8_NBCTL failed\n");
2931
2932 ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
2933
2934 ret = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id));
2935
2936 debugf0("K8_NBCFG=0x%x, DRAM ECC is %s\n", value,
2937 (value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled"));
2938
2939 if (!ecc_enabled || !ret) {
2940 if (!ecc_enabled) {
2941 amd64_printk(KERN_WARNING, "This node reports that "
2942 "Memory ECC is currently "
2943 "disabled.\n");
2944
2945 amd64_printk(KERN_WARNING, "bit 0x%lx in register "
2946 "F3x%x of the MISC_CONTROL device (%s) "
2947 "should be enabled\n", K8_NBCFG_ECC_ENABLE,
2948 K8_NBCFG, pci_name(pvt->misc_f3_ctl));
2949 }
2950 if (!ret) {
2951 amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x "
2952 "of node %d should be enabled\n",
2953 K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL,
2954 pvt->mc_node_id);
2955 }
2956 if (!ecc_enable_override) {
2957 amd64_printk(KERN_WARNING, "WARNING: ECC is NOT "
2958 "currently enabled by the BIOS. Module "
2959 "will NOT be loaded.\n"
2960 " Either Enable ECC in the BIOS, "
2961 "or use the 'ecc_enable_override' "
2962 "parameter.\n"
2963 " Might be a BIOS bug, if BIOS says "
2964 "ECC is enabled\n"
2965 " Use of the override can cause "
2966 "unknown side effects.\n");
2967 ret = -ENODEV;
2968 }
2969 } else {
2970 amd64_printk(KERN_INFO,
2971 "ECC is enabled by BIOS, Proceeding "
2972 "with EDAC module initialization\n");
2973
2974 /* CLEAR the override, since BIOS controlled it */
2975 ecc_enable_override = 0;
2976 }
2977
2978 return ret;
2979}
2980
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 6f5d5d62cefc..e7aa760614ce 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -70,6 +70,7 @@
70#include <linux/slab.h> 70#include <linux/slab.h>
71#include <linux/mmzone.h> 71#include <linux/mmzone.h>
72#include <linux/edac.h> 72#include <linux/edac.h>
73#include <asm/msr.h>
73#include "edac_core.h" 74#include "edac_core.h"
74 75
75#define amd64_printk(level, fmt, arg...) \ 76#define amd64_printk(level, fmt, arg...) \
@@ -549,7 +550,7 @@ struct amd64_pvt {
549 /* Save old hw registers' values before we modified them */ 550 /* Save old hw registers' values before we modified them */
550 u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */ 551 u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */
551 u32 old_nbctl; 552 u32 old_nbctl;
552 u32 *old_mcgctl; /* per core on this node */ 553 unsigned long old_mcgctl; /* per core on this node */
553 554
554 /* MC Type Index value: socket F vs Family 10h */ 555 /* MC Type Index value: socket F vs Family 10h */
555 u32 mc_type_index; 556 u32 mc_type_index;