diff options
author | Doug Thompson <dougthompson@xmission.com> | 2009-04-27 13:46:08 -0400 |
---|---|---|
committer | Borislav Petkov <borislav.petkov@amd.com> | 2009-06-10 06:19:01 -0400 |
commit | f9431992b6227069bc54800d55531c6f78d276a7 (patch) | |
tree | d9b210e69b8d80308d7a512f047e598babf30084 | |
parent | 0ec449ee95b20245fef4aa9fa2486456f1540514 (diff) |
amd64_edac: add ECC reporting initializers
Borislav:
- convert to the new {rd|wr}msr_on_cpus interfaces.
- convert pvt->old_mcgctl to a bitmask thus saving some bytes
- fix/cleanup comments
- fix function return value patterns
- add a proper bugfix found by Doug to amd64_check_ecc_enabled where we
missed checking for the ECC enabled bit in NB CFG.
- cleanup debug calls
Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
-rw-r--r-- | drivers/edac/amd64_edac.c | 207 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 3 |
2 files changed, 209 insertions, 1 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5a6e714b115e..3b6c421531cf 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
@@ -2771,3 +2771,210 @@ static int amd64_init_csrows(struct mem_ctl_info *mci) | |||
2771 | return empty; | 2771 | return empty; |
2772 | } | 2772 | } |
2773 | 2773 | ||
2774 | /* | ||
2775 | * Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we" | ||
2776 | * enable it. | ||
2777 | */ | ||
2778 | static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) | ||
2779 | { | ||
2780 | struct amd64_pvt *pvt = mci->pvt_info; | ||
2781 | const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id); | ||
2782 | int cpu, idx = 0, err = 0; | ||
2783 | struct msr msrs[cpumask_weight(cpumask)]; | ||
2784 | u32 value; | ||
2785 | u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; | ||
2786 | |||
2787 | if (!ecc_enable_override) | ||
2788 | return; | ||
2789 | |||
2790 | memset(msrs, 0, sizeof(msrs)); | ||
2791 | |||
2792 | amd64_printk(KERN_WARNING, | ||
2793 | "'ecc_enable_override' parameter is active, " | ||
2794 | "Enabling AMD ECC hardware now: CAUTION\n"); | ||
2795 | |||
2796 | err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value); | ||
2797 | if (err) | ||
2798 | debugf0("Reading K8_NBCTL failed\n"); | ||
2799 | |||
2800 | /* turn on UECCn and CECCEn bits */ | ||
2801 | pvt->old_nbctl = value & mask; | ||
2802 | pvt->nbctl_mcgctl_saved = 1; | ||
2803 | |||
2804 | value |= mask; | ||
2805 | pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value); | ||
2806 | |||
2807 | rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); | ||
2808 | |||
2809 | for_each_cpu(cpu, cpumask) { | ||
2810 | if (msrs[idx].l & K8_MSR_MCGCTL_NBE) | ||
2811 | set_bit(idx, &pvt->old_mcgctl); | ||
2812 | |||
2813 | msrs[idx].l |= K8_MSR_MCGCTL_NBE; | ||
2814 | idx++; | ||
2815 | } | ||
2816 | wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); | ||
2817 | |||
2818 | err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); | ||
2819 | if (err) | ||
2820 | debugf0("Reading K8_NBCFG failed\n"); | ||
2821 | |||
2822 | debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, | ||
2823 | (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", | ||
2824 | (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); | ||
2825 | |||
2826 | if (!(value & K8_NBCFG_ECC_ENABLE)) { | ||
2827 | amd64_printk(KERN_WARNING, | ||
2828 | "This node reports that DRAM ECC is " | ||
2829 | "currently Disabled; ENABLING now\n"); | ||
2830 | |||
2831 | /* Attempt to turn on DRAM ECC Enable */ | ||
2832 | value |= K8_NBCFG_ECC_ENABLE; | ||
2833 | pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value); | ||
2834 | |||
2835 | err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); | ||
2836 | if (err) | ||
2837 | debugf0("Reading K8_NBCFG failed\n"); | ||
2838 | |||
2839 | if (!(value & K8_NBCFG_ECC_ENABLE)) { | ||
2840 | amd64_printk(KERN_WARNING, | ||
2841 | "Hardware rejects Enabling DRAM ECC checking\n" | ||
2842 | "Check memory DIMM configuration\n"); | ||
2843 | } else { | ||
2844 | amd64_printk(KERN_DEBUG, | ||
2845 | "Hardware accepted DRAM ECC Enable\n"); | ||
2846 | } | ||
2847 | } | ||
2848 | debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, | ||
2849 | (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", | ||
2850 | (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); | ||
2851 | |||
2852 | pvt->ctl_error_info.nbcfg = value; | ||
2853 | } | ||
2854 | |||
2855 | static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) | ||
2856 | { | ||
2857 | const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id); | ||
2858 | int cpu, idx = 0, err = 0; | ||
2859 | struct msr msrs[cpumask_weight(cpumask)]; | ||
2860 | u32 value; | ||
2861 | u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; | ||
2862 | |||
2863 | if (!pvt->nbctl_mcgctl_saved) | ||
2864 | return; | ||
2865 | |||
2866 | memset(msrs, 0, sizeof(msrs)); | ||
2867 | |||
2868 | err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value); | ||
2869 | if (err) | ||
2870 | debugf0("Reading K8_NBCTL failed\n"); | ||
2871 | value &= ~mask; | ||
2872 | value |= pvt->old_nbctl; | ||
2873 | |||
2874 | /* restore the NB Enable MCGCTL bit */ | ||
2875 | pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value); | ||
2876 | |||
2877 | rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); | ||
2878 | |||
2879 | for_each_cpu(cpu, cpumask) { | ||
2880 | msrs[idx].l &= ~K8_MSR_MCGCTL_NBE; | ||
2881 | msrs[idx].l |= | ||
2882 | test_bit(idx, &pvt->old_mcgctl) << K8_MSR_MCGCTL_NBE; | ||
2883 | idx++; | ||
2884 | } | ||
2885 | |||
2886 | wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); | ||
2887 | } | ||
2888 | |||
2889 | static void check_mcg_ctl(void *ret) | ||
2890 | { | ||
2891 | u64 msr_val = 0; | ||
2892 | u8 nbe; | ||
2893 | |||
2894 | rdmsrl(MSR_IA32_MCG_CTL, msr_val); | ||
2895 | nbe = msr_val & K8_MSR_MCGCTL_NBE; | ||
2896 | |||
2897 | debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", | ||
2898 | raw_smp_processor_id(), msr_val, | ||
2899 | (nbe ? "enabled" : "disabled")); | ||
2900 | |||
2901 | if (!nbe) | ||
2902 | *(int *)ret = 0; | ||
2903 | } | ||
2904 | |||
2905 | /* check MCG_CTL on all the cpus on this node */ | ||
2906 | static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask) | ||
2907 | { | ||
2908 | int ret = 1; | ||
2909 | preempt_disable(); | ||
2910 | smp_call_function_many(mask, check_mcg_ctl, &ret, 1); | ||
2911 | preempt_enable(); | ||
2912 | |||
2913 | return ret; | ||
2914 | } | ||
2915 | |||
2916 | /* | ||
2917 | * EDAC requires that the BIOS have ECC enabled before taking over the | ||
2918 | * processing of ECC errors. This is because the BIOS can properly initialize | ||
2919 | * the memory system completely. A command line option allows to force-enable | ||
2920 | * hardware ECC later in amd64_enable_ecc_error_reporting(). | ||
2921 | */ | ||
2922 | static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) | ||
2923 | { | ||
2924 | u32 value; | ||
2925 | int err = 0, ret = 0; | ||
2926 | u8 ecc_enabled = 0; | ||
2927 | |||
2928 | err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); | ||
2929 | if (err) | ||
2930 | debugf0("Reading K8_NBCTL failed\n"); | ||
2931 | |||
2932 | ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); | ||
2933 | |||
2934 | ret = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id)); | ||
2935 | |||
2936 | debugf0("K8_NBCFG=0x%x, DRAM ECC is %s\n", value, | ||
2937 | (value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled")); | ||
2938 | |||
2939 | if (!ecc_enabled || !ret) { | ||
2940 | if (!ecc_enabled) { | ||
2941 | amd64_printk(KERN_WARNING, "This node reports that " | ||
2942 | "Memory ECC is currently " | ||
2943 | "disabled.\n"); | ||
2944 | |||
2945 | amd64_printk(KERN_WARNING, "bit 0x%lx in register " | ||
2946 | "F3x%x of the MISC_CONTROL device (%s) " | ||
2947 | "should be enabled\n", K8_NBCFG_ECC_ENABLE, | ||
2948 | K8_NBCFG, pci_name(pvt->misc_f3_ctl)); | ||
2949 | } | ||
2950 | if (!ret) { | ||
2951 | amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x " | ||
2952 | "of node %d should be enabled\n", | ||
2953 | K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL, | ||
2954 | pvt->mc_node_id); | ||
2955 | } | ||
2956 | if (!ecc_enable_override) { | ||
2957 | amd64_printk(KERN_WARNING, "WARNING: ECC is NOT " | ||
2958 | "currently enabled by the BIOS. Module " | ||
2959 | "will NOT be loaded.\n" | ||
2960 | " Either Enable ECC in the BIOS, " | ||
2961 | "or use the 'ecc_enable_override' " | ||
2962 | "parameter.\n" | ||
2963 | " Might be a BIOS bug, if BIOS says " | ||
2964 | "ECC is enabled\n" | ||
2965 | " Use of the override can cause " | ||
2966 | "unknown side effects.\n"); | ||
2967 | ret = -ENODEV; | ||
2968 | } | ||
2969 | } else { | ||
2970 | amd64_printk(KERN_INFO, | ||
2971 | "ECC is enabled by BIOS, Proceeding " | ||
2972 | "with EDAC module initialization\n"); | ||
2973 | |||
2974 | /* CLEAR the override, since BIOS controlled it */ | ||
2975 | ecc_enable_override = 0; | ||
2976 | } | ||
2977 | |||
2978 | return ret; | ||
2979 | } | ||
2980 | |||
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 6f5d5d62cefc..e7aa760614ce 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h | |||
@@ -70,6 +70,7 @@ | |||
70 | #include <linux/slab.h> | 70 | #include <linux/slab.h> |
71 | #include <linux/mmzone.h> | 71 | #include <linux/mmzone.h> |
72 | #include <linux/edac.h> | 72 | #include <linux/edac.h> |
73 | #include <asm/msr.h> | ||
73 | #include "edac_core.h" | 74 | #include "edac_core.h" |
74 | 75 | ||
75 | #define amd64_printk(level, fmt, arg...) \ | 76 | #define amd64_printk(level, fmt, arg...) \ |
@@ -549,7 +550,7 @@ struct amd64_pvt { | |||
549 | /* Save old hw registers' values before we modified them */ | 550 | /* Save old hw registers' values before we modified them */ |
550 | u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */ | 551 | u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */ |
551 | u32 old_nbctl; | 552 | u32 old_nbctl; |
552 | u32 *old_mcgctl; /* per core on this node */ | 553 | unsigned long old_mcgctl; /* per core on this node */ |
553 | 554 | ||
554 | /* MC Type Index value: socket F vs Family 10h */ | 555 | /* MC Type Index value: socket F vs Family 10h */ |
555 | u32 mc_type_index; | 556 | u32 mc_type_index; |