aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig26
-rw-r--r--drivers/edac/Makefile7
-rw-r--r--drivers/edac/amd64_edac.c374
-rw-r--r--drivers/edac/amd64_edac.h15
4 files changed, 422 insertions, 0 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 956982f8739b..3dc650a7422c 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -58,6 +58,32 @@ config EDAC_MM_EDAC
58 occurred so that a particular failing memory module can be 58 occurred so that a particular failing memory module can be
59 replaced. If unsure, select 'Y'. 59 replaced. If unsure, select 'Y'.
60 60
61config EDAC_AMD64
62 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
63 depends on EDAC_MM_EDAC && X86 && PCI
64 default m
65 help
66 Support for error detection and correction on the AMD 64
67 Families of Memory Controllers (K8, F10h and F11h)
68
69config EDAC_AMD64_ERROR_INJECTION
70 bool "Sysfs Error Injection facilities"
71 depends on EDAC_AMD64
72 help
73 Recent Opterons (Family 10h and later) provide for Memory Error
74 Injection into the ECC detection circuits. The amd64_edac module
75 allows the operator/user to inject Uncorrectable and Correctable
76 errors into DRAM.
77
78 When enabled, in each of the respective memory controller directories
79 (/sys/devices/system/edac/mc/mcX), there are 3 input files:
80
81 - inject_section (0..3, 16-byte section of 64-byte cacheline),
82 - inject_word (0..8, 16-bit word of 16-byte section),
83 - inject_ecc_vector (hex ecc vector: select bits of inject word)
84
85 In addition, there are two control files, inject_read and inject_write,
86 which trigger the DRAM ECC Read and Write respectively.
61 87
62config EDAC_AMD76X 88config EDAC_AMD76X
63 tristate "AMD 76x (760, 762, 768)" 89 tristate "AMD 76x (760, 762, 768)"
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 59076819135d..633dc5604ee3 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -30,6 +30,13 @@ obj-$(CONFIG_EDAC_I3000) += i3000_edac.o
30obj-$(CONFIG_EDAC_X38) += x38_edac.o 30obj-$(CONFIG_EDAC_X38) += x38_edac.o
31obj-$(CONFIG_EDAC_I82860) += i82860_edac.o 31obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
32obj-$(CONFIG_EDAC_R82600) += r82600_edac.o 32obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
33
34amd64_edac_mod-y := amd64_edac_err_types.o amd64_edac.o
35amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o
36amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
37
38obj-$(CONFIG_EDAC_AMD64) += amd64_edac_mod.o
39
33obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o 40obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o
34obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac.o 41obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac.o
35obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o 42obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 3b6c421531cf..c36bf40568cf 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,4 +1,5 @@
1#include "amd64_edac.h" 1#include "amd64_edac.h"
2#include <asm/k8.h>
2 3
3static struct edac_pci_ctl_info *amd64_ctl_pci; 4static struct edac_pci_ctl_info *amd64_ctl_pci;
4 5
@@ -2978,3 +2979,376 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
2978 return ret; 2979 return ret;
2979} 2980}
2980 2981
2982struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
2983 ARRAY_SIZE(amd64_inj_attrs) +
2984 1];
2985
2986struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
2987
2988static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
2989{
2990 unsigned int i = 0, j = 0;
2991
2992 for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++)
2993 sysfs_attrs[i] = amd64_dbg_attrs[i];
2994
2995 for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
2996 sysfs_attrs[i] = amd64_inj_attrs[j];
2997
2998 sysfs_attrs[i] = terminator;
2999
3000 mci->mc_driver_sysfs_attributes = sysfs_attrs;
3001}
3002
3003static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
3004{
3005 struct amd64_pvt *pvt = mci->pvt_info;
3006
3007 mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
3008 mci->edac_ctl_cap = EDAC_FLAG_NONE;
3009 mci->edac_cap = EDAC_FLAG_NONE;
3010
3011 if (pvt->nbcap & K8_NBCAP_SECDED)
3012 mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
3013
3014 if (pvt->nbcap & K8_NBCAP_CHIPKILL)
3015 mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
3016
3017 mci->edac_cap = amd64_determine_edac_cap(pvt);
3018 mci->mod_name = EDAC_MOD_STR;
3019 mci->mod_ver = EDAC_AMD64_VERSION;
3020 mci->ctl_name = get_amd_family_name(pvt->mc_type_index);
3021 mci->dev_name = pci_name(pvt->dram_f2_ctl);
3022 mci->ctl_page_to_phys = NULL;
3023
3024 /* IMPORTANT: Set the polling 'check' function in this module */
3025 mci->edac_check = amd64_check;
3026
3027 /* memory scrubber interface */
3028 mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
3029 mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
3030}
3031
3032/*
3033 * Init stuff for this DRAM Controller device.
3034 *
3035 * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration
3036 * Space feature MUST be enabled on ALL Processors prior to actually reading
3037 * from the ECS registers. Since the loading of the module can occur on any
3038 * 'core', and cores don't 'see' all the other processors ECS data when the
3039 * others are NOT enabled. Our solution is to first enable ECS access in this
3040 * routine on all processors, gather some data in a amd64_pvt structure and
3041 * later come back in a finish-setup function to perform that final
3042 * initialization. See also amd64_init_2nd_stage() for that.
3043 */
3044static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
3045 int mc_type_index)
3046{
3047 struct amd64_pvt *pvt = NULL;
3048 int err = 0, ret;
3049
3050 ret = -ENOMEM;
3051 pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
3052 if (!pvt)
3053 goto err_exit;
3054
3055 pvt->mc_node_id = get_mc_node_id_from_pdev(dram_f2_ctl);
3056
3057 pvt->dram_f2_ctl = dram_f2_ctl;
3058 pvt->ext_model = boot_cpu_data.x86_model >> 4;
3059 pvt->mc_type_index = mc_type_index;
3060 pvt->ops = family_ops(mc_type_index);
3061 pvt->old_mcgctl = 0;
3062
3063 /*
3064 * We have the dram_f2_ctl device as an argument, now go reserve its
3065 * sibling devices from the PCI system.
3066 */
3067 ret = -ENODEV;
3068 err = amd64_reserve_mc_sibling_devices(pvt, mc_type_index);
3069 if (err)
3070 goto err_free;
3071
3072 ret = -EINVAL;
3073 err = amd64_check_ecc_enabled(pvt);
3074 if (err)
3075 goto err_put;
3076
3077 /*
3078 * Key operation here: setup of HW prior to performing ops on it. Some
3079 * setup is required to access ECS data. After this is performed, the
3080 * 'teardown' function must be called upon error and normal exit paths.
3081 */
3082 if (boot_cpu_data.x86 >= 0x10)
3083 amd64_setup(pvt);
3084
3085 /*
3086 * Save the pointer to the private data for use in 2nd initialization
3087 * stage
3088 */
3089 pvt_lookup[pvt->mc_node_id] = pvt;
3090
3091 return 0;
3092
3093err_put:
3094 amd64_free_mc_sibling_devices(pvt);
3095
3096err_free:
3097 kfree(pvt);
3098
3099err_exit:
3100 return ret;
3101}
3102
3103/*
3104 * This is the finishing stage of the init code. Needs to be performed after all
3105 * MCs' hardware have been prepped for accessing extended config space.
3106 */
3107static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
3108{
3109 int node_id = pvt->mc_node_id;
3110 struct mem_ctl_info *mci;
3111 int ret, err = 0;
3112
3113 amd64_read_mc_registers(pvt);
3114
3115 ret = -ENODEV;
3116 if (pvt->ops->probe_valid_hardware) {
3117 err = pvt->ops->probe_valid_hardware(pvt);
3118 if (err)
3119 goto err_exit;
3120 }
3121
3122 /*
3123 * We need to determine how many memory channels there are. Then use
3124 * that information for calculating the size of the dynamic instance
3125 * tables in the 'mci' structure
3126 */
3127 pvt->channel_count = pvt->ops->early_channel_count(pvt);
3128 if (pvt->channel_count < 0)
3129 goto err_exit;
3130
3131 ret = -ENOMEM;
3132 mci = edac_mc_alloc(0, CHIPSELECT_COUNT, pvt->channel_count, node_id);
3133 if (!mci)
3134 goto err_exit;
3135
3136 mci->pvt_info = pvt;
3137
3138 mci->dev = &pvt->dram_f2_ctl->dev;
3139 amd64_setup_mci_misc_attributes(mci);
3140
3141 if (amd64_init_csrows(mci))
3142 mci->edac_cap = EDAC_FLAG_NONE;
3143
3144 amd64_enable_ecc_error_reporting(mci);
3145 amd64_set_mc_sysfs_attributes(mci);
3146
3147 ret = -ENODEV;
3148 if (edac_mc_add_mc(mci)) {
3149 debugf1("failed edac_mc_add_mc()\n");
3150 goto err_add_mc;
3151 }
3152
3153 mci_lookup[node_id] = mci;
3154 pvt_lookup[node_id] = NULL;
3155 return 0;
3156
3157err_add_mc:
3158 edac_mc_free(mci);
3159
3160err_exit:
3161 debugf0("failure to init 2nd stage: ret=%d\n", ret);
3162
3163 amd64_restore_ecc_error_reporting(pvt);
3164
3165 if (boot_cpu_data.x86 > 0xf)
3166 amd64_teardown(pvt);
3167
3168 amd64_free_mc_sibling_devices(pvt);
3169
3170 kfree(pvt_lookup[pvt->mc_node_id]);
3171 pvt_lookup[node_id] = NULL;
3172
3173 return ret;
3174}
3175
3176
3177static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
3178 const struct pci_device_id *mc_type)
3179{
3180 int ret = 0;
3181
3182 debugf0("(MC node=%d,mc_type='%s')\n",
3183 get_mc_node_id_from_pdev(pdev),
3184 get_amd_family_name(mc_type->driver_data));
3185
3186 ret = pci_enable_device(pdev);
3187 if (ret < 0)
3188 ret = -EIO;
3189 else
3190 ret = amd64_probe_one_instance(pdev, mc_type->driver_data);
3191
3192 if (ret < 0)
3193 debugf0("ret=%d\n", ret);
3194
3195 return ret;
3196}
3197
3198static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
3199{
3200 struct mem_ctl_info *mci;
3201 struct amd64_pvt *pvt;
3202
3203 /* Remove from EDAC CORE tracking list */
3204 mci = edac_mc_del_mc(&pdev->dev);
3205 if (!mci)
3206 return;
3207
3208 pvt = mci->pvt_info;
3209
3210 amd64_restore_ecc_error_reporting(pvt);
3211
3212 if (boot_cpu_data.x86 > 0xf)
3213 amd64_teardown(pvt);
3214
3215 amd64_free_mc_sibling_devices(pvt);
3216
3217 kfree(pvt);
3218 mci->pvt_info = NULL;
3219
3220 mci_lookup[pvt->mc_node_id] = NULL;
3221
3222 /* Free the EDAC CORE resources */
3223 edac_mc_free(mci);
3224}
3225
3226/*
3227 * This table is part of the interface for loading drivers for PCI devices. The
3228 * PCI core identifies what devices are on a system during boot, and then
3229 * inquiry this table to see if this driver is for a given device found.
3230 */
3231static const struct pci_device_id amd64_pci_table[] __devinitdata = {
3232 {
3233 .vendor = PCI_VENDOR_ID_AMD,
3234 .device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
3235 .subvendor = PCI_ANY_ID,
3236 .subdevice = PCI_ANY_ID,
3237 .class = 0,
3238 .class_mask = 0,
3239 .driver_data = K8_CPUS
3240 },
3241 {
3242 .vendor = PCI_VENDOR_ID_AMD,
3243 .device = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
3244 .subvendor = PCI_ANY_ID,
3245 .subdevice = PCI_ANY_ID,
3246 .class = 0,
3247 .class_mask = 0,
3248 .driver_data = F10_CPUS
3249 },
3250 {
3251 .vendor = PCI_VENDOR_ID_AMD,
3252 .device = PCI_DEVICE_ID_AMD_11H_NB_DRAM,
3253 .subvendor = PCI_ANY_ID,
3254 .subdevice = PCI_ANY_ID,
3255 .class = 0,
3256 .class_mask = 0,
3257 .driver_data = F11_CPUS
3258 },
3259 {0, }
3260};
3261MODULE_DEVICE_TABLE(pci, amd64_pci_table);
3262
3263static struct pci_driver amd64_pci_driver = {
3264 .name = EDAC_MOD_STR,
3265 .probe = amd64_init_one_instance,
3266 .remove = __devexit_p(amd64_remove_one_instance),
3267 .id_table = amd64_pci_table,
3268};
3269
3270static void amd64_setup_pci_device(void)
3271{
3272 struct mem_ctl_info *mci;
3273 struct amd64_pvt *pvt;
3274
3275 if (amd64_ctl_pci)
3276 return;
3277
3278 mci = mci_lookup[0];
3279 if (mci) {
3280
3281 pvt = mci->pvt_info;
3282 amd64_ctl_pci =
3283 edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev,
3284 EDAC_MOD_STR);
3285
3286 if (!amd64_ctl_pci) {
3287 pr_warning("%s(): Unable to create PCI control\n",
3288 __func__);
3289
3290 pr_warning("%s(): PCI error report via EDAC not set\n",
3291 __func__);
3292 }
3293 }
3294}
3295
3296static int __init amd64_edac_init(void)
3297{
3298 int nb, err = -ENODEV;
3299
3300 edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
3301
3302 opstate_init();
3303
3304 if (cache_k8_northbridges() < 0)
3305 goto err_exit;
3306
3307 err = pci_register_driver(&amd64_pci_driver);
3308 if (err)
3309 return err;
3310
3311 /*
3312 * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
3313 * amd64_pvt structs. These will be used in the 2nd stage init function
3314 * to finish initialization of the MC instances.
3315 */
3316 for (nb = 0; nb < num_k8_northbridges; nb++) {
3317 if (!pvt_lookup[nb])
3318 continue;
3319
3320 err = amd64_init_2nd_stage(pvt_lookup[nb]);
3321 if (err)
3322 goto err_exit;
3323 }
3324
3325 amd64_setup_pci_device();
3326
3327 return 0;
3328
3329err_exit:
3330 debugf0("'finish_setup' stage failed\n");
3331 pci_unregister_driver(&amd64_pci_driver);
3332
3333 return err;
3334}
3335
3336static void __exit amd64_edac_exit(void)
3337{
3338 if (amd64_ctl_pci)
3339 edac_pci_release_generic_ctl(amd64_ctl_pci);
3340
3341 pci_unregister_driver(&amd64_pci_driver);
3342}
3343
3344module_init(amd64_edac_init);
3345module_exit(amd64_edac_exit);
3346
3347MODULE_LICENSE("GPL");
3348MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
3349 "Dave Peterson, Thayne Harbaugh");
3350MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
3351 EDAC_AMD64_VERSION);
3352
3353module_param(edac_op_state, int, 0444);
3354MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index e7aa760614ce..a159957e167b 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -577,6 +577,21 @@ extern const char *ii_msgs[4];
577extern const char *ext_msgs[32]; 577extern const char *ext_msgs[32];
578extern const char *htlink_msgs[8]; 578extern const char *htlink_msgs[8];
579 579
580#ifdef CONFIG_EDAC_DEBUG
581#define NUM_DBG_ATTRS 9
582#else
583#define NUM_DBG_ATTRS 0
584#endif
585
586#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
587#define NUM_INJ_ATTRS 5
588#else
589#define NUM_INJ_ATTRS 0
590#endif
591
592extern struct mcidev_sysfs_attribute amd64_dbg_attrs[NUM_DBG_ATTRS],
593 amd64_inj_attrs[NUM_INJ_ATTRS];
594
580/* 595/*
581 * Each of the PCI Device IDs types have their own set of hardware accessor 596 * Each of the PCI Device IDs types have their own set of hardware accessor
582 * functions and per device encoding/decoding logic. 597 * functions and per device encoding/decoding logic.