diff options
author | Mauro Carvalho Chehab <mchehab@redhat.com> | 2009-06-22 21:48:29 -0400 |
---|---|---|
committer | Mauro Carvalho Chehab <mchehab@redhat.com> | 2010-05-10 10:44:46 -0400 |
commit | 442305b152778f07504e9fdf64815d4841279bbe (patch) | |
tree | ded4b61a2b5bc7ba5d98e0db63b3e53049fab9d5 /drivers/edac | |
parent | 87d1d272ba25a1863e40ebb1df4bc0eed7a8fd11 (diff) |
i7core_edac: Add a memory check routine, based on device 3 function 4
This function appears only in the Xeon 5500 datasheet. Yet, testing with a
Xeon 3503 showed that it is also implemented on other Nehalem
processors.
At the first read, MC_TEST_ERR_RCV1 and MC_TEST_ERR_RCV0 can contain any
value. Modify CE error logic to update the error count only after the
second read.
An alternative approach would be to write to the rcv0 and rcv1
registers, but it seemed better to keep them untouched, since the BIOS might
assume that they are reserved for its exclusive use.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/i7core_edac.c | 115 |
1 files changed, 108 insertions, 7 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 190596af601a..b5dbc2b83961 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c | |||
@@ -62,6 +62,18 @@ | |||
62 | #define MC_STATUS 0x4c | 62 | #define MC_STATUS 0x4c |
63 | #define MC_MAX_DOD 0x64 | 63 | #define MC_MAX_DOD 0x64 |
64 | 64 | ||
65 | /* | ||
66 | * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet: | ||
67 | * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf | ||
68 | */ | ||
69 | |||
70 | #define MC_TEST_ERR_RCV1 0x60 | ||
71 | #define DIMM2_COR_ERR(r) ((r) & 0x7fff) | ||
72 | |||
73 | #define MC_TEST_ERR_RCV0 0x64 | ||
74 | #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff) | ||
75 | #define DIMM0_COR_ERR(r) ((r) & 0x7fff) | ||
76 | |||
65 | /* OFFSETS for Devices 4,5 and 6 Function 0 */ | 77 | /* OFFSETS for Devices 4,5 and 6 Function 0 */ |
66 | 78 | ||
67 | #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58 | 79 | #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58 |
@@ -136,8 +148,9 @@ | |||
136 | */ | 148 | */ |
137 | 149 | ||
138 | #define NUM_CHANS 3 | 150 | #define NUM_CHANS 3 |
139 | #define NUM_MCR_FUNCS 4 | 151 | #define MAX_DIMMS 3 /* Max DIMMS per channel */ |
140 | #define NUM_CHAN_FUNCS 3 | 152 | #define MAX_MCR_FUNC 4 |
153 | #define MAX_CHAN_FUNC 3 | ||
141 | 154 | ||
142 | struct i7core_info { | 155 | struct i7core_info { |
143 | u32 mc_control; | 156 | u32 mc_control; |
@@ -159,8 +172,8 @@ struct i7core_inject { | |||
159 | }; | 172 | }; |
160 | 173 | ||
161 | struct i7core_channel { | 174 | struct i7core_channel { |
162 | u32 ranks; | 175 | u32 ranks; |
163 | u32 dimms; | 176 | u32 dimms; |
164 | }; | 177 | }; |
165 | 178 | ||
166 | struct pci_id_descr { | 179 | struct pci_id_descr { |
@@ -171,11 +184,16 @@ struct pci_id_descr { | |||
171 | }; | 184 | }; |
172 | 185 | ||
173 | struct i7core_pvt { | 186 | struct i7core_pvt { |
174 | struct pci_dev *pci_mcr[NUM_MCR_FUNCS]; | 187 | struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1]; |
175 | struct pci_dev *pci_ch[NUM_CHANS][NUM_CHAN_FUNCS]; | 188 | struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1]; |
176 | struct i7core_info info; | 189 | struct i7core_info info; |
177 | struct i7core_inject inject; | 190 | struct i7core_inject inject; |
178 | struct i7core_channel channel[NUM_CHANS]; | 191 | struct i7core_channel channel[NUM_CHANS]; |
192 | |||
193 | int ce_count_available; | ||
194 | unsigned long ce_count[MAX_DIMMS]; /* ECC corrected errors counts per dimm */ | ||
195 | int last_ce_count[MAX_DIMMS]; | ||
196 | |||
179 | }; | 197 | }; |
180 | 198 | ||
181 | /* Device name and register DID (Device ID) */ | 199 | /* Device name and register DID (Device ID) */ |
@@ -749,6 +767,19 @@ static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci, | |||
749 | return sprintf(data, "%d\n", pvt->inject.enable); | 767 | return sprintf(data, "%d\n", pvt->inject.enable); |
750 | } | 768 | } |
751 | 769 | ||
770 | static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data) | ||
771 | { | ||
772 | struct i7core_pvt *pvt = mci->pvt_info; | ||
773 | |||
774 | if (!pvt->ce_count_available) | ||
775 | return sprintf(data, "unavailable\n"); | ||
776 | |||
777 | return sprintf(data, "dimm0: %lu\ndimm1: %lu\ndimm2: %lu\n", | ||
778 | pvt->ce_count[0], | ||
779 | pvt->ce_count[1], | ||
780 | pvt->ce_count[2]); | ||
781 | } | ||
782 | |||
752 | /* | 783 | /* |
753 | * Sysfs struct | 784 | * Sysfs struct |
754 | */ | 785 | */ |
@@ -789,6 +820,13 @@ static struct mcidev_sysfs_attribute i7core_inj_attrs[] = { | |||
789 | }, | 820 | }, |
790 | .show = i7core_inject_enable_show, | 821 | .show = i7core_inject_enable_show, |
791 | .store = i7core_inject_enable_store, | 822 | .store = i7core_inject_enable_store, |
823 | }, { | ||
824 | .attr = { | ||
825 | .name = "corrected_error_counts", | ||
826 | .mode = (S_IRUGO | S_IWUSR) | ||
827 | }, | ||
828 | .show = i7core_ce_regs_show, | ||
829 | .store = NULL, | ||
792 | }, | 830 | }, |
793 | }; | 831 | }; |
794 | 832 | ||
@@ -879,13 +917,76 @@ static int i7core_get_devices(struct mem_ctl_info *mci, struct pci_dev *mcidev) | |||
879 | return 0; | 917 | return 0; |
880 | } | 918 | } |
881 | 919 | ||
920 | /**************************************************************************** | ||
921 | Error check routines | ||
922 | ****************************************************************************/ | ||
923 | |||
924 | /* This function is based on the device 3 function 4 registers as described on: | ||
925 | * Intel Xeon Processor 5500 Series Datasheet Volume 2 | ||
926 | * http://www.intel.com/Assets/PDF/datasheet/321322.pdf | ||
927 | * also available at: | ||
928 | * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf | ||
929 | */ | ||
930 | static void check_mc_test_err(struct mem_ctl_info *mci) | ||
931 | { | ||
932 | struct i7core_pvt *pvt = mci->pvt_info; | ||
933 | u32 rcv1, rcv0; | ||
934 | int new0, new1, new2; | ||
935 | |||
936 | if (!pvt->pci_mcr[4]) { | ||
937 | debugf0("%s MCR registers not found\n",__func__); | ||
938 | return; | ||
939 | } | ||
940 | |||
941 | /* Corrected error reads */ | ||
942 | pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1); | ||
943 | pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0); | ||
944 | |||
945 | /* Store the new values */ | ||
946 | new2 = DIMM2_COR_ERR(rcv1); | ||
947 | new1 = DIMM1_COR_ERR(rcv0); | ||
948 | new0 = DIMM0_COR_ERR(rcv0); | ||
949 | |||
950 | debugf2("%s CE rcv1=0x%08x rcv0=0x%08x, %d %d %d\n", | ||
951 | (pvt->ce_count_available ? "UPDATE" : "READ"), | ||
952 | rcv1, rcv0, new0, new1, new2); | ||
953 | |||
954 | /* Updates CE counters if it is not the first time here */ | ||
955 | if (pvt->ce_count_available) { | ||
956 | /* Updates CE counters */ | ||
957 | int add0, add1, add2; | ||
958 | |||
959 | add2 = new2 - pvt->last_ce_count[2]; | ||
960 | add1 = new1 - pvt->last_ce_count[1]; | ||
961 | add0 = new0 - pvt->last_ce_count[0]; | ||
962 | |||
963 | if (add2 < 0) | ||
964 | add2 += 0x7fff; | ||
965 | pvt->ce_count[2] += add2; | ||
966 | |||
967 | if (add1 < 0) | ||
968 | add1 += 0x7fff; | ||
969 | pvt->ce_count[1] += add1; | ||
970 | |||
971 | if (add0 < 0) | ||
972 | add0 += 0x7fff; | ||
973 | pvt->ce_count[0] += add0; | ||
974 | } else | ||
975 | pvt->ce_count_available = 1; | ||
976 | |||
977 | /* Store the new values */ | ||
978 | pvt->last_ce_count[2] = new2; | ||
979 | pvt->last_ce_count[1] = new1; | ||
980 | pvt->last_ce_count[0] = new0; | ||
981 | } | ||
982 | |||
882 | /* | 983 | /* |
883 | * i7core_check_error Retrieve and process errors reported by the | 984 | * i7core_check_error Retrieve and process errors reported by the |
884 | * hardware. Called by the Core module. | 985 | * hardware. Called by the Core module. |
885 | */ | 986 | */ |
886 | static void i7core_check_error(struct mem_ctl_info *mci) | 987 | static void i7core_check_error(struct mem_ctl_info *mci) |
887 | { | 988 | { |
888 | /* FIXME: need a real code here */ | 989 | check_mc_test_err(mci); |
889 | } | 990 | } |
890 | 991 | ||
891 | /* | 992 | /* |