aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
authorJeff Roberson <jroberson@jroberson.net>2009-10-26 19:50:09 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-10-29 10:39:30 -0400
commit156edd4aaa819ec5867ced83c7b8dba9193789ea (patch)
tree58066a63e881f3dc359d8d439f04ffece0a34e6a /drivers/edac
parent8c85dd8730bfb696e691145335f884c7baef8277 (diff)
edac: i5400 fix csrow mapping
The i5400 EDAC driver has several bugs with chip-select row computation which most likely lead to bugs in detailed error reporting. Attempts to contact the authors have gone mostly unanswered so I am presenting my diff here. I do not subscribe to lkml and would appreciate being kept in the cc. The most egregious problem was miscalculating the addresses of MTR registers after register 0 by assuming they are 32bit rather than 16. This caused the driver to miss half of the memories. Most motherboards tend to have only 8 dimm slots and not 16, so this may not have been noticed before. Further, the row calculations multiplied the number of dimms several times, ultimately ending up with a maximum row of 32. The chipset only supports 4 dimms in each of 4 channels, so csrow could not be higher than 4 unless you use a row per-rank with dual-rank dimms. I opted to eliminate this behavior as it is confusing to the user and the error reporting works by slot and not rank. This gives a much clearer view of memory by slot and channel in /sys. Signed-off-by: Jeff Roberson <jroberson@jroberson.net> Signed-off-by: Doug Thompson <dougthompson@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/i5400_edac.c89
1 files changed, 28 insertions, 61 deletions
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index b08b6d8e2dc7..f99d10655ed4 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -46,9 +46,10 @@
46/* Limits for i5400 */ 46/* Limits for i5400 */
47#define NUM_MTRS_PER_BRANCH 4 47#define NUM_MTRS_PER_BRANCH 4
48#define CHANNELS_PER_BRANCH 2 48#define CHANNELS_PER_BRANCH 2
49#define MAX_DIMMS_PER_CHANNEL NUM_MTRS_PER_BRANCH
49#define MAX_CHANNELS 4 50#define MAX_CHANNELS 4
50#define MAX_DIMMS (MAX_CHANNELS * 4) /* Up to 4 DIMM's per channel */ 51/* max possible csrows per channel */
51#define MAX_CSROWS (MAX_DIMMS * 2) /* max possible csrows per channel */ 52#define MAX_CSROWS (MAX_DIMMS_PER_CHANNEL)
52 53
53/* Device 16, 54/* Device 16,
54 * Function 0: System Address 55 * Function 0: System Address
@@ -331,7 +332,6 @@ static const struct i5400_dev_info i5400_devs[] = {
331 332
332struct i5400_dimm_info { 333struct i5400_dimm_info {
333 int megabytes; /* size, 0 means not present */ 334 int megabytes; /* size, 0 means not present */
334 int dual_rank;
335}; 335};
336 336
337/* driver private data structure */ 337/* driver private data structure */
@@ -849,11 +849,9 @@ static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
849 int n; 849 int n;
850 850
851 /* There is one MTR for each slot pair of FB-DIMMs, 851 /* There is one MTR for each slot pair of FB-DIMMs,
852 Each slot may have one or two ranks (2 csrows),
853 Each slot pair may be at branch 0 or branch 1. 852 Each slot pair may be at branch 0 or branch 1.
854 So, csrow should be divided by eight
855 */ 853 */
856 n = csrow >> 3; 854 n = csrow;
857 855
858 if (n >= NUM_MTRS_PER_BRANCH) { 856 if (n >= NUM_MTRS_PER_BRANCH) {
859 debugf0("ERROR: trying to access an invalid csrow: %d\n", 857 debugf0("ERROR: trying to access an invalid csrow: %d\n",
@@ -905,25 +903,22 @@ static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel,
905 amb_present_reg = determine_amb_present_reg(pvt, channel); 903 amb_present_reg = determine_amb_present_reg(pvt, channel);
906 904
907 /* Determine if there is a DIMM present in this DIMM slot */ 905 /* Determine if there is a DIMM present in this DIMM slot */
908 if (amb_present_reg & (1 << (csrow >> 1))) { 906 if (amb_present_reg & (1 << csrow)) {
909 dinfo->dual_rank = MTR_DIMM_RANK(mtr); 907 /* Start with the number of bits for a Bank
910 908 * on the DRAM */
911 if (!((dinfo->dual_rank == 0) && 909 addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
912 ((csrow & 0x1) == 0x1))) { 910 /* Add thenumber of ROW bits */
913 /* Start with the number of bits for a Bank 911 addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
914 * on the DRAM */ 912 /* add the number of COLUMN bits */
915 addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr); 913 addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
916 /* Add thenumber of ROW bits */ 914 /* add the number of RANK bits */
917 addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); 915 addrBits += MTR_DIMM_RANK(mtr);
918 /* add the number of COLUMN bits */ 916
919 addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); 917 addrBits += 6; /* add 64 bits per DIMM */
920 918 addrBits -= 20; /* divide by 2^^20 */
921 addrBits += 6; /* add 64 bits per DIMM */ 919 addrBits -= 3; /* 8 bits per bytes */
922 addrBits -= 20; /* divide by 2^^20 */ 920
923 addrBits -= 3; /* 8 bits per bytes */ 921 dinfo->megabytes = 1 << addrBits;
924
925 dinfo->megabytes = 1 << addrBits;
926 }
927 } 922 }
928 } 923 }
929} 924}
@@ -951,12 +946,12 @@ static void calculate_dimm_size(struct i5400_pvt *pvt)
951 return; 946 return;
952 } 947 }
953 948
954 /* Scan all the actual CSROWS (which is # of DIMMS * 2) 949 /* Scan all the actual CSROWS
955 * and calculate the information for each DIMM 950 * and calculate the information for each DIMM
956 * Start with the highest csrow first, to display it first 951 * Start with the highest csrow first, to display it first
957 * and work toward the 0th csrow 952 * and work toward the 0th csrow
958 */ 953 */
959 max_csrows = pvt->maxdimmperch * 2; 954 max_csrows = pvt->maxdimmperch;
960 for (csrow = max_csrows - 1; csrow >= 0; csrow--) { 955 for (csrow = max_csrows - 1; csrow >= 0; csrow--) {
961 956
962 /* on an odd csrow, first output a 'boundary' marker, 957 /* on an odd csrow, first output a 'boundary' marker,
@@ -1064,7 +1059,7 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
1064 1059
1065 /* Get the set of MTR[0-3] regs by each branch */ 1060 /* Get the set of MTR[0-3] regs by each branch */
1066 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) { 1061 for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) {
1067 int where = MTR0 + (slot_row * sizeof(u32)); 1062 int where = MTR0 + (slot_row * sizeof(u16));
1068 1063
1069 /* Branch 0 set of MTR registers */ 1064 /* Branch 0 set of MTR registers */
1070 pci_read_config_word(pvt->branch_0, where, 1065 pci_read_config_word(pvt->branch_0, where,
@@ -1146,7 +1141,7 @@ static int i5400_init_csrows(struct mem_ctl_info *mci)
1146 pvt = mci->pvt_info; 1141 pvt = mci->pvt_info;
1147 1142
1148 channel_count = pvt->maxch; 1143 channel_count = pvt->maxch;
1149 max_csrows = pvt->maxdimmperch * 2; 1144 max_csrows = pvt->maxdimmperch;
1150 1145
1151 empty = 1; /* Assume NO memory */ 1146 empty = 1; /* Assume NO memory */
1152 1147
@@ -1215,28 +1210,6 @@ static void i5400_enable_error_reporting(struct mem_ctl_info *mci)
1215} 1210}
1216 1211
1217/* 1212/*
1218 * i5400_get_dimm_and_channel_counts(pdev, &num_csrows, &num_channels)
1219 *
1220 * ask the device how many channels are present and how many CSROWS
1221 * as well
1222 */
1223static void i5400_get_dimm_and_channel_counts(struct pci_dev *pdev,
1224 int *num_dimms_per_channel,
1225 int *num_channels)
1226{
1227 u8 value;
1228
1229 /* Need to retrieve just how many channels and dimms per channel are
1230 * supported on this memory controller
1231 */
1232 pci_read_config_byte(pdev, MAXDIMMPERCH, &value);
1233 *num_dimms_per_channel = (int)value * 2;
1234
1235 pci_read_config_byte(pdev, MAXCH, &value);
1236 *num_channels = (int)value;
1237}
1238
1239/*
1240 * i5400_probe1 Probe for ONE instance of device to see if it is 1213 * i5400_probe1 Probe for ONE instance of device to see if it is
1241 * present. 1214 * present.
1242 * return: 1215 * return:
@@ -1263,22 +1236,16 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
1263 if (PCI_FUNC(pdev->devfn) != 0) 1236 if (PCI_FUNC(pdev->devfn) != 0)
1264 return -ENODEV; 1237 return -ENODEV;
1265 1238
1266 /* Ask the devices for the number of CSROWS and CHANNELS so 1239 /* As we don't have a motherboard identification routine to determine
1267 * that we can calculate the memory resources, etc
1268 *
1269 * The Chipset will report what it can handle which will be greater
1270 * or equal to what the motherboard manufacturer will implement.
1271 *
1272 * As we don't have a motherboard identification routine to determine
1273 * actual number of slots/dimms per channel, we thus utilize the 1240 * actual number of slots/dimms per channel, we thus utilize the
1274 * resource as specified by the chipset. Thus, we might have 1241 * resource as specified by the chipset. Thus, we might have
1275 * have more DIMMs per channel than actually on the mobo, but this 1242 * have more DIMMs per channel than actually on the mobo, but this
1276 * allows the driver to support upto the chipset max, without 1243 * allows the driver to support upto the chipset max, without
1277 * some fancy mobo determination. 1244 * some fancy mobo determination.
1278 */ 1245 */
1279 i5400_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel, 1246 num_dimms_per_channel = MAX_DIMMS_PER_CHANNEL;
1280 &num_channels); 1247 num_channels = MAX_CHANNELS;
1281 num_csrows = num_dimms_per_channel * 2; 1248 num_csrows = num_dimms_per_channel;
1282 1249
1283 debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", 1250 debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
1284 __func__, num_channels, num_dimms_per_channel, num_csrows); 1251 __func__, num_channels, num_dimms_per_channel, num_csrows);