aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jason Uhlenkott <juhlenko@akamai.com> 2009-09-23 18:57:27 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-24 10:21:04 -0400
commit: dd8ef1db87a486577b3a76e6ad45df52e12d0145 (patch)
tree: 66d3c2f464743d8f9a1b19c9f709fb4782db0769
parent: 30a61fff3a2b19506c66ea81fecb6a7747af3d47 (diff)
edac: i3200 memory controller driver
A driver for the Intel 3200 and 3210 memory controllers. It has only had light testing so far, and currently makes no attempt to decode error addresses at anything finer than csrow granularity.

Signed-off-by: Jason Uhlenkott <juhlenko@akamai.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/edac/Kconfig 7
-rw-r--r-- drivers/edac/Makefile 2
-rw-r--r-- drivers/edac/i3200_edac.c 527
3 files changed, 536 insertions, 0 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index b82ad57c1082..02127e59fe8e 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -133,6 +133,13 @@ config EDAC_I3000
133 Support for error detection and correction on the Intel 133 Support for error detection and correction on the Intel
134 3000 and 3010 server chipsets. 134 3000 and 3010 server chipsets.
135 135
136config EDAC_I3200
137 tristate "Intel 3200"
138 depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL
139 help
140 Support for error detection and correction on the Intel
141 3200 and 3210 server chipsets.
142
136config EDAC_X38 143config EDAC_X38
137 tristate "Intel X38" 144 tristate "Intel X38"
138 depends on EDAC_MM_EDAC && PCI && X86 145 depends on EDAC_MM_EDAC && PCI && X86
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index cfa033ce53a7..7a473bbe8abd 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
32obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o 32obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o
33obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o 33obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o
34obj-$(CONFIG_EDAC_I3000) += i3000_edac.o 34obj-$(CONFIG_EDAC_I3000) += i3000_edac.o
35obj-$(CONFIG_EDAC_I3200) += i3200_edac.o
35obj-$(CONFIG_EDAC_X38) += x38_edac.o 36obj-$(CONFIG_EDAC_X38) += x38_edac.o
36obj-$(CONFIG_EDAC_I82860) += i82860_edac.o 37obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
37obj-$(CONFIG_EDAC_R82600) += r82600_edac.o 38obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
@@ -49,3 +50,4 @@ obj-$(CONFIG_EDAC_CELL) += cell_edac.o
49obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o 50obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
50obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o 51obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
51obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o 52obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
53
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
new file mode 100644
index 000000000000..fde4db91c4d2
--- /dev/null
+++ b/drivers/edac/i3200_edac.c
@@ -0,0 +1,527 @@
1/*
2 * Intel 3200/3210 Memory Controller kernel module
3 * Copyright (C) 2008-2009 Akamai Technologies, Inc.
4 * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
5 *
6 * This file may be distributed under the terms of the
7 * GNU General Public License.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/pci.h>
13#include <linux/pci_ids.h>
14#include <linux/slab.h>
15#include <linux/edac.h>
16#include <linux/io.h>
17#include "edac_core.h"
18
/* Driver version string, reported through mci->mod_ver. */
#define I3200_REVISION		"1.1"

/* Module name, used for mci->mod_name and the PCI driver name. */
#define EDAC_MOD_STR		"i3200_edac"

/* PCI device ID this driver matches (see i3200_pci_tbl). */
#define PCI_DEVICE_ID_INTEL_3200_HB	0x29f0

/* Memory topology: two channels of four ranks each, eight ranks in total. */
#define I3200_CHANNELS		2
#define I3200_RANKS_PER_CHANNEL	4
#define I3200_RANKS		8
28
/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */

/*
 * MCH Memory Mapped Register BAR.  Read as two 32-bit config dwords;
 * only bits 35:14 of the combined value carry the base address.
 */
#define I3200_MCHBAR_LOW	0x48
#define I3200_MCHBAR_HIGH	0x4c
#define I3200_MCHBAR_MASK	0xfffffc000ULL	/* bits 35:14 */
#define I3200_MMR_WINDOW_SIZE	16384

/*
 * Top of Memory (16b)
 *
 * 15:10 reserved
 *  9:0  total populated physical memory
 */
#define I3200_TOM		0xa0
#define I3200_TOM_MASK		0x3ff	/* bits 9:0 */
#define I3200_TOM_SHIFT		26	/* 64MiB grain */

/*
 * Error Status Register (16b)
 *
 * 15    reserved
 * 14    Isochronous TBWRR Run Behind FIFO Full (ITCV)
 * 13    Isochronous TBWRR Run Behind FIFO Put (ITSTV)
 * 12    reserved
 * 11    MCH Thermal Sensor Event for SMI/SCI/SERR (GTSE)
 * 10    reserved
 *  9    LOCK to non-DRAM Memory Flag (LCKF)
 *  8    reserved
 *  7    DRAM Throttle Flag (DTF)
 *  6:2  reserved
 *  1    Multi-bit DRAM ECC Error Flag (DMERR)
 *  0    Single-bit DRAM ECC Error Flag (DSERR)
 */
#define I3200_ERRSTS		0xc8
#define I3200_ERRSTS_UE		0x0002	/* DMERR */
#define I3200_ERRSTS_CE		0x0001	/* DSERR */
#define I3200_ERRSTS_BITS	(I3200_ERRSTS_UE | I3200_ERRSTS_CE)
65
66
/* Intel MMIO register space - device 0 function 0 - MMR space */

/*
 * Channel 0 / Channel 1 DRAM Rank Boundary (16b x 4 per channel)
 *
 * 15:10 reserved
 *  9:0  DRAM Rank Boundary Address
 */
#define I3200_C0DRB		0x200
#define I3200_C1DRB		0x600
#define I3200_DRB_MASK		0x3ff	/* bits 9:0 */
#define I3200_DRB_SHIFT		26	/* 64MiB grain */

/*
 * Channel 0 / Channel 1 ECC Error Log (64b)
 *
 * 63:48 Error Column Address (ERRCOL)
 * 47:32 Error Row Address (ERRROW)
 * 31:29 Error Bank Address (ERRBANK)
 * 28:27 Error Rank Address (ERRRANK)
 * 26:24 reserved
 * 23:16 Error Syndrome (ERRSYND)
 * 15: 2 reserved
 *  1    Multiple Bit Error Status (MERRSTS)
 *  0    Correctable Error Status (CERRSTS)
 */
#define I3200_C0ECCERRLOG	0x280
#define I3200_C1ECCERRLOG	0x680
#define I3200_ECCERRLOG_CE	0x1
#define I3200_ECCERRLOG_UE	0x2
#define I3200_ECCERRLOG_RANK_BITS	0x18000000
#define I3200_ECCERRLOG_RANK_SHIFT	27
#define I3200_ECCERRLOG_SYNDROME_BITS	0xff0000
#define I3200_ECCERRLOG_SYNDROME_SHIFT	16

/* Capability register in PCI config space; P.95 of spec for details. */
#define I3200_CAPID0		0xe0
98
99struct i3200_priv {
100 void __iomem *window;
101};
102
103static int nr_channels;
104
105static int how_many_channels(struct pci_dev *pdev)
106{
107 unsigned char capid0_8b; /* 8th byte of CAPID0 */
108
109 pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
110 if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
111 debugf0("In single channel mode.\n");
112 return 1;
113 } else {
114 debugf0("In dual channel mode.\n");
115 return 2;
116 }
117}
118
119static unsigned long eccerrlog_syndrome(u64 log)
120{
121 return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
122 I3200_ECCERRLOG_SYNDROME_SHIFT;
123}
124
125static int eccerrlog_row(int channel, u64 log)
126{
127 u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
128 I3200_ECCERRLOG_RANK_SHIFT);
129 return rank | (channel * I3200_RANKS_PER_CHANNEL);
130}
131
132enum i3200_chips {
133 I3200 = 0,
134};
135
136struct i3200_dev_info {
137 const char *ctl_name;
138};
139
140struct i3200_error_info {
141 u16 errsts;
142 u16 errsts2;
143 u64 eccerrlog[I3200_CHANNELS];
144};
145
146static const struct i3200_dev_info i3200_devs[] = {
147 [I3200] = {
148 .ctl_name = "i3200"
149 },
150};
151
152static struct pci_dev *mci_pdev;
153static int i3200_registered = 1;
154
155
156static void i3200_clear_error_info(struct mem_ctl_info *mci)
157{
158 struct pci_dev *pdev;
159
160 pdev = to_pci_dev(mci->dev);
161
162 /*
163 * Clear any error bits.
164 * (Yes, we really clear bits by writing 1 to them.)
165 */
166 pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
167 I3200_ERRSTS_BITS);
168}
169
170static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
171 struct i3200_error_info *info)
172{
173 struct pci_dev *pdev;
174 struct i3200_priv *priv = mci->pvt_info;
175 void __iomem *window = priv->window;
176
177 pdev = to_pci_dev(mci->dev);
178
179 /*
180 * This is a mess because there is no atomic way to read all the
181 * registers at once and the registers can transition from CE being
182 * overwritten by UE.
183 */
184 pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
185 if (!(info->errsts & I3200_ERRSTS_BITS))
186 return;
187
188 info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
189 if (nr_channels == 2)
190 info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
191
192 pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
193
194 /*
195 * If the error is the same for both reads then the first set
196 * of reads is valid. If there is a change then there is a CE
197 * with no info and the second set of reads is valid and
198 * should be UE info.
199 */
200 if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
201 info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
202 if (nr_channels == 2)
203 info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
204 }
205
206 i3200_clear_error_info(mci);
207}
208
209static void i3200_process_error_info(struct mem_ctl_info *mci,
210 struct i3200_error_info *info)
211{
212 int channel;
213 u64 log;
214
215 if (!(info->errsts & I3200_ERRSTS_BITS))
216 return;
217
218 if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
219 edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
220 info->errsts = info->errsts2;
221 }
222
223 for (channel = 0; channel < nr_channels; channel++) {
224 log = info->eccerrlog[channel];
225 if (log & I3200_ECCERRLOG_UE) {
226 edac_mc_handle_ue(mci, 0, 0,
227 eccerrlog_row(channel, log),
228 "i3200 UE");
229 } else if (log & I3200_ECCERRLOG_CE) {
230 edac_mc_handle_ce(mci, 0, 0,
231 eccerrlog_syndrome(log),
232 eccerrlog_row(channel, log), 0,
233 "i3200 CE");
234 }
235 }
236}
237
238static void i3200_check(struct mem_ctl_info *mci)
239{
240 struct i3200_error_info info;
241
242 debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
243 i3200_get_and_clear_error_info(mci, &info);
244 i3200_process_error_info(mci, &info);
245}
246
247
248void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
249{
250 union {
251 u64 mchbar;
252 struct {
253 u32 mchbar_low;
254 u32 mchbar_high;
255 };
256 } u;
257 void __iomem *window;
258
259 pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
260 pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
261 u.mchbar &= I3200_MCHBAR_MASK;
262
263 if (u.mchbar != (resource_size_t)u.mchbar) {
264 printk(KERN_ERR
265 "i3200: mmio space beyond accessible range (0x%llx)\n",
266 (unsigned long long)u.mchbar);
267 return NULL;
268 }
269
270 window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
271 if (!window)
272 printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
273 (unsigned long long)u.mchbar);
274
275 return window;
276}
277
278
279static void i3200_get_drbs(void __iomem *window,
280 u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
281{
282 int i;
283
284 for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
285 drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
286 drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
287 }
288}
289
290static bool i3200_is_stacked(struct pci_dev *pdev,
291 u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
292{
293 u16 tom;
294
295 pci_read_config_word(pdev, I3200_TOM, &tom);
296 tom &= I3200_TOM_MASK;
297
298 return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
299}
300
301static unsigned long drb_to_nr_pages(
302 u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
303 int channel, int rank)
304{
305 int n;
306
307 n = drbs[channel][rank];
308 if (rank > 0)
309 n -= drbs[channel][rank - 1];
310 if (stacked && (channel == 1) &&
311 drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
312 n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
313
314 n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
315 return n;
316}
317
318static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
319{
320 int rc;
321 int i;
322 struct mem_ctl_info *mci = NULL;
323 unsigned long last_page;
324 u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
325 bool stacked;
326 void __iomem *window;
327 struct i3200_priv *priv;
328
329 debugf0("MC: %s()\n", __func__);
330
331 window = i3200_map_mchbar(pdev);
332 if (!window)
333 return -ENODEV;
334
335 i3200_get_drbs(window, drbs);
336 nr_channels = how_many_channels(pdev);
337
338 mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
339 nr_channels, 0);
340 if (!mci)
341 return -ENOMEM;
342
343 debugf3("MC: %s(): init mci\n", __func__);
344
345 mci->dev = &pdev->dev;
346 mci->mtype_cap = MEM_FLAG_DDR2;
347
348 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
349 mci->edac_cap = EDAC_FLAG_SECDED;
350
351 mci->mod_name = EDAC_MOD_STR;
352 mci->mod_ver = I3200_REVISION;
353 mci->ctl_name = i3200_devs[dev_idx].ctl_name;
354 mci->dev_name = pci_name(pdev);
355 mci->edac_check = i3200_check;
356 mci->ctl_page_to_phys = NULL;
357 priv = mci->pvt_info;
358 priv->window = window;
359
360 stacked = i3200_is_stacked(pdev, drbs);
361
362 /*
363 * The dram rank boundary (DRB) reg values are boundary addresses
364 * for each DRAM rank with a granularity of 64MB. DRB regs are
365 * cumulative; the last one will contain the total memory
366 * contained in all ranks.
367 */
368 last_page = -1UL;
369 for (i = 0; i < mci->nr_csrows; i++) {
370 unsigned long nr_pages;
371 struct csrow_info *csrow = &mci->csrows[i];
372
373 nr_pages = drb_to_nr_pages(drbs, stacked,
374 i / I3200_RANKS_PER_CHANNEL,
375 i % I3200_RANKS_PER_CHANNEL);
376
377 if (nr_pages == 0) {
378 csrow->mtype = MEM_EMPTY;
379 continue;
380 }
381
382 csrow->first_page = last_page + 1;
383 last_page += nr_pages;
384 csrow->last_page = last_page;
385 csrow->nr_pages = nr_pages;
386
387 csrow->grain = nr_pages << PAGE_SHIFT;
388 csrow->mtype = MEM_DDR2;
389 csrow->dtype = DEV_UNKNOWN;
390 csrow->edac_mode = EDAC_UNKNOWN;
391 }
392
393 i3200_clear_error_info(mci);
394
395 rc = -ENODEV;
396 if (edac_mc_add_mc(mci)) {
397 debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
398 goto fail;
399 }
400
401 /* get this far and it's successful */
402 debugf3("MC: %s(): success\n", __func__);
403 return 0;
404
405fail:
406 iounmap(window);
407 if (mci)
408 edac_mc_free(mci);
409
410 return rc;
411}
412
413static int __devinit i3200_init_one(struct pci_dev *pdev,
414 const struct pci_device_id *ent)
415{
416 int rc;
417
418 debugf0("MC: %s()\n", __func__);
419
420 if (pci_enable_device(pdev) < 0)
421 return -EIO;
422
423 rc = i3200_probe1(pdev, ent->driver_data);
424 if (!mci_pdev)
425 mci_pdev = pci_dev_get(pdev);
426
427 return rc;
428}
429
430static void __devexit i3200_remove_one(struct pci_dev *pdev)
431{
432 struct mem_ctl_info *mci;
433 struct i3200_priv *priv;
434
435 debugf0("%s()\n", __func__);
436
437 mci = edac_mc_del_mc(&pdev->dev);
438 if (!mci)
439 return;
440
441 priv = mci->pvt_info;
442 iounmap(priv->window);
443
444 edac_mc_free(mci);
445}
446
447static const struct pci_device_id i3200_pci_tbl[] __devinitdata = {
448 {
449 PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
450 I3200},
451 {
452 0,
453 } /* 0 terminated list. */
454};
455
456MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
457
458static struct pci_driver i3200_driver = {
459 .name = EDAC_MOD_STR,
460 .probe = i3200_init_one,
461 .remove = __devexit_p(i3200_remove_one),
462 .id_table = i3200_pci_tbl,
463};
464
465static int __init i3200_init(void)
466{
467 int pci_rc;
468
469 debugf3("MC: %s()\n", __func__);
470
471 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
472 opstate_init();
473
474 pci_rc = pci_register_driver(&i3200_driver);
475 if (pci_rc < 0)
476 goto fail0;
477
478 if (!mci_pdev) {
479 i3200_registered = 0;
480 mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
481 PCI_DEVICE_ID_INTEL_3200_HB, NULL);
482 if (!mci_pdev) {
483 debugf0("i3200 pci_get_device fail\n");
484 pci_rc = -ENODEV;
485 goto fail1;
486 }
487
488 pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
489 if (pci_rc < 0) {
490 debugf0("i3200 init fail\n");
491 pci_rc = -ENODEV;
492 goto fail1;
493 }
494 }
495
496 return 0;
497
498fail1:
499 pci_unregister_driver(&i3200_driver);
500
501fail0:
502 if (mci_pdev)
503 pci_dev_put(mci_pdev);
504
505 return pci_rc;
506}
507
508static void __exit i3200_exit(void)
509{
510 debugf3("MC: %s()\n", __func__);
511
512 pci_unregister_driver(&i3200_driver);
513 if (!i3200_registered) {
514 i3200_remove_one(mci_pdev);
515 pci_dev_put(mci_pdev);
516 }
517}
518
519module_init(i3200_init);
520module_exit(i3200_exit);
521
522MODULE_LICENSE("GPL");
523MODULE_AUTHOR("Akamai Technologies, Inc.");
524MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
525
526module_param(edac_op_state, int, 0444);
527MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");