aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
authorHitoshi Mitake <mitake@clustcom.com>2008-10-29 17:00:50 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-30 14:38:45 -0400
commitdf8bc08c192f00f155185bfd6f052d46a728814a (patch)
tree613b51ea3e9c7b16079d70964c101c2d229b3415 /drivers/edac
parentc17dad6905fc82d8f523399e5c3f014e81d61df6 (diff)
edac x38: new MC driver module
I wrote a new module for Intel X38 chipset. This chipset is very similar to Intel 3200 chipset, but there are some different points, so I copyed i3200_edac.c and modified. This is Intel's web page describing this chipset. http://www.intel.com/Products/Desktop/Chipsets/X38/X38-overview.htm I've tested this new module with broken memory, and it seems to be working well. Signed-off-by: Hitoshi Mitake <mitake@clustcom.com> Signed-off-by: Doug Thompson <dougthompson@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig7
-rw-r--r--drivers/edac/Makefile1
-rw-r--r--drivers/edac/x38_edac.c524
3 files changed, 532 insertions, 0 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 5a11e3cbcae2..e0dbd388757f 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -102,6 +102,13 @@ config EDAC_I3000
102 Support for error detection and correction on the Intel 102 Support for error detection and correction on the Intel
103 3000 and 3010 server chipsets. 103 3000 and 3010 server chipsets.
104 104
105config EDAC_X38
106 tristate "Intel X38"
107 depends on EDAC_MM_EDAC && PCI && X86
108 help
109 Support for error detection and correction on the Intel
110 X38 server chipsets.
111
105config EDAC_I82860 112config EDAC_I82860
106 tristate "Intel 82860" 113 tristate "Intel 82860"
107 depends on EDAC_MM_EDAC && PCI && X86_32 114 depends on EDAC_MM_EDAC && PCI && X86_32
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index e5e9104b5520..62c2d9bad8dc 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
26obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o 26obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o
27obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o 27obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o
28obj-$(CONFIG_EDAC_I3000) += i3000_edac.o 28obj-$(CONFIG_EDAC_I3000) += i3000_edac.o
29obj-$(CONFIG_EDAC_X38) += x38_edac.o
29obj-$(CONFIG_EDAC_I82860) += i82860_edac.o 30obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
30obj-$(CONFIG_EDAC_R82600) += r82600_edac.o 31obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
31obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o 32obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o
diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c
new file mode 100644
index 000000000000..2406c2ce2844
--- /dev/null
+++ b/drivers/edac/x38_edac.c
@@ -0,0 +1,524 @@
1/*
2 * Intel X38 Memory Controller kernel module
3 * Copyright (C) 2008 Cluster Computing, Inc.
4 *
5 * This file may be distributed under the terms of the
6 * GNU General Public License.
7 *
8 * This file is based on i3200_edac.c
9 *
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/pci.h>
15#include <linux/pci_ids.h>
16#include <linux/slab.h>
17#include <linux/edac.h>
18#include "edac_core.h"
19
20#define X38_REVISION "1.1"
21
22#define EDAC_MOD_STR "x38_edac"
23
24#define PCI_DEVICE_ID_INTEL_X38_HB 0x29e0
25
26#define X38_RANKS 8
27#define X38_RANKS_PER_CHANNEL 4
28#define X38_CHANNELS 2
29
30/* Intel X38 register addresses - device 0 function 0 - DRAM Controller */
31
32#define X38_MCHBAR_LOW 0x48 /* MCH Memory Mapped Register BAR */
33#define X38_MCHBAR_HIGH 0x4b
34#define X38_MCHBAR_MASK 0xfffffc000ULL /* bits 35:14 */
35#define X38_MMR_WINDOW_SIZE 16384
36
37#define X38_TOM 0xa0 /* Top of Memory (16b)
38 *
39 * 15:10 reserved
40 * 9:0 total populated physical memory
41 */
42#define X38_TOM_MASK 0x3ff /* bits 9:0 */
43#define X38_TOM_SHIFT 26 /* 64MiB grain */
44
45#define X38_ERRSTS 0xc8 /* Error Status Register (16b)
46 *
47 * 15 reserved
48 * 14 Isochronous TBWRR Run Behind FIFO Full
49 * (ITCV)
50 * 13 Isochronous TBWRR Run Behind FIFO Put
51 * (ITSTV)
52 * 12 reserved
53 * 11 MCH Thermal Sensor Event
54 * for SMI/SCI/SERR (GTSE)
55 * 10 reserved
56 * 9 LOCK to non-DRAM Memory Flag (LCKF)
57 * 8 reserved
58 * 7 DRAM Throttle Flag (DTF)
59 * 6:2 reserved
60 * 1 Multi-bit DRAM ECC Error Flag (DMERR)
61 * 0 Single-bit DRAM ECC Error Flag (DSERR)
62 */
63#define X38_ERRSTS_UE 0x0002
64#define X38_ERRSTS_CE 0x0001
65#define X38_ERRSTS_BITS (X38_ERRSTS_UE | X38_ERRSTS_CE)
66
67
68/* Intel MMIO register space - device 0 function 0 - MMR space */
69
70#define X38_C0DRB 0x200 /* Channel 0 DRAM Rank Boundary (16b x 4)
71 *
72 * 15:10 reserved
73 * 9:0 Channel 0 DRAM Rank Boundary Address
74 */
75#define X38_C1DRB 0x600 /* Channel 1 DRAM Rank Boundary (16b x 4) */
76#define X38_DRB_MASK 0x3ff /* bits 9:0 */
77#define X38_DRB_SHIFT 26 /* 64MiB grain */
78
79#define X38_C0ECCERRLOG 0x280 /* Channel 0 ECC Error Log (64b)
80 *
81 * 63:48 Error Column Address (ERRCOL)
82 * 47:32 Error Row Address (ERRROW)
83 * 31:29 Error Bank Address (ERRBANK)
84 * 28:27 Error Rank Address (ERRRANK)
85 * 26:24 reserved
86 * 23:16 Error Syndrome (ERRSYND)
87 * 15: 2 reserved
88 * 1 Multiple Bit Error Status (MERRSTS)
89 * 0 Correctable Error Status (CERRSTS)
90 */
91#define X38_C1ECCERRLOG 0x680 /* Channel 1 ECC Error Log (64b) */
92#define X38_ECCERRLOG_CE 0x1
93#define X38_ECCERRLOG_UE 0x2
94#define X38_ECCERRLOG_RANK_BITS 0x18000000
95#define X38_ECCERRLOG_SYNDROME_BITS 0xff0000
96
97#define X38_CAPID0 0xe0 /* see P.94 of spec for details */
98
99static int x38_channel_num;
100
101static int how_many_channel(struct pci_dev *pdev)
102{
103 unsigned char capid0_8b; /* 8th byte of CAPID0 */
104
105 pci_read_config_byte(pdev, X38_CAPID0 + 8, &capid0_8b);
106 if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
107 debugf0("In single channel mode.\n");
108 x38_channel_num = 1;
109 } else {
110 debugf0("In dual channel mode.\n");
111 x38_channel_num = 2;
112 }
113
114 return x38_channel_num;
115}
116
117static unsigned long eccerrlog_syndrome(u64 log)
118{
119 return (log & X38_ECCERRLOG_SYNDROME_BITS) >> 16;
120}
121
122static int eccerrlog_row(int channel, u64 log)
123{
124 return ((log & X38_ECCERRLOG_RANK_BITS) >> 27) |
125 (channel * X38_RANKS_PER_CHANNEL);
126}
127
128enum x38_chips {
129 X38 = 0,
130};
131
132struct x38_dev_info {
133 const char *ctl_name;
134};
135
136struct x38_error_info {
137 u16 errsts;
138 u16 errsts2;
139 u64 eccerrlog[X38_CHANNELS];
140};
141
142static const struct x38_dev_info x38_devs[] = {
143 [X38] = {
144 .ctl_name = "x38"},
145};
146
147static struct pci_dev *mci_pdev;
148static int x38_registered = 1;
149
150
151static void x38_clear_error_info(struct mem_ctl_info *mci)
152{
153 struct pci_dev *pdev;
154
155 pdev = to_pci_dev(mci->dev);
156
157 /*
158 * Clear any error bits.
159 * (Yes, we really clear bits by writing 1 to them.)
160 */
161 pci_write_bits16(pdev, X38_ERRSTS, X38_ERRSTS_BITS,
162 X38_ERRSTS_BITS);
163}
164
165static u64 x38_readq(const void __iomem *addr)
166{
167 return readl(addr) | (((u64)readl(addr + 4)) << 32);
168}
169
170static void x38_get_and_clear_error_info(struct mem_ctl_info *mci,
171 struct x38_error_info *info)
172{
173 struct pci_dev *pdev;
174 void __iomem *window = mci->pvt_info;
175
176 pdev = to_pci_dev(mci->dev);
177
178 /*
179 * This is a mess because there is no atomic way to read all the
180 * registers at once and the registers can transition from CE being
181 * overwritten by UE.
182 */
183 pci_read_config_word(pdev, X38_ERRSTS, &info->errsts);
184 if (!(info->errsts & X38_ERRSTS_BITS))
185 return;
186
187 info->eccerrlog[0] = x38_readq(window + X38_C0ECCERRLOG);
188 if (x38_channel_num == 2)
189 info->eccerrlog[1] = x38_readq(window + X38_C1ECCERRLOG);
190
191 pci_read_config_word(pdev, X38_ERRSTS, &info->errsts2);
192
193 /*
194 * If the error is the same for both reads then the first set
195 * of reads is valid. If there is a change then there is a CE
196 * with no info and the second set of reads is valid and
197 * should be UE info.
198 */
199 if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) {
200 info->eccerrlog[0] = x38_readq(window + X38_C0ECCERRLOG);
201 if (x38_channel_num == 2)
202 info->eccerrlog[1] =
203 x38_readq(window + X38_C1ECCERRLOG);
204 }
205
206 x38_clear_error_info(mci);
207}
208
209static void x38_process_error_info(struct mem_ctl_info *mci,
210 struct x38_error_info *info)
211{
212 int channel;
213 u64 log;
214
215 if (!(info->errsts & X38_ERRSTS_BITS))
216 return;
217
218 if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) {
219 edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
220 info->errsts = info->errsts2;
221 }
222
223 for (channel = 0; channel < x38_channel_num; channel++) {
224 log = info->eccerrlog[channel];
225 if (log & X38_ECCERRLOG_UE) {
226 edac_mc_handle_ue(mci, 0, 0,
227 eccerrlog_row(channel, log), "x38 UE");
228 } else if (log & X38_ECCERRLOG_CE) {
229 edac_mc_handle_ce(mci, 0, 0,
230 eccerrlog_syndrome(log),
231 eccerrlog_row(channel, log), 0, "x38 CE");
232 }
233 }
234}
235
236static void x38_check(struct mem_ctl_info *mci)
237{
238 struct x38_error_info info;
239
240 debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
241 x38_get_and_clear_error_info(mci, &info);
242 x38_process_error_info(mci, &info);
243}
244
245
246void __iomem *x38_map_mchbar(struct pci_dev *pdev)
247{
248 union {
249 u64 mchbar;
250 struct {
251 u32 mchbar_low;
252 u32 mchbar_high;
253 };
254 } u;
255 void __iomem *window;
256
257 pci_read_config_dword(pdev, X38_MCHBAR_LOW, &u.mchbar_low);
258 pci_write_config_dword(pdev, X38_MCHBAR_LOW, u.mchbar_low | 0x1);
259 pci_read_config_dword(pdev, X38_MCHBAR_HIGH, &u.mchbar_high);
260 u.mchbar &= X38_MCHBAR_MASK;
261
262 if (u.mchbar != (resource_size_t)u.mchbar) {
263 printk(KERN_ERR
264 "x38: mmio space beyond accessible range (0x%llx)\n",
265 (unsigned long long)u.mchbar);
266 return NULL;
267 }
268
269 window = ioremap_nocache(u.mchbar, X38_MMR_WINDOW_SIZE);
270 if (!window)
271 printk(KERN_ERR "x38: cannot map mmio space at 0x%llx\n",
272 (unsigned long long)u.mchbar);
273
274 return window;
275}
276
277
278static void x38_get_drbs(void __iomem *window,
279 u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL])
280{
281 int i;
282
283 for (i = 0; i < X38_RANKS_PER_CHANNEL; i++) {
284 drbs[0][i] = readw(window + X38_C0DRB + 2*i) & X38_DRB_MASK;
285 drbs[1][i] = readw(window + X38_C1DRB + 2*i) & X38_DRB_MASK;
286 }
287}
288
289static bool x38_is_stacked(struct pci_dev *pdev,
290 u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL])
291{
292 u16 tom;
293
294 pci_read_config_word(pdev, X38_TOM, &tom);
295 tom &= X38_TOM_MASK;
296
297 return drbs[X38_CHANNELS - 1][X38_RANKS_PER_CHANNEL - 1] == tom;
298}
299
300static unsigned long drb_to_nr_pages(
301 u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL],
302 bool stacked, int channel, int rank)
303{
304 int n;
305
306 n = drbs[channel][rank];
307 if (rank > 0)
308 n -= drbs[channel][rank - 1];
309 if (stacked && (channel == 1) && drbs[channel][rank] ==
310 drbs[channel][X38_RANKS_PER_CHANNEL - 1]) {
311 n -= drbs[0][X38_RANKS_PER_CHANNEL - 1];
312 }
313
314 n <<= (X38_DRB_SHIFT - PAGE_SHIFT);
315 return n;
316}
317
318static int x38_probe1(struct pci_dev *pdev, int dev_idx)
319{
320 int rc;
321 int i;
322 struct mem_ctl_info *mci = NULL;
323 unsigned long last_page;
324 u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL];
325 bool stacked;
326 void __iomem *window;
327
328 debugf0("MC: %s()\n", __func__);
329
330 window = x38_map_mchbar(pdev);
331 if (!window)
332 return -ENODEV;
333
334 x38_get_drbs(window, drbs);
335
336 how_many_channel(pdev);
337
338 /* FIXME: unconventional pvt_info usage */
339 mci = edac_mc_alloc(0, X38_RANKS, x38_channel_num, 0);
340 if (!mci)
341 return -ENOMEM;
342
343 debugf3("MC: %s(): init mci\n", __func__);
344
345 mci->dev = &pdev->dev;
346 mci->mtype_cap = MEM_FLAG_DDR2;
347
348 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
349 mci->edac_cap = EDAC_FLAG_SECDED;
350
351 mci->mod_name = EDAC_MOD_STR;
352 mci->mod_ver = X38_REVISION;
353 mci->ctl_name = x38_devs[dev_idx].ctl_name;
354 mci->dev_name = pci_name(pdev);
355 mci->edac_check = x38_check;
356 mci->ctl_page_to_phys = NULL;
357 mci->pvt_info = window;
358
359 stacked = x38_is_stacked(pdev, drbs);
360
361 /*
362 * The dram rank boundary (DRB) reg values are boundary addresses
363 * for each DRAM rank with a granularity of 64MB. DRB regs are
364 * cumulative; the last one will contain the total memory
365 * contained in all ranks.
366 */
367 last_page = -1UL;
368 for (i = 0; i < mci->nr_csrows; i++) {
369 unsigned long nr_pages;
370 struct csrow_info *csrow = &mci->csrows[i];
371
372 nr_pages = drb_to_nr_pages(drbs, stacked,
373 i / X38_RANKS_PER_CHANNEL,
374 i % X38_RANKS_PER_CHANNEL);
375
376 if (nr_pages == 0) {
377 csrow->mtype = MEM_EMPTY;
378 continue;
379 }
380
381 csrow->first_page = last_page + 1;
382 last_page += nr_pages;
383 csrow->last_page = last_page;
384 csrow->nr_pages = nr_pages;
385
386 csrow->grain = nr_pages << PAGE_SHIFT;
387 csrow->mtype = MEM_DDR2;
388 csrow->dtype = DEV_UNKNOWN;
389 csrow->edac_mode = EDAC_UNKNOWN;
390 }
391
392 x38_clear_error_info(mci);
393
394 rc = -ENODEV;
395 if (edac_mc_add_mc(mci)) {
396 debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
397 goto fail;
398 }
399
400 /* get this far and it's successful */
401 debugf3("MC: %s(): success\n", __func__);
402 return 0;
403
404fail:
405 iounmap(window);
406 if (mci)
407 edac_mc_free(mci);
408
409 return rc;
410}
411
412static int __devinit x38_init_one(struct pci_dev *pdev,
413 const struct pci_device_id *ent)
414{
415 int rc;
416
417 debugf0("MC: %s()\n", __func__);
418
419 if (pci_enable_device(pdev) < 0)
420 return -EIO;
421
422 rc = x38_probe1(pdev, ent->driver_data);
423 if (!mci_pdev)
424 mci_pdev = pci_dev_get(pdev);
425
426 return rc;
427}
428
429static void __devexit x38_remove_one(struct pci_dev *pdev)
430{
431 struct mem_ctl_info *mci;
432
433 debugf0("%s()\n", __func__);
434
435 mci = edac_mc_del_mc(&pdev->dev);
436 if (!mci)
437 return;
438
439 iounmap(mci->pvt_info);
440
441 edac_mc_free(mci);
442}
443
444static const struct pci_device_id x38_pci_tbl[] __devinitdata = {
445 {
446 PCI_VEND_DEV(INTEL, X38_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
447 X38},
448 {
449 0,
450 } /* 0 terminated list. */
451};
452
453MODULE_DEVICE_TABLE(pci, x38_pci_tbl);
454
455static struct pci_driver x38_driver = {
456 .name = EDAC_MOD_STR,
457 .probe = x38_init_one,
458 .remove = __devexit_p(x38_remove_one),
459 .id_table = x38_pci_tbl,
460};
461
462static int __init x38_init(void)
463{
464 int pci_rc;
465
466 debugf3("MC: %s()\n", __func__);
467
468 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
469 opstate_init();
470
471 pci_rc = pci_register_driver(&x38_driver);
472 if (pci_rc < 0)
473 goto fail0;
474
475 if (!mci_pdev) {
476 x38_registered = 0;
477 mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
478 PCI_DEVICE_ID_INTEL_X38_HB, NULL);
479 if (!mci_pdev) {
480 debugf0("x38 pci_get_device fail\n");
481 pci_rc = -ENODEV;
482 goto fail1;
483 }
484
485 pci_rc = x38_init_one(mci_pdev, x38_pci_tbl);
486 if (pci_rc < 0) {
487 debugf0("x38 init fail\n");
488 pci_rc = -ENODEV;
489 goto fail1;
490 }
491 }
492
493 return 0;
494
495fail1:
496 pci_unregister_driver(&x38_driver);
497
498fail0:
499 if (mci_pdev)
500 pci_dev_put(mci_pdev);
501
502 return pci_rc;
503}
504
505static void __exit x38_exit(void)
506{
507 debugf3("MC: %s()\n", __func__);
508
509 pci_unregister_driver(&x38_driver);
510 if (!x38_registered) {
511 x38_remove_one(mci_pdev);
512 pci_dev_put(mci_pdev);
513 }
514}
515
516module_init(x38_init);
517module_exit(x38_exit);
518
519MODULE_LICENSE("GPL");
520MODULE_AUTHOR("Cluster Computing, Inc. Hitoshi Mitake");
521MODULE_DESCRIPTION("MC support for Intel X38 memory hub controllers");
522
523module_param(edac_op_state, int, 0444);
524MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");