Diffstat (limited to 'drivers/edac/x38_edac.c')
-rw-r--r-- | drivers/edac/x38_edac.c | 524
1 file changed, 524 insertions, 0 deletions
diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c
new file mode 100644
index 000000000000..2406c2ce2844
--- /dev/null
+++ b/drivers/edac/x38_edac.c
@@ -0,0 +1,524 @@
/*
 * Intel X38 Memory Controller kernel module
 * Copyright (C) 2008 Cluster Computing, Inc.
 *
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * This file is based on i3200_edac.c
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/edac.h>
#include "edac_core.h"

#define X38_REVISION		"1.1"

#define EDAC_MOD_STR		"x38_edac"

#define PCI_DEVICE_ID_INTEL_X38_HB	0x29e0

#define X38_RANKS		8
#define X38_RANKS_PER_CHANNEL	4
#define X38_CHANNELS		2

/* Intel X38 register addresses - device 0 function 0 - DRAM Controller */

#define X38_MCHBAR_LOW		0x48	/* MCH Memory Mapped Register BAR */
#define X38_MCHBAR_HIGH		0x4b
#define X38_MCHBAR_MASK		0xfffffc000ULL	/* bits 35:14 */
#define X38_MMR_WINDOW_SIZE	16384
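/*
 * Illustrative example (register values assumed, not from the spec): if
 * MCHBAR_LOW reads 0xfed14001 and MCHBAR_HIGH reads 0x00000000, the
 * combined 64-bit value masked with X38_MCHBAR_MASK (bits 35:14) gives
 * the MMR base 0xfed14000, and a 16 KiB (X38_MMR_WINDOW_SIZE) window is
 * mapped there.
 */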

#define X38_TOM			0xa0	/* Top of Memory (16b)
					 *
					 * 15:10 reserved
					 * 9:0 total populated physical memory
					 */
#define X38_TOM_MASK		0x3ff	/* bits 9:0 */
#define X38_TOM_SHIFT		26	/* 64MiB grain */

#define X38_ERRSTS		0xc8	/* Error Status Register (16b)
					 *
					 * 15 reserved
					 * 14 Isochronous TBWRR Run Behind FIFO Full
					 *    (ITCV)
					 * 13 Isochronous TBWRR Run Behind FIFO Put
					 *    (ITSTV)
					 * 12 reserved
					 * 11 MCH Thermal Sensor Event
					 *    for SMI/SCI/SERR (GTSE)
					 * 10 reserved
					 *  9 LOCK to non-DRAM Memory Flag (LCKF)
					 *  8 reserved
					 *  7 DRAM Throttle Flag (DTF)
					 *  6:2 reserved
					 *  1 Multi-bit DRAM ECC Error Flag (DMERR)
					 *  0 Single-bit DRAM ECC Error Flag (DSERR)
					 */
#define X38_ERRSTS_UE		0x0002
#define X38_ERRSTS_CE		0x0001
#define X38_ERRSTS_BITS		(X38_ERRSTS_UE | X38_ERRSTS_CE)


/* Intel MMIO register space - device 0 function 0 - MMR space */

#define X38_C0DRB		0x200	/* Channel 0 DRAM Rank Boundary (16b x 4)
					 *
					 * 15:10 reserved
					 * 9:0 Channel 0 DRAM Rank Boundary Address
					 */
#define X38_C1DRB		0x600	/* Channel 1 DRAM Rank Boundary (16b x 4) */
#define X38_DRB_MASK		0x3ff	/* bits 9:0 */
#define X38_DRB_SHIFT		26	/* 64MiB grain */
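/*
 * Illustrative example: a DRB value of 0x010 (16) marks a rank boundary
 * at 16 << X38_DRB_SHIFT bytes = 16 * 64 MiB = 1 GiB.
 */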

#define X38_C0ECCERRLOG		0x280	/* Channel 0 ECC Error Log (64b)
					 *
					 * 63:48 Error Column Address (ERRCOL)
					 * 47:32 Error Row Address (ERRROW)
					 * 31:29 Error Bank Address (ERRBANK)
					 * 28:27 Error Rank Address (ERRRANK)
					 * 26:24 reserved
					 * 23:16 Error Syndrome (ERRSYND)
					 * 15: 2 reserved
					 *  1 Multiple Bit Error Status (MERRSTS)
					 *  0 Correctable Error Status (CERRSTS)
					 */
#define X38_C1ECCERRLOG		0x680	/* Channel 1 ECC Error Log (64b) */
#define X38_ECCERRLOG_CE	0x1
#define X38_ECCERRLOG_UE	0x2
#define X38_ECCERRLOG_RANK_BITS		0x18000000
#define X38_ECCERRLOG_SYNDROME_BITS	0xff0000

#define X38_CAPID0		0xe0	/* see P.94 of spec for details */

static int x38_channel_num;

static int how_many_channel(struct pci_dev *pdev)
{
	unsigned char capid0_8b; /* 8th byte of CAPID0 */

	pci_read_config_byte(pdev, X38_CAPID0 + 8, &capid0_8b);
	if (capid0_8b & 0x20) {	/* check DCD: Dual Channel Disable */
		debugf0("In single channel mode.\n");
		x38_channel_num = 1;
	} else {
		debugf0("In dual channel mode.\n");
		x38_channel_num = 2;
	}

	return x38_channel_num;
}

static unsigned long eccerrlog_syndrome(u64 log)
{
	return (log & X38_ECCERRLOG_SYNDROME_BITS) >> 16;
}

static int eccerrlog_row(int channel, u64 log)
{
	return ((log & X38_ECCERRLOG_RANK_BITS) >> 27) |
		(channel * X38_RANKS_PER_CHANNEL);
}
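/*
 * Illustrative example (log value assumed): a log with ERRRANK bits 28:27
 * equal to 3 (0x18000000) on channel 1 decodes to csrow
 * 3 + 1 * X38_RANKS_PER_CHANNEL = 7, and a syndrome field of 0x23 is
 * returned from bits 23:16.
 */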

enum x38_chips {
	X38 = 0,
};

struct x38_dev_info {
	const char *ctl_name;
};

struct x38_error_info {
	u16 errsts;
	u16 errsts2;
	u64 eccerrlog[X38_CHANNELS];
};

static const struct x38_dev_info x38_devs[] = {
	[X38] = {
		.ctl_name = "x38"},
};

static struct pci_dev *mci_pdev;
static int x38_registered = 1;


static void x38_clear_error_info(struct mem_ctl_info *mci)
{
	struct pci_dev *pdev;

	pdev = to_pci_dev(mci->dev);

	/*
	 * Clear any error bits.
	 * (Yes, we really clear bits by writing 1 to them.)
	 */
	pci_write_bits16(pdev, X38_ERRSTS, X38_ERRSTS_BITS,
			 X38_ERRSTS_BITS);
}

static u64 x38_readq(const void __iomem *addr)
{
	return readl(addr) | (((u64)readl(addr + 4)) << 32);
}

static void x38_get_and_clear_error_info(struct mem_ctl_info *mci,
					 struct x38_error_info *info)
{
	struct pci_dev *pdev;
	void __iomem *window = mci->pvt_info;

	pdev = to_pci_dev(mci->dev);

	/*
	 * This is a mess because there is no atomic way to read all the
	 * registers at once and the registers can transition from CE being
	 * overwritten by UE.
	 */
	pci_read_config_word(pdev, X38_ERRSTS, &info->errsts);
	if (!(info->errsts & X38_ERRSTS_BITS))
		return;

	info->eccerrlog[0] = x38_readq(window + X38_C0ECCERRLOG);
	if (x38_channel_num == 2)
		info->eccerrlog[1] = x38_readq(window + X38_C1ECCERRLOG);

	pci_read_config_word(pdev, X38_ERRSTS, &info->errsts2);

	/*
	 * If the error is the same for both reads then the first set
	 * of reads is valid.  If there is a change then there is a CE
	 * with no info and the second set of reads is valid and
	 * should be UE info.
	 */
	if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) {
		info->eccerrlog[0] = x38_readq(window + X38_C0ECCERRLOG);
		if (x38_channel_num == 2)
			info->eccerrlog[1] =
				x38_readq(window + X38_C1ECCERRLOG);
	}

	x38_clear_error_info(mci);
}
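/*
 * Illustrative example (register values assumed): if the first ERRSTS
 * read returns 0x0001 (CE only) and a UE lands before the second read
 * returns 0x0003, the XOR of the two reads has X38_ERRSTS_UE set, so the
 * ECC error logs are read a second time to pick up the UE information.
 */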

static void x38_process_error_info(struct mem_ctl_info *mci,
				   struct x38_error_info *info)
{
	int channel;
	u64 log;

	if (!(info->errsts & X38_ERRSTS_BITS))
		return;

	if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) {
		edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
		info->errsts = info->errsts2;
	}

	for (channel = 0; channel < x38_channel_num; channel++) {
		log = info->eccerrlog[channel];
		if (log & X38_ECCERRLOG_UE) {
			edac_mc_handle_ue(mci, 0, 0,
				eccerrlog_row(channel, log), "x38 UE");
		} else if (log & X38_ECCERRLOG_CE) {
			edac_mc_handle_ce(mci, 0, 0,
				eccerrlog_syndrome(log),
				eccerrlog_row(channel, log), 0, "x38 CE");
		}
	}
}

static void x38_check(struct mem_ctl_info *mci)
{
	struct x38_error_info info;

	debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
	x38_get_and_clear_error_info(mci, &info);
	x38_process_error_info(mci, &info);
}


void __iomem *x38_map_mchbar(struct pci_dev *pdev)
{
	union {
		u64 mchbar;
		struct {
			u32 mchbar_low;
			u32 mchbar_high;
		};
	} u;
	void __iomem *window;

	pci_read_config_dword(pdev, X38_MCHBAR_LOW, &u.mchbar_low);
	pci_write_config_dword(pdev, X38_MCHBAR_LOW, u.mchbar_low | 0x1);
	pci_read_config_dword(pdev, X38_MCHBAR_HIGH, &u.mchbar_high);
	u.mchbar &= X38_MCHBAR_MASK;

	if (u.mchbar != (resource_size_t)u.mchbar) {
		printk(KERN_ERR
			"x38: mmio space beyond accessible range (0x%llx)\n",
			(unsigned long long)u.mchbar);
		return NULL;
	}

	window = ioremap_nocache(u.mchbar, X38_MMR_WINDOW_SIZE);
	if (!window)
		printk(KERN_ERR "x38: cannot map mmio space at 0x%llx\n",
			(unsigned long long)u.mchbar);

	return window;
}


static void x38_get_drbs(void __iomem *window,
			 u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL])
{
	int i;

	for (i = 0; i < X38_RANKS_PER_CHANNEL; i++) {
		drbs[0][i] = readw(window + X38_C0DRB + 2*i) & X38_DRB_MASK;
		drbs[1][i] = readw(window + X38_C1DRB + 2*i) & X38_DRB_MASK;
	}
}

static bool x38_is_stacked(struct pci_dev *pdev,
			   u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL])
{
	u16 tom;

	pci_read_config_word(pdev, X38_TOM, &tom);
	tom &= X38_TOM_MASK;

	return drbs[X38_CHANNELS - 1][X38_RANKS_PER_CHANNEL - 1] == tom;
}

static unsigned long drb_to_nr_pages(
	u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL],
	bool stacked, int channel, int rank)
{
	int n;

	n = drbs[channel][rank];
	if (rank > 0)
		n -= drbs[channel][rank - 1];
	if (stacked && (channel == 1) && drbs[channel][rank] ==
			drbs[channel][X38_RANKS_PER_CHANNEL - 1]) {
		n -= drbs[0][X38_RANKS_PER_CHANNEL - 1];
	}

	n <<= (X38_DRB_SHIFT - PAGE_SHIFT);
	return n;
}
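/*
 * Illustrative example (DRB values assumed): with PAGE_SHIFT = 12 and
 * channel 0 DRBs of {16, 32, 32, 32}, rank 1 spans 32 - 16 = 16 units of
 * 64 MiB, i.e. 16 << (X38_DRB_SHIFT - 12) = 262144 pages (1 GiB); ranks
 * 2 and 3 share the same boundary and therefore contain no pages.
 */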

static int x38_probe1(struct pci_dev *pdev, int dev_idx)
{
	int rc;
	int i;
	struct mem_ctl_info *mci = NULL;
	unsigned long last_page;
	u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL];
	bool stacked;
	void __iomem *window;

	debugf0("MC: %s()\n", __func__);

	window = x38_map_mchbar(pdev);
	if (!window)
		return -ENODEV;

	x38_get_drbs(window, drbs);

	how_many_channel(pdev);

	/* FIXME: unconventional pvt_info usage */
	mci = edac_mc_alloc(0, X38_RANKS, x38_channel_num, 0);
	if (!mci)
		return -ENOMEM;

	debugf3("MC: %s(): init mci\n", __func__);

	mci->dev = &pdev->dev;
	mci->mtype_cap = MEM_FLAG_DDR2;

	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;

	mci->mod_name = EDAC_MOD_STR;
	mci->mod_ver = X38_REVISION;
	mci->ctl_name = x38_devs[dev_idx].ctl_name;
	mci->dev_name = pci_name(pdev);
	mci->edac_check = x38_check;
	mci->ctl_page_to_phys = NULL;
	mci->pvt_info = window;

	stacked = x38_is_stacked(pdev, drbs);

	/*
	 * The dram rank boundary (DRB) reg values are boundary addresses
	 * for each DRAM rank with a granularity of 64MB.  DRB regs are
	 * cumulative; the last one will contain the total memory
	 * contained in all ranks.
	 */
	last_page = -1UL;
	for (i = 0; i < mci->nr_csrows; i++) {
		unsigned long nr_pages;
		struct csrow_info *csrow = &mci->csrows[i];

		nr_pages = drb_to_nr_pages(drbs, stacked,
			i / X38_RANKS_PER_CHANNEL,
			i % X38_RANKS_PER_CHANNEL);

		if (nr_pages == 0) {
			csrow->mtype = MEM_EMPTY;
			continue;
		}

		csrow->first_page = last_page + 1;
		last_page += nr_pages;
		csrow->last_page = last_page;
		csrow->nr_pages = nr_pages;

		csrow->grain = nr_pages << PAGE_SHIFT;
		csrow->mtype = MEM_DDR2;
		csrow->dtype = DEV_UNKNOWN;
		csrow->edac_mode = EDAC_UNKNOWN;
	}

	x38_clear_error_info(mci);

	rc = -ENODEV;
	if (edac_mc_add_mc(mci)) {
		debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
		goto fail;
	}

	/* get this far and it's successful */
	debugf3("MC: %s(): success\n", __func__);
	return 0;

fail:
	iounmap(window);
	if (mci)
		edac_mc_free(mci);

	return rc;
}

static int __devinit x38_init_one(struct pci_dev *pdev,
				  const struct pci_device_id *ent)
{
	int rc;

	debugf0("MC: %s()\n", __func__);

	if (pci_enable_device(pdev) < 0)
		return -EIO;

	rc = x38_probe1(pdev, ent->driver_data);
	if (!mci_pdev)
		mci_pdev = pci_dev_get(pdev);

	return rc;
}

static void __devexit x38_remove_one(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mci = edac_mc_del_mc(&pdev->dev);
	if (!mci)
		return;

	iounmap(mci->pvt_info);

	edac_mc_free(mci);
}

static const struct pci_device_id x38_pci_tbl[] __devinitdata = {
	{
	 PCI_VEND_DEV(INTEL, X38_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
	 X38},
	{
	 0,
	 }	/* 0 terminated list. */
};

MODULE_DEVICE_TABLE(pci, x38_pci_tbl);

static struct pci_driver x38_driver = {
	.name = EDAC_MOD_STR,
	.probe = x38_init_one,
	.remove = __devexit_p(x38_remove_one),
	.id_table = x38_pci_tbl,
};

static int __init x38_init(void)
{
	int pci_rc;

	debugf3("MC: %s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	pci_rc = pci_register_driver(&x38_driver);
	if (pci_rc < 0)
		goto fail0;

	if (!mci_pdev) {
		x38_registered = 0;
		mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
					  PCI_DEVICE_ID_INTEL_X38_HB, NULL);
		if (!mci_pdev) {
			debugf0("x38 pci_get_device fail\n");
			pci_rc = -ENODEV;
			goto fail1;
		}

		pci_rc = x38_init_one(mci_pdev, x38_pci_tbl);
		if (pci_rc < 0) {
			debugf0("x38 init fail\n");
			pci_rc = -ENODEV;
			goto fail1;
		}
	}

	return 0;

fail1:
	pci_unregister_driver(&x38_driver);

fail0:
	if (mci_pdev)
		pci_dev_put(mci_pdev);

	return pci_rc;
}

static void __exit x38_exit(void)
{
	debugf3("MC: %s()\n", __func__);

	pci_unregister_driver(&x38_driver);
	if (!x38_registered) {
		x38_remove_one(mci_pdev);
		pci_dev_put(mci_pdev);
	}
}

module_init(x38_init);
module_exit(x38_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Cluster Computing, Inc. Hitoshi Mitake");
MODULE_DESCRIPTION("MC support for Intel X38 memory hub controllers");

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");