diff options
author | Mauro Carvalho Chehab <mchehab@redhat.com> | 2009-06-22 21:48:28 -0400 |
---|---|---|
committer | Mauro Carvalho Chehab <mchehab@redhat.com> | 2010-05-10 10:44:45 -0400 |
commit | 194a40feabb7cab38911a357c86e968e98024281 (patch) | |
tree | 6c133fd3db2ea07830fa7028c827fb2b534b74ba /drivers/edac | |
parent | a0c36a1f0fbab42590dab3c13c10fa7d20e6c2cd (diff) |
i7core_edac: Add error insertion code for Nehalem
Implements set_inject_error() with the low-level code needed to inject
memory errors on Nehalem, and adds some sysfs nodes to allow error injection.
The next patch will add an API for error injection.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/i7core_edac.c | 427 |
1 files changed, 419 insertions, 8 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 7ecf15e66a3f..b590f8468693 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c | |||
@@ -20,7 +20,6 @@ | |||
20 | * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf | 20 | * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf |
21 | */ | 21 | */ |
22 | 22 | ||
23 | |||
24 | #include <linux/module.h> | 23 | #include <linux/module.h> |
25 | #include <linux/init.h> | 24 | #include <linux/init.h> |
26 | #include <linux/pci.h> | 25 | #include <linux/pci.h> |
@@ -64,12 +63,16 @@ | |||
64 | /* OFFSETS for Devices 4,5 and 6 Function 0 */ | 63 | /* OFFSETS for Devices 4,5 and 6 Function 0 */ |
65 | 64 | ||
66 | #define MC_CHANNEL_ADDR_MATCH 0xf0 | 65 | #define MC_CHANNEL_ADDR_MATCH 0xf0 |
67 | 66 | #define MC_CHANNEL_ERROR_MASK 0xf8 | |
68 | #define MC_MASK_DIMM (1 << 41) | 67 | #define MC_CHANNEL_ERROR_INJECT 0xfc |
69 | #define MC_MASK_RANK (1 << 40) | 68 | #define INJECT_ADDR_PARITY 0x10 |
70 | #define MC_MASK_BANK (1 << 39) | 69 | #define INJECT_ECC 0x08 |
71 | #define MC_MASK_PAGE (1 << 38) | 70 | #define MASK_CACHELINE 0x06 |
72 | #define MC_MASK_COL (1 << 37) | 71 | #define MASK_FULL_CACHELINE 0x06 |
72 | #define MASK_MSB32_CACHELINE 0x04 | ||
73 | #define MASK_LSB32_CACHELINE 0x02 | ||
74 | #define NO_MASK_CACHELINE 0x00 | ||
75 | #define REPEAT_EN 0x01 | ||
73 | 76 | ||
74 | /* | 77 | /* |
75 | * i7core structs | 78 | * i7core structs |
@@ -84,10 +87,23 @@ struct i7core_info { | |||
84 | u32 max_dod; | 87 | u32 max_dod; |
85 | }; | 88 | }; |
86 | 89 | ||
90 | |||
91 | struct i7core_inject { | ||
92 | int enable; | ||
93 | |||
94 | u32 section; | ||
95 | u32 type; | ||
96 | u32 eccmask; | ||
97 | |||
98 | /* Error address mask */ | ||
99 | int channel, dimm, rank, bank, page, col; | ||
100 | }; | ||
101 | |||
87 | struct i7core_pvt { | 102 | struct i7core_pvt { |
88 | struct pci_dev *pci_mcr; /* Dev 3:0 */ | 103 | struct pci_dev *pci_mcr; /* Dev 3:0 */ |
89 | struct pci_dev *pci_ch[NUM_CHANS][NUM_FUNCS]; | 104 | struct pci_dev *pci_ch[NUM_CHANS][NUM_FUNCS]; |
90 | struct i7core_info info; | 105 | struct i7core_info info; |
106 | struct i7core_inject inject; | ||
91 | }; | 107 | }; |
92 | 108 | ||
93 | /* Device name and register DID (Device ID) */ | 109 | /* Device name and register DID (Device ID) */ |
@@ -166,6 +182,7 @@ static inline int maxnumcol(struct i7core_pvt *pvt) | |||
166 | return cols[((pvt->info.max_dod >> 9) & 0x3) << 12]; | 182 | return cols[((pvt->info.max_dod >> 9) & 0x3) << 12]; |
167 | } | 183 | } |
168 | 184 | ||
185 | |||
169 | /**************************************************************************** | 186 | /**************************************************************************** |
170 | Memory check routines | 187 | Memory check routines |
171 | ****************************************************************************/ | 188 | ****************************************************************************/ |
@@ -200,6 +217,390 @@ static int get_dimm_config(struct mem_ctl_info *mci) | |||
200 | } | 217 | } |
201 | 218 | ||
202 | /**************************************************************************** | 219 | /**************************************************************************** |
220 | Error insertion routines | ||
221 | ****************************************************************************/ | ||
222 | |||
223 | /* The i7core has independent error injection features per channel. | ||
224 | However, to keep the code simpler, we don't allow enabling error injection | ||
225 | on more than one channel. | ||
226 | Also, since a change to an inject parameter is applied only at enable time, | ||
227 | we disable error injection on every write to the sysfs nodes that | ||
228 | control error injection. | ||
229 | */ | ||
230 | static void disable_inject(struct mem_ctl_info *mci) | ||
231 | { | ||
232 | struct i7core_pvt *pvt = mci->pvt_info; | ||
233 | |||
234 | pvt->inject.enable = 0; | ||
235 | |||
236 | pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0], | ||
237 | MC_CHANNEL_ERROR_MASK, 0); | ||
238 | } | ||
239 | |||
240 | /* | ||
241 | * i7core inject.section | ||
242 | * | ||
243 | * accept and store error injection inject.section value | ||
244 | * bit 0 - refers to the lower 32-byte half cacheline | ||
245 | * bit 1 - refers to the upper 32-byte half cacheline | ||
246 | */ | ||
247 | static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci, | ||
248 | const char *data, size_t count) | ||
249 | { | ||
250 | struct i7core_pvt *pvt = mci->pvt_info; | ||
251 | unsigned long value; | ||
252 | int rc; | ||
253 | |||
254 | if (pvt->inject.enable) | ||
255 | disable_inject(mci); | ||
256 | |||
257 | rc = strict_strtoul(data, 10, &value); | ||
258 | if ((rc < 0) || (value > 3)) | ||
259 | return 0; | ||
260 | |||
261 | pvt->inject.section = (u32) value; | ||
262 | return count; | ||
263 | } | ||
264 | |||
265 | static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci, | ||
266 | char *data) | ||
267 | { | ||
268 | struct i7core_pvt *pvt = mci->pvt_info; | ||
269 | return sprintf(data, "0x%08x\n", pvt->inject.section); | ||
270 | } | ||
271 | |||
272 | /* | ||
273 | * i7core inject.type | ||
274 | * | ||
275 | * accept and store error injection inject.type value | ||
276 | * bit 0 - repeat enable - Enable error repetition | ||
277 | * bit 1 - inject ECC error | ||
278 | * bit 2 - inject parity error | ||
279 | */ | ||
280 | static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci, | ||
281 | const char *data, size_t count) | ||
282 | { | ||
283 | struct i7core_pvt *pvt = mci->pvt_info; | ||
284 | unsigned long value; | ||
285 | int rc; | ||
286 | |||
287 | if (pvt->inject.enable) | ||
288 | disable_inject(mci); | ||
289 | |||
290 | rc = strict_strtoul(data, 10, &value); | ||
291 | if ((rc < 0) || (value > 7)) | ||
292 | return 0; | ||
293 | |||
294 | pvt->inject.type = (u32) value; | ||
295 | return count; | ||
296 | } | ||
297 | |||
298 | static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci, | ||
299 | char *data) | ||
300 | { | ||
301 | struct i7core_pvt *pvt = mci->pvt_info; | ||
302 | return sprintf(data, "0x%08x\n", pvt->inject.type); | ||
303 | } | ||
304 | |||
305 | /* | ||
306 | * i7core_inject_eccmask_store | ||
307 | * | ||
308 | * The type of error (UE/CE) will depend on the inject.eccmask value: | ||
309 | * Any bits set to a 1 will flip the corresponding ECC bit | ||
310 | * Correctable errors can be injected by flipping 1 bit or the bits within | ||
311 | * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or | ||
312 | * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an | ||
313 | * uncorrectable error to be injected. | ||
314 | */ | ||
315 | static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci, | ||
316 | const char *data, size_t count) | ||
317 | { | ||
318 | struct i7core_pvt *pvt = mci->pvt_info; | ||
319 | unsigned long value; | ||
320 | int rc; | ||
321 | |||
322 | if (pvt->inject.enable) | ||
323 | disable_inject(mci); | ||
324 | |||
325 | rc = strict_strtoul(data, 10, &value); | ||
326 | if (rc < 0) | ||
327 | return 0; | ||
328 | |||
329 | pvt->inject.eccmask = (u32) value; | ||
330 | return count; | ||
331 | } | ||
332 | |||
333 | static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci, | ||
334 | char *data) | ||
335 | { | ||
336 | struct i7core_pvt *pvt = mci->pvt_info; | ||
337 | return sprintf(data, "0x%08x\n", pvt->inject.eccmask); | ||
338 | } | ||
339 | |||
340 | /* | ||
341 | * i7core_inject_addrmatch_store | ||
342 | * | ||
343 | * Sets the address match criteria used to select where the error is injected. | ||
344 | * Input is a list of "name:value" pairs separated by whitespace, where name | ||
345 | * is one of channel, dimm, rank, bank, page, col (or column) and value is | ||
346 | * either a non-negative decimal number or "any" (stored as -1, meaning the | ||
347 | * criterion is ignored). Accepted ranges: channel < 3; dimm, rank, bank < 4; | ||
348 | * page <= 0xffff; col <= 0x3fff. | ||
349 | */ | ||
350 | static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci, | ||
351 | const char *data, size_t count) | ||
352 | { | ||
353 | struct i7core_pvt *pvt = mci->pvt_info; | ||
354 | char *cmd, *val; | ||
355 | long value; | ||
356 | int rc; | ||
357 | |||
358 | if (pvt->inject.enable) | ||
359 | disable_inject(mci); | ||
360 | |||
361 | do { | ||
362 | cmd = strsep((char **) &data, ":"); | ||
363 | if (!cmd) | ||
364 | break; | ||
365 | val = strsep((char **) &data, " \n\t"); | ||
366 | if (!val) | ||
367 | return cmd - data; | ||
368 | |||
369 | if (!strcasecmp(val,"any")) | ||
370 | value = -1; | ||
371 | else { | ||
372 | rc = strict_strtol(val, 10, &value); | ||
373 | if ((rc < 0) || (value < 0)) | ||
374 | return cmd - data; | ||
375 | } | ||
376 | |||
377 | if (!strcasecmp(cmd,"channel")) { | ||
378 | if (value < 3) | ||
379 | pvt->inject.channel = value; | ||
380 | else | ||
381 | return cmd - data; | ||
382 | } else if (!strcasecmp(cmd,"dimm")) { | ||
383 | if (value < 4) | ||
384 | pvt->inject.dimm = value; | ||
385 | else | ||
386 | return cmd - data; | ||
387 | } else if (!strcasecmp(cmd,"rank")) { | ||
388 | if (value < 4) | ||
389 | pvt->inject.rank = value; | ||
390 | else | ||
391 | return cmd - data; | ||
392 | } else if (!strcasecmp(cmd,"bank")) { | ||
393 | if (value < 4) | ||
394 | pvt->inject.bank = value; | ||
395 | else | ||
396 | return cmd - data; | ||
397 | } else if (!strcasecmp(cmd,"page")) { | ||
398 | if (value <= 0xffff) | ||
399 | pvt->inject.page = value; | ||
400 | else | ||
401 | return cmd - data; | ||
402 | } else if (!strcasecmp(cmd,"col") || | ||
403 | !strcasecmp(cmd,"column")) { | ||
404 | if (value <= 0x3fff) | ||
405 | pvt->inject.col = value; | ||
406 | else | ||
407 | return cmd - data; | ||
408 | } | ||
409 | } while (1); | ||
410 | |||
411 | return count; | ||
412 | } | ||
413 | |||
414 | static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci, | ||
415 | char *data) | ||
416 | { | ||
417 | struct i7core_pvt *pvt = mci->pvt_info; | ||
418 | char channel[4], dimm[4], bank[4], rank[4], page[7], col[7]; | ||
419 | |||
420 | if (pvt->inject.channel < 0) | ||
421 | sprintf(channel, "any"); | ||
422 | else | ||
423 | sprintf(channel, "%d", pvt->inject.channel); | ||
424 | if (pvt->inject.dimm < 0) | ||
425 | sprintf(dimm, "any"); | ||
426 | else | ||
427 | sprintf(dimm, "%d", pvt->inject.dimm); | ||
428 | if (pvt->inject.bank < 0) | ||
429 | sprintf(bank, "any"); | ||
430 | else | ||
431 | sprintf(bank, "%d", pvt->inject.bank); | ||
432 | if (pvt->inject.rank < 0) | ||
433 | sprintf(rank, "any"); | ||
434 | else | ||
435 | sprintf(rank, "%d", pvt->inject.rank); | ||
436 | if (pvt->inject.page < 0) | ||
437 | sprintf(page, "any"); | ||
438 | else | ||
439 | sprintf(page, "0x%04x", pvt->inject.page); | ||
440 | if (pvt->inject.col < 0) | ||
441 | sprintf(col, "any"); | ||
442 | else | ||
443 | sprintf(col, "0x%04x", pvt->inject.col); | ||
444 | |||
445 | return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n" | ||
446 | "rank: %s\npage: %s\ncolumn: %s\n", | ||
447 | channel, dimm, bank, rank, page, col); | ||
448 | } | ||
449 | |||
450 | /* | ||
451 | * This routine prepares the Memory Controller for error injection. | ||
452 | * The error will be injected when some process tries to write to the | ||
453 | * memory that matches the given criteria. | ||
454 | * The criteria can be set in terms of a mask where dimm, rank, bank, page | ||
455 | * and col can be specified. | ||
456 | * A -1 value for any of the mask items will make the MCU ignore | ||
457 | * that matching criterion for error injection. | ||
458 | * | ||
459 | * It should be noted that the error will only happen after a write operation | ||
460 | * on a memory that matches the condition. If REPEAT_EN is not enabled in the | ||
461 | * inject mask, then it will produce just one error. Otherwise, it will repeat | ||
462 | * until the injectmask is cleared. | ||
463 | * | ||
464 | * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD | ||
465 | * is reliable enough to check if the MC is using the | ||
466 | * three channels. However, this is not clear in the datasheet. | ||
467 | */ | ||
468 | static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci, | ||
469 | const char *data, size_t count) | ||
470 | { | ||
471 | struct i7core_pvt *pvt = mci->pvt_info; | ||
472 | u32 injectmask; | ||
473 | u64 mask = 0; | ||
474 | int rc; | ||
475 | long enable; | ||
476 | |||
477 | rc = strict_strtoul(data, 10, &enable); | ||
478 | if ((rc < 0)) | ||
479 | return 0; | ||
480 | |||
481 | if (enable) { | ||
482 | pvt->inject.enable = 1; | ||
483 | } else { | ||
484 | disable_inject(mci); | ||
485 | return count; | ||
486 | } | ||
487 | |||
488 | /* Sets pvt->inject.dimm mask */ | ||
489 | if (pvt->inject.dimm < 0) | ||
490 | mask |= 1l << 41; | ||
491 | else { | ||
492 | if (maxnumdimms(pvt) > 2) | ||
493 | mask |= (pvt->inject.dimm & 0x3l) << 35; | ||
494 | else | ||
495 | mask |= (pvt->inject.dimm & 0x1l) << 36; | ||
496 | } | ||
497 | |||
498 | /* Sets pvt->inject.rank mask */ | ||
499 | if (pvt->inject.rank < 0) | ||
500 | mask |= 1l << 40; | ||
501 | else { | ||
502 | if (maxnumdimms(pvt) > 2) | ||
503 | mask |= (pvt->inject.rank & 0x1l) << 34; | ||
504 | else | ||
505 | mask |= (pvt->inject.rank & 0x3l) << 34; | ||
506 | } | ||
507 | |||
508 | /* Sets pvt->inject.bank mask */ | ||
509 | if (pvt->inject.bank < 0) | ||
510 | mask |= 1l << 39; | ||
511 | else | ||
512 | mask |= (pvt->inject.bank & 0x15l) << 30; | ||
513 | |||
514 | /* Sets pvt->inject.page mask */ | ||
515 | if (pvt->inject.page < 0) | ||
516 | mask |= 1l << 38; | ||
517 | else | ||
518 | mask |= (pvt->inject.page & 0xffffl) << 14; | ||
519 | |||
520 | /* Sets pvt->inject.col mask */ | ||
521 | if (pvt->inject.col < 0) | ||
522 | mask |= 1l << 37; | ||
523 | else | ||
524 | mask |= (pvt->inject.col & 0x3fffl); | ||
525 | |||
526 | pci_write_config_qword(pvt->pci_ch[pvt->inject.channel][0], | ||
527 | MC_CHANNEL_ADDR_MATCH, mask); | ||
528 | |||
529 | pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0], | ||
530 | MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask); | ||
531 | |||
532 | /* | ||
533 | * bit 0: REPEAT_EN | ||
534 | * bits 1-2: MASK_HALF_CACHELINE | ||
535 | * bit 3: INJECT_ECC | ||
536 | * bit 4: INJECT_ADDR_PARITY | ||
537 | */ | ||
538 | |||
539 | injectmask = (pvt->inject.type & 1) && | ||
540 | (pvt->inject.section & 0x3) << 1 && | ||
541 | (pvt->inject.type & 0x6) << (3 - 1); | ||
542 | |||
543 | pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0], | ||
544 | MC_CHANNEL_ERROR_MASK, injectmask); | ||
545 | |||
546 | |||
547 | debugf0("Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n", | ||
548 | mask, pvt->inject.eccmask, injectmask); | ||
549 | |||
550 | return count; | ||
551 | } | ||
552 | |||
553 | static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci, | ||
554 | char *data) | ||
555 | { | ||
556 | struct i7core_pvt *pvt = mci->pvt_info; | ||
557 | return sprintf(data, "%d\n", pvt->inject.enable); | ||
558 | } | ||
559 | |||
560 | /* | ||
561 | * Sysfs struct | ||
562 | */ | ||
563 | static struct mcidev_sysfs_attribute i7core_inj_attrs[] = { | ||
564 | |||
565 | { | ||
566 | .attr = { | ||
567 | .name = "inject_section", | ||
568 | .mode = (S_IRUGO | S_IWUSR) | ||
569 | }, | ||
570 | .show = i7core_inject_section_show, | ||
571 | .store = i7core_inject_section_store, | ||
572 | }, { | ||
573 | .attr = { | ||
574 | .name = "inject_type", | ||
575 | .mode = (S_IRUGO | S_IWUSR) | ||
576 | }, | ||
577 | .show = i7core_inject_type_show, | ||
578 | .store = i7core_inject_type_store, | ||
579 | }, { | ||
580 | .attr = { | ||
581 | .name = "inject_eccmask", | ||
582 | .mode = (S_IRUGO | S_IWUSR) | ||
583 | }, | ||
584 | .show = i7core_inject_eccmask_show, | ||
585 | .store = i7core_inject_eccmask_store, | ||
586 | }, { | ||
587 | .attr = { | ||
588 | .name = "inject_addrmatch", | ||
589 | .mode = (S_IRUGO | S_IWUSR) | ||
590 | }, | ||
591 | .show = i7core_inject_addrmatch_show, | ||
592 | .store = i7core_inject_addrmatch_store, | ||
593 | }, { | ||
594 | .attr = { | ||
595 | .name = "inject_enable", | ||
596 | .mode = (S_IRUGO | S_IWUSR) | ||
597 | }, | ||
598 | .show = i7core_inject_enable_show, | ||
599 | .store = i7core_inject_enable_store, | ||
600 | }, | ||
601 | }; | ||
602 | |||
603 | /**************************************************************************** | ||
203 | Device initialization routines: put/get, init/exit | 604 | Device initialization routines: put/get, init/exit |
204 | ****************************************************************************/ | 605 | ****************************************************************************/ |
205 | 606 | ||
@@ -322,10 +723,11 @@ static int __devinit i7core_probe(struct pci_dev *pdev, | |||
322 | 723 | ||
323 | debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); | 724 | debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); |
324 | 725 | ||
325 | mci->dev = &pdev->dev; /* record ptr to the generic device */ | 726 | mci->dev = &pdev->dev; /* record ptr to the generic device */ |
326 | dev_set_drvdata(mci->dev, mci); | 727 | dev_set_drvdata(mci->dev, mci); |
327 | 728 | ||
328 | pvt = mci->pvt_info; | 729 | pvt = mci->pvt_info; |
730 | |||
329 | // pvt->system_address = pdev; /* Record this device in our private */ | 731 | // pvt->system_address = pdev; /* Record this device in our private */ |
330 | // pvt->maxch = num_channels; | 732 | // pvt->maxch = num_channels; |
331 | // pvt->maxdimmperch = num_dimms_per_channel; | 733 | // pvt->maxdimmperch = num_dimms_per_channel; |
@@ -343,6 +745,7 @@ static int __devinit i7core_probe(struct pci_dev *pdev, | |||
343 | mci->ctl_name = i7core_devs[dev_idx].ctl_name; | 745 | mci->ctl_name = i7core_devs[dev_idx].ctl_name; |
344 | mci->dev_name = pci_name(pdev); | 746 | mci->dev_name = pci_name(pdev); |
345 | mci->ctl_page_to_phys = NULL; | 747 | mci->ctl_page_to_phys = NULL; |
748 | mci->mc_driver_sysfs_attributes = i7core_inj_attrs; | ||
346 | 749 | ||
347 | /* add this new MC control structure to EDAC's list of MCs */ | 750 | /* add this new MC control structure to EDAC's list of MCs */ |
348 | if (edac_mc_add_mc(mci)) { | 751 | if (edac_mc_add_mc(mci)) { |
@@ -365,6 +768,14 @@ static int __devinit i7core_probe(struct pci_dev *pdev, | |||
365 | __func__); | 768 | __func__); |
366 | } | 769 | } |
367 | 770 | ||
771 | /* Default error mask is any memory */ | ||
772 | pvt->inject.channel = -1; | ||
773 | pvt->inject.dimm = -1; | ||
774 | pvt->inject.rank = -1; | ||
775 | pvt->inject.bank = -1; | ||
776 | pvt->inject.page = -1; | ||
777 | pvt->inject.col = -1; | ||
778 | |||
368 | return 0; | 779 | return 0; |
369 | 780 | ||
370 | fail1: | 781 | fail1: |