aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <mchehab@redhat.com>2009-06-22 21:48:28 -0400
committerMauro Carvalho Chehab <mchehab@redhat.com>2010-05-10 10:44:45 -0400
commit194a40feabb7cab38911a357c86e968e98024281 (patch)
tree6c133fd3db2ea07830fa7028c827fb2b534b74ba /drivers/edac
parenta0c36a1f0fbab42590dab3c13c10fa7d20e6c2cd (diff)
i7core_edac: Add error insertion code for Nehalem
Implements set_inject_error() with the low-level code needed to inject memory errors at Nehalem, and adds some sysfs nodes to allow error injection. The next patch will add an API for error injection. Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/i7core_edac.c427
1 files changed, 419 insertions, 8 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 7ecf15e66a3f..b590f8468693 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -20,7 +20,6 @@
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf 20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
21 */ 21 */
22 22
23
24#include <linux/module.h> 23#include <linux/module.h>
25#include <linux/init.h> 24#include <linux/init.h>
26#include <linux/pci.h> 25#include <linux/pci.h>
@@ -64,12 +63,16 @@
64 /* OFFSETS for Devices 4,5 and 6 Function 0 */ 63 /* OFFSETS for Devices 4,5 and 6 Function 0 */
65 64
66#define MC_CHANNEL_ADDR_MATCH 0xf0 65#define MC_CHANNEL_ADDR_MATCH 0xf0
67 66#define MC_CHANNEL_ERROR_MASK 0xf8
68#define MC_MASK_DIMM (1 << 41) 67#define MC_CHANNEL_ERROR_INJECT 0xfc
69#define MC_MASK_RANK (1 << 40) 68 #define INJECT_ADDR_PARITY 0x10
70#define MC_MASK_BANK (1 << 39) 69 #define INJECT_ECC 0x08
71#define MC_MASK_PAGE (1 << 38) 70 #define MASK_CACHELINE 0x06
72#define MC_MASK_COL (1 << 37) 71 #define MASK_FULL_CACHELINE 0x06
72 #define MASK_MSB32_CACHELINE 0x04
73 #define MASK_LSB32_CACHELINE 0x02
74 #define NO_MASK_CACHELINE 0x00
75 #define REPEAT_EN 0x01
73 76
74/* 77/*
75 * i7core structs 78 * i7core structs
@@ -84,10 +87,23 @@ struct i7core_info {
84 u32 max_dod; 87 u32 max_dod;
85}; 88};
86 89
90
91struct i7core_inject {
92 int enable;
93
94 u32 section;
95 u32 type;
96 u32 eccmask;
97
98 /* Error address mask */
99 int channel, dimm, rank, bank, page, col;
100};
101
87struct i7core_pvt { 102struct i7core_pvt {
88 struct pci_dev *pci_mcr; /* Dev 3:0 */ 103 struct pci_dev *pci_mcr; /* Dev 3:0 */
89 struct pci_dev *pci_ch[NUM_CHANS][NUM_FUNCS]; 104 struct pci_dev *pci_ch[NUM_CHANS][NUM_FUNCS];
90 struct i7core_info info; 105 struct i7core_info info;
106 struct i7core_inject inject;
91}; 107};
92 108
93/* Device name and register DID (Device ID) */ 109/* Device name and register DID (Device ID) */
@@ -166,6 +182,7 @@ static inline int maxnumcol(struct i7core_pvt *pvt)
166 return cols[((pvt->info.max_dod >> 9) & 0x3) << 12]; 182 return cols[((pvt->info.max_dod >> 9) & 0x3) << 12];
167} 183}
168 184
185
169/**************************************************************************** 186/****************************************************************************
170 Memory check routines 187 Memory check routines
171 ****************************************************************************/ 188 ****************************************************************************/
@@ -200,6 +217,390 @@ static int get_dimm_config(struct mem_ctl_info *mci)
200} 217}
201 218
202/**************************************************************************** 219/****************************************************************************
220 Error insertion routines
221 ****************************************************************************/
222
223/* The i7core has independent error injection features per channel.
224 However, to have a simpler code, we don't allow enabling error injection
225 on more than one channel.
226 Also, since a change at an inject parameter will be applied only at enable,
227 we're disabling error injection on all write calls to the sysfs nodes that
228 controls the error code injection.
229 */
230static void disable_inject(struct mem_ctl_info *mci)
231{
232 struct i7core_pvt *pvt = mci->pvt_info;
233
234 pvt->inject.enable = 0;
235
236 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
237 MC_CHANNEL_ERROR_MASK, 0);
238}
239
240/*
241 * i7core inject inject.section
242 *
243 * accept and store error injection inject.section value
244 * bit 0 - refers to the lower 32-byte half cacheline
245 * bit 1 - refers to the upper 32-byte half cacheline
246 */
247static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
248 const char *data, size_t count)
249{
250 struct i7core_pvt *pvt = mci->pvt_info;
251 unsigned long value;
252 int rc;
253
254 if (pvt->inject.enable)
255 disable_inject(mci);
256
257 rc = strict_strtoul(data, 10, &value);
258 if ((rc < 0) || (value > 3))
259 return 0;
260
261 pvt->inject.section = (u32) value;
262 return count;
263}
264
265static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
266 char *data)
267{
268 struct i7core_pvt *pvt = mci->pvt_info;
269 return sprintf(data, "0x%08x\n", pvt->inject.section);
270}
271
272/*
273 * i7core inject.type
274 *
275 * accept and store error injection inject.section value
276 * bit 0 - repeat enable - Enable error repetition
277 * bit 1 - inject ECC error
278 * bit 2 - inject parity error
279 */
280static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
281 const char *data, size_t count)
282{
283 struct i7core_pvt *pvt = mci->pvt_info;
284 unsigned long value;
285 int rc;
286
287 if (pvt->inject.enable)
288 disable_inject(mci);
289
290 rc = strict_strtoul(data, 10, &value);
291 if ((rc < 0) || (value > 7))
292 return 0;
293
294 pvt->inject.type = (u32) value;
295 return count;
296}
297
298static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
299 char *data)
300{
301 struct i7core_pvt *pvt = mci->pvt_info;
302 return sprintf(data, "0x%08x\n", pvt->inject.type);
303}
304
305/*
306 * i7core_inject_inject.eccmask_store
307 *
308 * The type of error (UE/CE) will depend on the inject.eccmask value:
309 * Any bits set to a 1 will flip the corresponding ECC bit
310 * Correctable errors can be injected by flipping 1 bit or the bits within
311 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
312 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
313 * uncorrectable error to be injected.
314 */
315static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
316 const char *data, size_t count)
317{
318 struct i7core_pvt *pvt = mci->pvt_info;
319 unsigned long value;
320 int rc;
321
322 if (pvt->inject.enable)
323 disable_inject(mci);
324
325 rc = strict_strtoul(data, 10, &value);
326 if (rc < 0)
327 return 0;
328
329 pvt->inject.eccmask = (u32) value;
330 return count;
331}
332
333static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
334 char *data)
335{
336 struct i7core_pvt *pvt = mci->pvt_info;
337 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
338}
339
340/*
341 * i7core_addrmatch
342 *
343 * The type of error (UE/CE) will depend on the inject.eccmask value:
344 * Any bits set to a 1 will flip the corresponding ECC bit
345 * Correctable errors can be injected by flipping 1 bit or the bits within
346 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
347 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
348 * uncorrectable error to be injected.
349 */
350static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci,
351 const char *data, size_t count)
352{
353 struct i7core_pvt *pvt = mci->pvt_info;
354 char *cmd, *val;
355 long value;
356 int rc;
357
358 if (pvt->inject.enable)
359 disable_inject(mci);
360
361 do {
362 cmd = strsep((char **) &data, ":");
363 if (!cmd)
364 break;
365 val = strsep((char **) &data, " \n\t");
366 if (!val)
367 return cmd - data;
368
369 if (!strcasecmp(val,"any"))
370 value = -1;
371 else {
372 rc = strict_strtol(val, 10, &value);
373 if ((rc < 0) || (value < 0))
374 return cmd - data;
375 }
376
377 if (!strcasecmp(cmd,"channel")) {
378 if (value < 3)
379 pvt->inject.channel = value;
380 else
381 return cmd - data;
382 } else if (!strcasecmp(cmd,"dimm")) {
383 if (value < 4)
384 pvt->inject.dimm = value;
385 else
386 return cmd - data;
387 } else if (!strcasecmp(cmd,"rank")) {
388 if (value < 4)
389 pvt->inject.rank = value;
390 else
391 return cmd - data;
392 } else if (!strcasecmp(cmd,"bank")) {
393 if (value < 4)
394 pvt->inject.bank = value;
395 else
396 return cmd - data;
397 } else if (!strcasecmp(cmd,"page")) {
398 if (value <= 0xffff)
399 pvt->inject.page = value;
400 else
401 return cmd - data;
402 } else if (!strcasecmp(cmd,"col") ||
403 !strcasecmp(cmd,"column")) {
404 if (value <= 0x3fff)
405 pvt->inject.col = value;
406 else
407 return cmd - data;
408 }
409 } while (1);
410
411 return count;
412}
413
414static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci,
415 char *data)
416{
417 struct i7core_pvt *pvt = mci->pvt_info;
418 char channel[4], dimm[4], bank[4], rank[4], page[7], col[7];
419
420 if (pvt->inject.channel < 0)
421 sprintf(channel, "any");
422 else
423 sprintf(channel, "%d", pvt->inject.channel);
424 if (pvt->inject.dimm < 0)
425 sprintf(dimm, "any");
426 else
427 sprintf(dimm, "%d", pvt->inject.dimm);
428 if (pvt->inject.bank < 0)
429 sprintf(bank, "any");
430 else
431 sprintf(bank, "%d", pvt->inject.bank);
432 if (pvt->inject.rank < 0)
433 sprintf(rank, "any");
434 else
435 sprintf(rank, "%d", pvt->inject.rank);
436 if (pvt->inject.page < 0)
437 sprintf(page, "any");
438 else
439 sprintf(page, "0x%04x", pvt->inject.page);
440 if (pvt->inject.col < 0)
441 sprintf(col, "any");
442 else
443 sprintf(col, "0x%04x", pvt->inject.col);
444
445 return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n"
446 "rank: %s\npage: %s\ncolumn: %s\n",
447 channel, dimm, bank, rank, page, col);
448}
449
450/*
451 * This routine prepares the Memory Controller for error injection.
452 * The error will be injected when some process tries to write to the
453 * memory that matches the given criteria.
454 * The criteria can be set in terms of a mask where dimm, rank, bank, page
455 * and col can be specified.
456 * A -1 value for any of the mask items will make the MCU to ignore
457 * that matching criteria for error injection.
458 *
459 * It should be noticed that the error will only happen after a write operation
460 * on a memory that matches the condition. if REPEAT_EN is not enabled at
461 * inject mask, then it will produce just one error. Otherwise, it will repeat
462 * until the injectmask would be cleaned.
463 *
464 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
465 * is reliable enough to check if the MC is using the
466 * three channels. However, this is not clear at the datasheet.
467 */
468static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
469 const char *data, size_t count)
470{
471 struct i7core_pvt *pvt = mci->pvt_info;
472 u32 injectmask;
473 u64 mask = 0;
474 int rc;
475 long enable;
476
477 rc = strict_strtoul(data, 10, &enable);
478 if ((rc < 0))
479 return 0;
480
481 if (enable) {
482 pvt->inject.enable = 1;
483 } else {
484 disable_inject(mci);
485 return count;
486 }
487
488 /* Sets pvt->inject.dimm mask */
489 if (pvt->inject.dimm < 0)
490 mask |= 1l << 41;
491 else {
492 if (maxnumdimms(pvt) > 2)
493 mask |= (pvt->inject.dimm & 0x3l) << 35;
494 else
495 mask |= (pvt->inject.dimm & 0x1l) << 36;
496 }
497
498 /* Sets pvt->inject.rank mask */
499 if (pvt->inject.rank < 0)
500 mask |= 1l << 40;
501 else {
502 if (maxnumdimms(pvt) > 2)
503 mask |= (pvt->inject.rank & 0x1l) << 34;
504 else
505 mask |= (pvt->inject.rank & 0x3l) << 34;
506 }
507
508 /* Sets pvt->inject.bank mask */
509 if (pvt->inject.bank < 0)
510 mask |= 1l << 39;
511 else
512 mask |= (pvt->inject.bank & 0x15l) << 30;
513
514 /* Sets pvt->inject.page mask */
515 if (pvt->inject.page < 0)
516 mask |= 1l << 38;
517 else
518 mask |= (pvt->inject.page & 0xffffl) << 14;
519
520 /* Sets pvt->inject.column mask */
521 if (pvt->inject.col < 0)
522 mask |= 1l << 37;
523 else
524 mask |= (pvt->inject.col & 0x3fffl);
525
526 pci_write_config_qword(pvt->pci_ch[pvt->inject.channel][0],
527 MC_CHANNEL_ADDR_MATCH, mask);
528
529 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
530 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
531
532 /*
533 * bit 0: REPEAT_EN
534 * bits 1-2: MASK_HALF_CACHELINE
535 * bit 3: INJECT_ECC
536 * bit 4: INJECT_ADDR_PARITY
537 */
538
539 injectmask = (pvt->inject.type & 1) &&
540 (pvt->inject.section & 0x3) << 1 &&
541 (pvt->inject.type & 0x6) << (3 - 1);
542
543 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
544 MC_CHANNEL_ERROR_MASK, injectmask);
545
546
547 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
548 mask, pvt->inject.eccmask, injectmask);
549
550 return count;
551}
552
553static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
554 char *data)
555{
556 struct i7core_pvt *pvt = mci->pvt_info;
557 return sprintf(data, "%d\n", pvt->inject.enable);
558}
559
560/*
561 * Sysfs struct
562 */
563static struct mcidev_sysfs_attribute i7core_inj_attrs[] = {
564
565 {
566 .attr = {
567 .name = "inject_section",
568 .mode = (S_IRUGO | S_IWUSR)
569 },
570 .show = i7core_inject_section_show,
571 .store = i7core_inject_section_store,
572 }, {
573 .attr = {
574 .name = "inject_type",
575 .mode = (S_IRUGO | S_IWUSR)
576 },
577 .show = i7core_inject_type_show,
578 .store = i7core_inject_type_store,
579 }, {
580 .attr = {
581 .name = "inject_eccmask",
582 .mode = (S_IRUGO | S_IWUSR)
583 },
584 .show = i7core_inject_eccmask_show,
585 .store = i7core_inject_eccmask_store,
586 }, {
587 .attr = {
588 .name = "inject_addrmatch",
589 .mode = (S_IRUGO | S_IWUSR)
590 },
591 .show = i7core_inject_addrmatch_show,
592 .store = i7core_inject_addrmatch_store,
593 }, {
594 .attr = {
595 .name = "inject_enable",
596 .mode = (S_IRUGO | S_IWUSR)
597 },
598 .show = i7core_inject_enable_show,
599 .store = i7core_inject_enable_store,
600 },
601};
602
603/****************************************************************************
203 Device initialization routines: put/get, init/exit 604 Device initialization routines: put/get, init/exit
204 ****************************************************************************/ 605 ****************************************************************************/
205 606
@@ -322,10 +723,11 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
322 723
323 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); 724 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
324 725
325 mci->dev = &pdev->dev; /* record ptr to the generic device */ 726 mci->dev = &pdev->dev; /* record ptr to the generic device */
326 dev_set_drvdata(mci->dev, mci); 727 dev_set_drvdata(mci->dev, mci);
327 728
328 pvt = mci->pvt_info; 729 pvt = mci->pvt_info;
730
329// pvt->system_address = pdev; /* Record this device in our private */ 731// pvt->system_address = pdev; /* Record this device in our private */
330// pvt->maxch = num_channels; 732// pvt->maxch = num_channels;
331// pvt->maxdimmperch = num_dimms_per_channel; 733// pvt->maxdimmperch = num_dimms_per_channel;
@@ -343,6 +745,7 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
343 mci->ctl_name = i7core_devs[dev_idx].ctl_name; 745 mci->ctl_name = i7core_devs[dev_idx].ctl_name;
344 mci->dev_name = pci_name(pdev); 746 mci->dev_name = pci_name(pdev);
345 mci->ctl_page_to_phys = NULL; 747 mci->ctl_page_to_phys = NULL;
748 mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
346 749
347 /* add this new MC control structure to EDAC's list of MCs */ 750 /* add this new MC control structure to EDAC's list of MCs */
348 if (edac_mc_add_mc(mci)) { 751 if (edac_mc_add_mc(mci)) {
@@ -365,6 +768,14 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
365 __func__); 768 __func__);
366 } 769 }
367 770
771 /* Default error mask is any memory */
772 pvt->inject.channel = -1;
773 pvt->inject.dimm = -1;
774 pvt->inject.rank = -1;
775 pvt->inject.bank = -1;
776 pvt->inject.page = -1;
777 pvt->inject.col = -1;
778
368 return 0; 779 return 0;
369 780
370fail1: 781fail1: