aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/edac.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/edac.h')
-rw-r--r--include/linux/edac.h182
1 files changed, 160 insertions, 22 deletions
diff --git a/include/linux/edac.h b/include/linux/edac.h
index c621d762bb2c..91ba3bae42ee 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -71,6 +71,25 @@ enum dev_type {
71#define DEV_FLAG_X64 BIT(DEV_X64) 71#define DEV_FLAG_X64 BIT(DEV_X64)
72 72
73/** 73/**
74 * enum hw_event_mc_err_type - type of the detected error
75 *
76 * @HW_EVENT_ERR_CORRECTED: Corrected Error - Indicates that an ECC
77 * corrected error was detected
78 * @HW_EVENT_ERR_UNCORRECTED: Uncorrected Error - Indicates an error that
79 * can't be corrected by ECC, but it is not
80 * fatal (maybe it is on an unused memory area,
81 * or the memory controller could recover from
82 * it for example, by re-trying the operation).
83 * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not
84 * be recovered.
85 */
86enum hw_event_mc_err_type {
87 HW_EVENT_ERR_CORRECTED,
88 HW_EVENT_ERR_UNCORRECTED,
89 HW_EVENT_ERR_FATAL,
90};
91
92/**
74 * enum mem_type - memory types. For a more detailed reference, please see 93 * enum mem_type - memory types. For a more detailed reference, please see
75 * http://en.wikipedia.org/wiki/DRAM 94 * http://en.wikipedia.org/wiki/DRAM
76 * 95 *
@@ -313,38 +332,141 @@ enum scrub_type {
313 */ 332 */
314 333
315/** 334/**
335 * enum edac_mc_layer_type - memory controller hierarchy layer
336 *
337 * @EDAC_MC_LAYER_BRANCH: memory layer is named "branch"
338 * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel"
339 * @EDAC_MC_LAYER_SLOT: memory layer is named "slot"
340 * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select"
341 *
342 * This enum is used by the drivers to tell edac_mc_sysfs what name should
343 * be used when describing a memory stick location.
344 */
345enum edac_mc_layer_type {
346 EDAC_MC_LAYER_BRANCH,
347 EDAC_MC_LAYER_CHANNEL,
348 EDAC_MC_LAYER_SLOT,
349 EDAC_MC_LAYER_CHIP_SELECT,
350};
351
352/**
353 * struct edac_mc_layer - describes the memory controller hierarchy
354 * @type: layer type
355 * @size: number of components per layer. For example,
356 * if the channel layer has two channels, size = 2
357 * @is_virt_csrow: This layer is part of the "csrow" when old API
358 * compatibility mode is enabled. Otherwise, it is
359 * a channel
360 */
361struct edac_mc_layer {
362 enum edac_mc_layer_type type;
363 unsigned size;
364 bool is_virt_csrow;
365};
366
367/*
368 * Maximum number of layers used by the memory controller to uniquely
369 * identify a single memory stick.
370 * NOTE: Changing this limit requires not only changing the constant
371 * below, but also changing the existing code in the core, as some
372 * code there is optimized for 3 layers.
373 */
374#define EDAC_MAX_LAYERS 3
375
376/**
377 * EDAC_DIMM_PTR - Macro responsible for finding a pointer inside a pointer array
378 * for the element given by the [layer0,layer1,layer2] position
379 *
380 * @layers: a struct edac_mc_layer array, describing how many elements
381 * were allocated for each layer
382 * @var: name of the var where we want to get the pointer
383 * (like mci->dimms)
384 * @nlayers: Number of layers in the @layers array
385 * @layer0: layer0 position
386 * @layer1: layer1 position. Unused if nlayers < 2
387 * @layer2: layer2 position. Unused if nlayers < 3
388 *
389 * For 1 layer, this macro returns &var[layer0]
390 * For 2 layers, this macro is similar to allocating a bi-dimensional array
391 * and returning "&var[layer0][layer1]"
392 * For 3 layers, this macro is similar to allocating a tri-dimensional array
393 * and returning "&var[layer0][layer1][layer2]"
394 *
395 * A loop could be used here to make it more generic, but, as we only have
396 * 3 layers, this is a little faster.
397 * By design, the number of layers can never be 0 or more than 3. If that
398 * ever happens, NULL is returned, causing an OOPS during the memory allocation
399 * routine, which would show the developer that something is wrong.
400 */
401#define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({ \
402 typeof(var) __p; \
403 if ((nlayers) == 1) \
404 __p = &var[layer0]; \
405 else if ((nlayers) == 2) \
406 __p = &var[(layer1) + ((layers[1]).size * (layer0))]; \
407 else if ((nlayers) == 3) \
408 __p = &var[(layer2) + ((layers[2]).size * ((layer1) + \
409 ((layers[1]).size * (layer0))))]; \
410 else \
411 __p = NULL; \
412 __p; \
413})
414
415
416/* FIXME: add the proper per-location error counts */
417struct dimm_info {
418 char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
419
420 /* Memory location data */
421 unsigned location[EDAC_MAX_LAYERS];
422
423 struct mem_ctl_info *mci; /* the parent */
424
425 u32 grain; /* granularity of reported error in bytes */
426 enum dev_type dtype; /* memory device type */
427 enum mem_type mtype; /* memory dimm type */
428 enum edac_type edac_mode; /* EDAC mode for this dimm */
429
430 u32 nr_pages; /* number of pages on this dimm */
431
432 unsigned csrow, cschannel; /* Points to the old API data */
433};
434
435/**
316 * struct rank_info - contains the information for one DIMM rank 436 * struct rank_info - contains the information for one DIMM rank
317 * 437 *
318 * @chan_idx: channel number where the rank is (typically, 0 or 1) 438 * @chan_idx: channel number where the rank is (typically, 0 or 1)
319 * @ce_count: number of correctable errors for this rank 439 * @ce_count: number of correctable errors for this rank
320 * @label: DIMM label. Different ranks for the same DIMM should be
321 * filled, on userspace, with the same label.
322 * FIXME: The core currently won't enforce it.
323 * @csrow: A pointer to the chip select row structure (the parent 440 * @csrow: A pointer to the chip select row structure (the parent
324 * structure). The location of the rank is given by 441 * structure). The location of the rank is given by
325 * the (csrow->csrow_idx, chan_idx) vector. 442 * the (csrow->csrow_idx, chan_idx) vector.
443 * @dimm: A pointer to the DIMM structure, where the DIMM label
444 * information is stored.
445 *
446 * FIXME: Currently, the EDAC core model will assume one DIMM per rank.
447 * This is a bad assumption, but it makes this patch easier. Later
448 * patches in this series will fix this issue.
326 */ 449 */
327struct rank_info { 450struct rank_info {
328 int chan_idx; 451 int chan_idx;
329 u32 ce_count; 452 struct csrow_info *csrow;
330 char label[EDAC_MC_LABEL_LEN + 1]; 453 struct dimm_info *dimm;
331 struct csrow_info *csrow; /* the parent */ 454
455 u32 ce_count; /* Correctable Errors for this csrow */
332}; 456};
333 457
334struct csrow_info { 458struct csrow_info {
335 unsigned long first_page; /* first page number in dimm */ 459 /* Used only by edac_mc_find_csrow_by_page() */
336 unsigned long last_page; /* last page number in dimm */ 460 unsigned long first_page; /* first page number in csrow */
461 unsigned long last_page; /* last page number in csrow */
337 unsigned long page_mask; /* used for interleaving - 462 unsigned long page_mask; /* used for interleaving -
338 * 0UL for non intlv 463 * 0UL for non intlv */
339 */ 464
340 u32 nr_pages; /* number of pages in csrow */ 465 int csrow_idx; /* the chip-select row */
341 u32 grain; /* granularity of reported error in bytes */ 466
342 int csrow_idx; /* the chip-select row */
343 enum dev_type dtype; /* memory device type */
344 u32 ue_count; /* Uncorrectable Errors for this csrow */ 467 u32 ue_count; /* Uncorrectable Errors for this csrow */
345 u32 ce_count; /* Correctable Errors for this csrow */ 468 u32 ce_count; /* Correctable Errors for this csrow */
346 enum mem_type mtype; /* memory csrow type */ 469
347 enum edac_type edac_mode; /* EDAC mode for this csrow */
348 struct mem_ctl_info *mci; /* the parent */ 470 struct mem_ctl_info *mci; /* the parent */
349 471
350 struct kobject kobj; /* sysfs kobject for this csrow */ 472 struct kobject kobj; /* sysfs kobject for this csrow */
@@ -426,8 +548,20 @@ struct mem_ctl_info {
426 unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci, 548 unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
427 unsigned long page); 549 unsigned long page);
428 int mc_idx; 550 int mc_idx;
429 int nr_csrows;
430 struct csrow_info *csrows; 551 struct csrow_info *csrows;
552 unsigned nr_csrows, num_cschannel;
553
554 /* Memory Controller hierarchy */
555 unsigned n_layers;
556 struct edac_mc_layer *layers;
557 bool mem_is_per_rank;
558
559 /*
560 * DIMM info. Will eventually remove the entire csrows_info some day
561 */
562 unsigned tot_dimms;
563 struct dimm_info *dimms;
564
431 /* 565 /*
432 * FIXME - what about controllers on other busses? - IDs must be 566 * FIXME - what about controllers on other busses? - IDs must be
433 * unique. dev pointer should be sufficiently unique, but 567 * unique. dev pointer should be sufficiently unique, but
@@ -440,12 +574,16 @@ struct mem_ctl_info {
440 const char *dev_name; 574 const char *dev_name;
441 char proc_name[MC_PROC_NAME_MAX_LEN + 1]; 575 char proc_name[MC_PROC_NAME_MAX_LEN + 1];
442 void *pvt_info; 576 void *pvt_info;
443 u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
444 u32 ce_noinfo_count; /* Correctable Errors w/o info */
445 u32 ue_count; /* Total Uncorrectable Errors for this MC */
446 u32 ce_count; /* Total Correctable Errors for this MC */
447 unsigned long start_time; /* mci load start time (in jiffies) */ 577 unsigned long start_time; /* mci load start time (in jiffies) */
448 578
579 /*
580 * drivers shouldn't access those fields directly, as the core
581 * already handles that.
582 */
583 u32 ce_noinfo_count, ue_noinfo_count;
584 u32 ue_mc, ce_mc;
585 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
586
449 struct completion complete; 587 struct completion complete;
450 588
451 /* edac sysfs device control */ 589 /* edac sysfs device control */
@@ -458,7 +596,7 @@ struct mem_ctl_info {
458 * by the low level driver. 596 * by the low level driver.
459 * 597 *
460 * Set by the low level driver to provide attributes at the 598 * Set by the low level driver to provide attributes at the
461 * controller level, same level as 'ue_count' and 'ce_count' above. 599 * controller level.
462 * An array of structures, NULL terminated 600 * An array of structures, NULL terminated
463 * 601 *
464 * If attributes are desired, then set to array of attributes 602 * If attributes are desired, then set to array of attributes