aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac/edac_core.h
diff options
context:
space:
mode:
authorDouglas Thompson <dougthompson@xmission.com>2007-07-19 04:49:36 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-19 13:04:53 -0400
commite27e3dac651771fe3250f6305dee277bce29fc5d (patch)
tree9c0ac81a0948d8e52a72865ff9fbae4a12031a32 /drivers/edac/edac_core.h
parent7c9281d76c1c0b130f79d5fc021084e9749959d4 (diff)
drivers/edac: add edac_device class
This patch adds the new 'class' of object to be managed, named: 'edac_device'. As a peer of the 'edac_mc' class of object, it provides a non-memory centric view of an ERROR DETECTING device in hardware. It provides a sysfs interface and an abstraction for various EDAC type devices. Multiple 'instances' within the class are possible, with each 'instance' able to have multiple 'blocks', and each 'block' having 'attributes'. At the 'block' level there are the 'ce_count' and 'ue_count' fields which the device driver can update and/or call edac_device_handle_XX() functions. At each higher level are additional 'total' count fields, which are a summation of counts below that level. This 'edac_device' has been used to capture and present ECC errors which are found in a L1 and L2 system on a per CORE/CPU basis. Signed-off-by: Douglas Thompson <dougthompson@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/edac/edac_core.h')
-rw-r--r--drivers/edac/edac_core.h252
1 files changed, 246 insertions, 6 deletions
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 397f144791e..a3e4b97fe4f 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -32,9 +32,14 @@
32#include <linux/completion.h> 32#include <linux/completion.h>
33#include <linux/kobject.h> 33#include <linux/kobject.h>
34#include <linux/platform_device.h> 34#include <linux/platform_device.h>
35#include <linux/sysdev.h>
36#include <linux/workqueue.h>
37#include <linux/version.h>
35 38
36#define EDAC_MC_LABEL_LEN 31 39#define EDAC_MC_LABEL_LEN 31
37#define MC_PROC_NAME_MAX_LEN 7 40#define EDAC_DEVICE_NAME_LEN 31
41#define EDAC_ATTRIB_VALUE_LEN 15
42#define MC_PROC_NAME_MAX_LEN 7
38 43
39#if PAGE_SHIFT < 20 44#if PAGE_SHIFT < 20
40#define PAGES_TO_MiB( pages ) ( ( pages ) >> ( 20 - PAGE_SHIFT ) ) 45#define PAGES_TO_MiB( pages ) ( ( pages ) >> ( 20 - PAGE_SHIFT ) )
@@ -51,6 +56,10 @@
51#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \ 56#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
52 printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg) 57 printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg)
53 58
59/* edac_device printk */
60#define edac_device_printk(ctl, level, fmt, arg...) \
61 printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg)
62
54/* prefixes for edac_printk() and edac_mc_printk() */ 63/* prefixes for edac_printk() and edac_mc_printk() */
55#define EDAC_MC "MC" 64#define EDAC_MC "MC"
56#define EDAC_PCI "PCI" 65#define EDAC_PCI "PCI"
@@ -62,7 +71,7 @@ extern int edac_debug_level;
62#define edac_debug_printk(level, fmt, arg...) \ 71#define edac_debug_printk(level, fmt, arg...) \
63 do { \ 72 do { \
64 if (level <= edac_debug_level) \ 73 if (level <= edac_debug_level) \
65 edac_printk(KERN_DEBUG, EDAC_DEBUG, fmt, ##arg); \ 74 edac_printk(KERN_EMERG, EDAC_DEBUG, fmt, ##arg); \
66 } while(0) 75 } while(0)
67 76
68#define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ ) 77#define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ )
@@ -195,6 +204,8 @@ enum scrub_type {
195 204
196/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ 205/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
197 206
207extern char * edac_align_ptr(void *ptr, unsigned size);
208
198/* 209/*
199 * There are several things to be aware of that aren't at all obvious: 210 * There are several things to be aware of that aren't at all obvious:
200 * 211 *
@@ -376,6 +387,231 @@ struct mem_ctl_info {
376 struct completion kobj_complete; 387 struct completion kobj_complete;
377}; 388};
378 389
390/*
391 * The following are the structures to provide for a generic
392 * or abstract 'edac_device'. This set of structures and the
393 * code that implements the APIs for the same, provide for
394 * registering EDAC type devices which are NOT standard memory.
395 *
396 * CPU caches (L1 and L2)
397 * DMA engines
398 * Core CPU switches
399 * Fabric switch units
400 * PCIe interface controllers
401 * other EDAC/ECC type devices that can be monitored for
402 * errors, etc.
403 *
404 * It allows for a 2 level set of hierarchy. For example:
405 *
406 * cache could be composed of L1, L2 and L3 levels of cache.
407 * Each CPU core would have its own L1 cache, while sharing
408 * L2 and maybe L3 caches.
409 *
410 * View them arranged, via the sysfs presentation:
411 * /sys/devices/system/edac/..
412 *
413 * mc/ <existing memory device directory>
414 * cpu/cpu0/.. <L1 and L2 block directory>
415 * /L1-cache/ce_count
416 * /ue_count
417 * /L2-cache/ce_count
418 * /ue_count
419 * cpu/cpu1/.. <L1 and L2 block directory>
420 * /L1-cache/ce_count
421 * /ue_count
422 * /L2-cache/ce_count
423 * /ue_count
424 * ...
425 *
426 * the L1 and L2 directories would be "edac_device_block's"
427 */
428
429struct edac_device_counter {
430 u32 ue_count;
431 u32 ce_count;
432};
433
434#define INC_COUNTER(cnt) (cnt++)
435
436/*
437 * An array of these is passed to the alloc() function
438 * to specify attributes of the edac_block
439 */
440struct edac_attrib_spec {
441 char name[EDAC_DEVICE_NAME_LEN + 1];
442
443 int type;
444#define EDAC_ATTR_INT 0x01
445#define EDAC_ATTR_CHAR 0x02
446};
447
448
449/* Attribute control structure
450 * In this structure is a pointer to the driver's edac_attrib_spec
451 * The life of this pointer is inclusive in the life of the driver's
452 * life cycle.
453 */
454struct edac_attrib {
455 struct edac_device_block *block; /* Up Pointer */
456
457 struct edac_attrib_spec *spec; /* ptr to module spec entry */
458
459 union { /* actual value */
460 int edac_attrib_int_value;
461 char edac_attrib_char_value[EDAC_ATTRIB_VALUE_LEN + 1];
462 } edac_attrib_value;
463};
464
465/* device block control structure */
466struct edac_device_block {
467 struct edac_device_instance *instance; /* Up Pointer */
468 char name[EDAC_DEVICE_NAME_LEN + 1];
469
470 struct edac_device_counter counters; /* basic UE and CE counters */
471
472 int nr_attribs; /* how many attributes */
473 struct edac_attrib *attribs; /* this block's attributes */
474
475 /* edac sysfs device control */
476 struct kobject kobj;
477 struct completion kobj_complete;
478};
479
480/* device instance control structure */
481struct edac_device_instance {
482 struct edac_device_ctl_info *ctl; /* Up pointer */
483 char name[EDAC_DEVICE_NAME_LEN + 4];
484
485 struct edac_device_counter counters; /* instance counters */
486
487 u32 nr_blocks; /* how many blocks */
488 struct edac_device_block *blocks; /* block array */
489
490 /* edac sysfs device control */
491 struct kobject kobj;
492 struct completion kobj_complete;
493};
494
495
496/*
497 * Abstract edac_device control info structure
498 *
499 */
500struct edac_device_ctl_info {
501 /* for global list of edac_device_ctl_info structs */
502 struct list_head link;
503
504 int dev_idx;
505
506 /* Per instance controls for this edac_device */
507 int log_ue; /* boolean for logging UEs */
508 int log_ce; /* boolean for logging CEs */
509 int panic_on_ue; /* boolean for panic'ing on an UE */
510 unsigned poll_msec; /* polling interval in milliseconds */
511 unsigned long delay; /* number of jiffies for poll_msec */
512
513 struct sysdev_class *edac_class; /* pointer to class */
514
515 /* the internal state of this controller instance */
516 int op_state;
517#define OP_ALLOC 0x100
518#define OP_RUNNING_POLL 0x201
519#define OP_RUNNING_INTERRUPT 0x202
520#define OP_RUNNING_POLL_INTR 0x203
521#define OP_OFFLINE 0x300
522
523 /* work struct for this instance */
524#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20))
525 struct delayed_work work;
526#else
527 struct work_struct work;
528#endif
529
530 /* pointer to edac polling checking routine:
531 * If NOT NULL: points to polling check routine
532 * If NULL: Then assumes INTERRUPT operation, where
533 * MC driver will receive events
534 */
535 void (*edac_check) (struct edac_device_ctl_info * edac_dev);
536
537 struct device *dev; /* pointer to device structure */
538
539 const char *mod_name; /* module name */
540 const char *ctl_name; /* edac controller name */
541
542 void *pvt_info; /* pointer to 'private driver' info */
543
544 unsigned long start_time;/* edac_device load start time (jiffies)*/
545
546 /* these are for safe removal of mc devices from global list while
547 * NMI handlers may be traversing list
548 */
549 struct rcu_head rcu;
550 struct completion complete;
551
552 /* sysfs top name under 'edac' directory
553 * and instance name:
554 * cpu/cpu0/...
555 * cpu/cpu1/...
556 * cpu/cpu2/...
557 * ...
558 */
559 char name[EDAC_DEVICE_NAME_LEN + 1];
560
561 /* Number of instances supported on this control structure
562 * and the array of those instances
563 */
564 u32 nr_instances;
565 struct edac_device_instance *instances;
566
567 /* Event counters for this whole EDAC Device */
568 struct edac_device_counter counters;
569
570 /* edac sysfs device control for the 'name'
571 * device this structure controls
572 */
573 struct kobject kobj;
574 struct completion kobj_complete;
575};
576
577/* To get from the instance's wq to the beginning of the ctl structure */
578#define to_edac_device_ctl_work(w) \
579 container_of(w,struct edac_device_ctl_info,work)
580
581/* Function to calc the number of delay jiffies from poll_msec */
582static inline void edac_device_calc_delay(
583 struct edac_device_ctl_info *edac_dev)
584{
585 /* convert from msec to jiffies */
586 edac_dev->delay = edac_dev->poll_msec * HZ / 1000;
587}
588
589/*
590 * The alloc() and free() functions for the 'edac_device' control info
591 * structure. A MC driver will allocate one of these for each edac_device
592 * it is going to control/register with the EDAC CORE.
593 */
594extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
595 unsigned sizeof_private,
596 char *edac_device_name,
597 unsigned nr_instances,
598 char *edac_block_name,
599 unsigned nr_blocks,
600 unsigned offset_value,
601 struct edac_attrib_spec *attrib_spec,
602 unsigned nr_attribs
603);
604
605/* The offset value can be:
606 * -1 indicating no offset value
607 * 0 for zero-based block numbers
608 * 1 for 1-based block number
609 * other for other-based block number
610 */
611#define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1)
612
613extern void edac_device_free_ctl_info( struct edac_device_ctl_info *ctl_info);
614
379#ifdef CONFIG_PCI 615#ifdef CONFIG_PCI
380 616
381/* write all or some bits in a byte-register*/ 617/* write all or some bits in a byte-register*/
@@ -466,13 +702,17 @@ extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
466 char *msg); 702 char *msg);
467 703
468/* 704/*
469 * This kmalloc's and initializes all the structures. 705 * edac_device APIs
470 * Can't be used if all structures don't have the same lifetime.
471 */ 706 */
472extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, 707extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
473 unsigned nr_chans); 708 unsigned nr_chans);
474
475/* Free an mc previously allocated by edac_mc_alloc() */
476extern void edac_mc_free(struct mem_ctl_info *mci); 709extern void edac_mc_free(struct mem_ctl_info *mci);
710extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev, int edac_idx);
711extern struct edac_device_ctl_info * edac_device_del_device(struct device *dev);
712extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
713 int inst_nr, int block_nr, const char *msg);
714extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
715 int inst_nr, int block_nr, const char *msg);
716
477 717
478#endif /* _EDAC_CORE_H_ */ 718#endif /* _EDAC_CORE_H_ */