aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac/edac_device.c
diff options
context:
space:
mode:
authorDouglas Thompson <dougthompson@xmission.com>2007-07-19 04:49:36 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-19 13:04:53 -0400
commite27e3dac651771fe3250f6305dee277bce29fc5d (patch)
tree9c0ac81a0948d8e52a72865ff9fbae4a12031a32 /drivers/edac/edac_device.c
parent7c9281d76c1c0b130f79d5fc021084e9749959d4 (diff)
drivers/edac: add edac_device class
This patch adds the new 'class' of object to be managed, named: 'edac_device'. As a peer of the 'edac_mc' class of object, it provides a non-memory centric view of an ERROR DETECTING device in hardware. It provides a sysfs interface and an abstraction for various EDAC type devices. Multiple 'instances' within the class are possible, with each 'instance' able to have multiple 'blocks', and each 'block' having 'attributes'. At the 'block' level there are the 'ce_count' and 'ue_count' fields which the device driver can update and/or call edac_device_handle_XX() functions. At each higher level are additional 'total' count fields, which are a summation of counts below that level. This 'edac_device' has been used to capture and present ECC errors which are found in an L1 and L2 system on a per CORE/CPU basis. Signed-off-by: Douglas Thompson <dougthompson@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/edac/edac_device.c')
-rw-r--r--drivers/edac/edac_device.c669
1 files changed, 669 insertions, 0 deletions
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
new file mode 100644
index 000000000000..c579c498cc75
--- /dev/null
+++ b/drivers/edac/edac_device.c
@@ -0,0 +1,669 @@
1
2/*
3 * edac_device.c
4 * (C) 2007 www.douglaskthompson.com
5 *
6 * This file may be distributed under the terms of the
7 * GNU General Public License.
8 *
9 * Written by Doug Thompson <norsk5@xmission.com>
10 *
11 * edac_device API implementation
12 * 19 Jan 2007
13 */
14
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/smp.h>
18#include <linux/init.h>
19#include <linux/sysctl.h>
20#include <linux/highmem.h>
21#include <linux/timer.h>
22#include <linux/slab.h>
23#include <linux/spinlock.h>
24#include <linux/list.h>
25#include <linux/sysdev.h>
26#include <linux/ctype.h>
27#include <linux/workqueue.h>
28#include <asm/uaccess.h>
29#include <asm/page.h>
30
31#include "edac_core.h"
32#include "edac_module.h"
33
/*
 * device_ctls_mutex (taken with down()/up() by the lock helpers in this
 * file) guards edac_device_list, the global list of edac_device control
 * structures.
 */
static DECLARE_MUTEX(device_ctls_mutex);
static LIST_HEAD(edac_device_list);
37
38
39static inline void lock_device_list(void)
40{
41 down(&device_ctls_mutex);
42}
43
44static inline void unlock_device_list(void)
45{
46 up(&device_ctls_mutex);
47}
48
49
#ifdef CONFIG_EDAC_DEBUG
/*
 * edac_device_dump_device
 *	Print the identifying fields of 'edac_dev' (index, check routine,
 *	device pointer, module/controller names and private data pointer)
 *	through the debugf3()/debugf4() debug macros.
 */
static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
{
	debugf3("\tedac_dev = %p dev_idx=%d \n", edac_dev, edac_dev->dev_idx);
	debugf4("\tedac_dev->edac_check = %p\n", edac_dev->edac_check);
	debugf3("\tdev = %p\n", edac_dev->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n",
		edac_dev->mod_name, edac_dev->ctl_name);
	debugf3("\tpvt_info = %p\n\n", edac_dev->pvt_info);
}
#endif /* CONFIG_EDAC_DEBUG */
61
62/*
63 * The alloc() and free() functions for the 'edac_device' control info
64 * structure. A MC driver will allocate one of these for each edac_device
65 * it is going to control/register with the EDAC CORE.
66 */
67struct edac_device_ctl_info *edac_device_alloc_ctl_info(
68 unsigned sz_private,
69 char *edac_device_name,
70 unsigned nr_instances,
71 char *edac_block_name,
72 unsigned nr_blocks,
73 unsigned offset_value,
74 struct edac_attrib_spec *attrib_spec,
75 unsigned nr_attribs)
76{
77 struct edac_device_ctl_info *dev_ctl;
78 struct edac_device_instance *dev_inst, *inst;
79 struct edac_device_block *dev_blk, *blk_p, *blk;
80 struct edac_attrib *dev_attrib, *attrib_p, *attrib;
81 unsigned total_size;
82 unsigned count;
83 unsigned instance, block, attr;
84 void *pvt;
85
86 debugf1("%s() instances=%d blocks=%d\n",
87 __func__,nr_instances,nr_blocks);
88
89 /* Figure out the offsets of the various items from the start of an
90 * ctl_info structure. We want the alignment of each item
91 * to be at least as stringent as what the compiler would
92 * provide if we could simply hardcode everything into a single struct.
93 */
94 dev_ctl = (struct edac_device_ctl_info *) 0;
95
96 /* Calc the 'end' offset past the ctl_info structure */
97 dev_inst = (struct edac_device_instance *)
98 edac_align_ptr(&dev_ctl[1],sizeof(*dev_inst));
99
100 /* Calc the 'end' offset past the instance array */
101 dev_blk = (struct edac_device_block *)
102 edac_align_ptr(&dev_inst[nr_instances],sizeof(*dev_blk));
103
104 /* Calc the 'end' offset past the dev_blk array */
105 count = nr_instances * nr_blocks;
106 dev_attrib = (struct edac_attrib *)
107 edac_align_ptr(&dev_blk[count],sizeof(*dev_attrib));
108
109 /* Check for case of NO attributes specified */
110 if (nr_attribs > 0)
111 count *= nr_attribs;
112
113 /* Calc the 'end' offset past the attributes array */
114 pvt = edac_align_ptr(&dev_attrib[count],sz_private);
115 total_size = ((unsigned long) pvt) + sz_private;
116
117 /* Allocate the amount of memory for the set of control structures */
118 if ((dev_ctl = kmalloc(total_size, GFP_KERNEL)) == NULL)
119 return NULL;
120
121 /* Adjust pointers so they point within the memory we just allocated
122 * rather than an imaginary chunk of memory located at address 0.
123 */
124 dev_inst = (struct edac_device_instance *)
125 (((char *) dev_ctl) + ((unsigned long) dev_inst));
126 dev_blk = (struct edac_device_block *)
127 (((char *) dev_ctl) + ((unsigned long) dev_blk));
128 dev_attrib = (struct edac_attrib *)
129 (((char *) dev_ctl) + ((unsigned long) dev_attrib));
130 pvt = sz_private ?
131 (((char *) dev_ctl) + ((unsigned long) pvt)) : NULL;
132
133 memset(dev_ctl, 0, total_size); /* clear all fields */
134 dev_ctl->nr_instances = nr_instances;
135 dev_ctl->instances = dev_inst;
136 dev_ctl->pvt_info = pvt;
137
138 /* Name of this edac device, ensure null terminated */
139 snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s", edac_device_name);
140 dev_ctl->name[sizeof(dev_ctl->name)-1] = '\0';
141
142 /* Initialize every Instance */
143 for (instance = 0; instance < nr_instances; instance++) {
144 inst = &dev_inst[instance];
145 inst->ctl = dev_ctl;
146 inst->nr_blocks = nr_blocks;
147 blk_p = &dev_blk[instance * nr_blocks];
148 inst->blocks = blk_p;
149
150 /* name of this instance */
151 snprintf(inst->name, sizeof(inst->name),
152 "%s%u", edac_device_name, instance);
153 inst->name[sizeof(inst->name)-1] = '\0';
154
155 /* Initialize every block in each instance */
156 for ( block = 0;
157 block < nr_blocks;
158 block++) {
159 blk = &blk_p[block];
160 blk->instance = inst;
161 blk->nr_attribs = nr_attribs;
162 attrib_p = &dev_attrib[block * nr_attribs];
163 blk->attribs = attrib_p;
164 snprintf(blk->name, sizeof(blk->name),
165 "%s%d", edac_block_name,block+1);
166 blk->name[sizeof(blk->name)-1] = '\0';
167
168 debugf1("%s() instance=%d block=%d name=%s\n",
169 __func__, instance,block,blk->name);
170
171 if (attrib_spec != NULL) {
172 /* when there is an attrib_spec passed int then
173 * Initialize every attrib of each block
174 */
175 for (attr = 0; attr < nr_attribs; attr++) {
176 attrib = &attrib_p[attr];
177 attrib->block = blk;
178
179 /* Link each attribute to the caller's
180 * spec entry, for name and type
181 */
182 attrib->spec = &attrib_spec[attr];
183 }
184 }
185 }
186 }
187
188 /* Mark this instance as merely ALLOCATED */
189 dev_ctl->op_state = OP_ALLOC;
190
191 return dev_ctl;
192}
193EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info);
194
/*
 * edac_device_free_ctl_info()
 *	Release a control structure obtained from
 *	edac_device_alloc_ctl_info().  The whole structure is one
 *	allocation, so a single kfree() suffices.
 */
void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info)
{
	kfree(ctl_info);
}
EXPORT_SYMBOL_GPL(edac_device_free_ctl_info);
204
205
206
207/*
208 * find_edac_device_by_dev
209 * scans the edac_device list for a specific 'struct device *'
210 */
211static struct edac_device_ctl_info *
212find_edac_device_by_dev(struct device *dev)
213{
214 struct edac_device_ctl_info *edac_dev;
215 struct list_head *item;
216
217 debugf3("%s()\n", __func__);
218
219 list_for_each(item, &edac_device_list) {
220 edac_dev = list_entry(item, struct edac_device_ctl_info, link);
221
222 if (edac_dev->dev == dev)
223 return edac_dev;
224 }
225
226 return NULL;
227}
228
229/*
230 * add_edac_dev_to_global_list
231 * Before calling this function, caller must
232 * assign a unique value to edac_dev->dev_idx.
233 * Return:
234 * 0 on success
235 * 1 on failure.
236 */
237static int add_edac_dev_to_global_list (struct edac_device_ctl_info *edac_dev)
238{
239 struct list_head *item, *insert_before;
240 struct edac_device_ctl_info *rover;
241
242 insert_before = &edac_device_list;
243
244 /* Determine if already on the list */
245 if (unlikely((rover = find_edac_device_by_dev(edac_dev->dev)) != NULL))
246 goto fail0;
247
248 /* Insert in ascending order by 'dev_idx', so find position */
249 list_for_each(item, &edac_device_list) {
250 rover = list_entry(item, struct edac_device_ctl_info, link);
251
252 if (rover->dev_idx >= edac_dev->dev_idx) {
253 if (unlikely(rover->dev_idx == edac_dev->dev_idx))
254 goto fail1;
255
256 insert_before = item;
257 break;
258 }
259 }
260
261 list_add_tail_rcu(&edac_dev->link, insert_before);
262 return 0;
263
264fail0:
265 edac_printk(KERN_WARNING, EDAC_MC,
266 "%s (%s) %s %s already assigned %d\n",
267 rover->dev->bus_id, dev_name(rover->dev),
268 rover->mod_name, rover->ctl_name, rover->dev_idx);
269 return 1;
270
271fail1:
272 edac_printk(KERN_WARNING, EDAC_MC,
273 "bug in low-level driver: attempt to assign\n"
274 " duplicate dev_idx %d in %s()\n", rover->dev_idx, __func__);
275 return 1;
276}
277
278/*
279 * complete_edac_device_list_del
280 */
281static void complete_edac_device_list_del(struct rcu_head *head)
282{
283 struct edac_device_ctl_info *edac_dev;
284
285 edac_dev = container_of(head, struct edac_device_ctl_info, rcu);
286 INIT_LIST_HEAD(&edac_dev->link);
287 complete(&edac_dev->complete);
288}
289
290/*
291 * del_edac_device_from_global_list
292 */
293static void del_edac_device_from_global_list(
294 struct edac_device_ctl_info *edac_device)
295{
296 list_del_rcu(&edac_device->link);
297 init_completion(&edac_device->complete);
298 call_rcu(&edac_device->rcu, complete_edac_device_list_del);
299 wait_for_completion(&edac_device->complete);
300}
301
302/**
303 * edac_device_find
304 * Search for a edac_device_ctl_info structure whose index is 'idx'.
305 *
306 * If found, return a pointer to the structure.
307 * Else return NULL.
308 *
309 * Caller must hold device_ctls_mutex.
310 */
311struct edac_device_ctl_info * edac_device_find(int idx)
312{
313 struct list_head *item;
314 struct edac_device_ctl_info *edac_dev;
315
316 /* Iterate over list, looking for exact match of ID */
317 list_for_each(item, &edac_device_list) {
318 edac_dev = list_entry(item, struct edac_device_ctl_info, link);
319
320 if (edac_dev->dev_idx >= idx) {
321 if (edac_dev->dev_idx == idx)
322 return edac_dev;
323
324 /* not on list, so terminate early */
325 break;
326 }
327 }
328
329 return NULL;
330}
331EXPORT_SYMBOL(edac_device_find);
332
333
334/*
335 * edac_workq_function
336 * performs the operation scheduled by a workq request
337 */
338#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20))
339static void edac_workq_function(struct work_struct *work_req)
340{
341 struct delayed_work *d_work = (struct delayed_work*) work_req;
342 struct edac_device_ctl_info *edac_dev =
343 to_edac_device_ctl_work(d_work);
344#else
345static void edac_workq_function(void *ptr)
346{
347 struct edac_device_ctl_info *edac_dev =
348 (struct edac_device_ctl_info *) ptr;
349#endif
350
351 //debugf0("%s() here and running\n", __func__);
352 lock_device_list();
353
354 /* Only poll controllers that are running polled and have a check */
355 if ((edac_dev->op_state == OP_RUNNING_POLL) &&
356 (edac_dev->edac_check != NULL)) {
357 edac_dev->edac_check(edac_dev);
358 }
359
360 unlock_device_list();
361
362 /* Reschedule */
363 queue_delayed_work(edac_workqueue,&edac_dev->work, edac_dev->delay);
364}
365
366/*
367 * edac_workq_setup
368 * initialize a workq item for this edac_device instance
369 * passing in the new delay period in msec
370 */
371void edac_workq_setup(struct edac_device_ctl_info *edac_dev, unsigned msec)
372{
373 debugf0("%s()\n", __func__);
374
375 edac_dev->poll_msec = msec;
376 edac_device_calc_delay(edac_dev); /* Calc delay jiffies */
377
378#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20))
379 INIT_DELAYED_WORK(&edac_dev->work,edac_workq_function);
380#else
381 INIT_WORK(&edac_dev->work,edac_workq_function,edac_dev);
382#endif
383 queue_delayed_work(edac_workqueue,&edac_dev->work, edac_dev->delay);
384}
385
386/*
387 * edac_workq_teardown
388 * stop the workq processing on this edac_dev
389 */
390void edac_workq_teardown(struct edac_device_ctl_info *edac_dev)
391{
392 int status;
393
394 status = cancel_delayed_work(&edac_dev->work);
395 if (status == 0) {
396 /* workq instance might be running, wait for it */
397 flush_workqueue(edac_workqueue);
398 }
399}
400
/*
 * edac_device_reset_delay_period
 *	Restart the polling of 'edac_dev' with a new period: the current
 *	workq request is torn down and a new one is set up with 'value'
 *	passed as the period in msec, all under the device list lock.
 *
 *	NOTE(review): edac_workq_teardown() may call flush_workqueue() while
 *	the device list lock is held here, and edac_workq_function() takes
 *	that same lock -- this looks like a possible deadlock when the work
 *	item is already executing; confirm.
 *	NOTE(review): 'value' is an unsigned long but edac_workq_setup()
 *	takes an unsigned, so large values would be truncated -- confirm
 *	the intended range.
 */
void edac_device_reset_delay_period(
		struct edac_device_ctl_info *edac_dev,
		unsigned long value)
{
	lock_device_list();

	/* out with the old request ... */
	edac_workq_teardown(edac_dev);

	/* ... and in with one using the new delay value */
	edac_workq_setup(edac_dev, value);

	unlock_device_list();
}
419
420/*
421 * edac_op_state_toString(edac_dev)
422 */
423static char *edac_op_state_toString(struct edac_device_ctl_info *edac_dev)
424{
425 int opstate = edac_dev->op_state;
426
427 if (opstate == OP_RUNNING_POLL)
428 return "POLLED";
429 else if (opstate == OP_RUNNING_INTERRUPT)
430 return "INTERRUPT";
431 else if (opstate == OP_RUNNING_POLL_INTR)
432 return "POLL-INTR";
433 else if (opstate == OP_ALLOC)
434 return "ALLOC";
435 else if (opstate == OP_OFFLINE)
436 return "OFFLINE";
437
438 return "UNKNOWN";
439}
440
441/**
442 * edac_device_add_device: Insert the 'edac_dev' structure into the
443 * edac_device global list and create sysfs entries associated with
444 * edac_device structure.
445 * @edac_device: pointer to the edac_device structure to be added to the list
446 * @edac_idx: A unique numeric identifier to be assigned to the
447 * 'edac_device' structure.
448 *
449 * Return:
450 * 0 Success
451 * !0 Failure
452 */
453int edac_device_add_device(struct edac_device_ctl_info *edac_dev, int edac_idx)
454{
455 debugf0("%s()\n", __func__);
456
457 edac_dev->dev_idx = edac_idx;
458#ifdef CONFIG_EDAC_DEBUG
459 if (edac_debug_level >= 3)
460 edac_device_dump_device(edac_dev);
461#endif
462 lock_device_list();
463
464 if (add_edac_dev_to_global_list(edac_dev))
465 goto fail0;
466
467 /* set load time so that error rate can be tracked */
468 edac_dev->start_time = jiffies;
469
470 /* create this instance's sysfs entries */
471 if (edac_device_create_sysfs(edac_dev)) {
472 edac_device_printk(edac_dev, KERN_WARNING,
473 "failed to create sysfs device\n");
474 goto fail1;
475 }
476
477 /* If there IS a check routine, then we are running POLLED */
478 if (edac_dev->edac_check != NULL) {
479 /* This instance is NOW RUNNING */
480 edac_dev->op_state = OP_RUNNING_POLL;
481
482 /* enable workq processing on this instance, default = 1000 msec */
483 edac_workq_setup(edac_dev, 1000);
484 } else {
485 edac_dev->op_state = OP_RUNNING_INTERRUPT;
486 }
487
488
489 /* Report action taken */
490 edac_device_printk(edac_dev, KERN_INFO,
491 "Giving out device to module '%s' controller '%s': DEV '%s' (%s)\n",
492 edac_dev->mod_name,
493 edac_dev->ctl_name,
494 dev_name(edac_dev->dev),
495 edac_op_state_toString(edac_dev)
496 );
497
498 unlock_device_list();
499 return 0;
500
501fail1:
502 /* Some error, so remove the entry from the lsit */
503 del_edac_device_from_global_list(edac_dev);
504
505fail0:
506 unlock_device_list();
507 return 1;
508}
509EXPORT_SYMBOL_GPL(edac_device_add_device);
510
511/**
512 * edac_device_del_device:
513 * Remove sysfs entries for specified edac_device structure and
514 * then remove edac_device structure from global list
515 *
516 * @pdev:
517 * Pointer to 'struct device' representing edac_device
518 * structure to remove.
519 *
520 * Return:
521 * Pointer to removed edac_device structure,
522 * OR NULL if device not found.
523 */
524struct edac_device_ctl_info * edac_device_del_device(struct device *dev)
525{
526 struct edac_device_ctl_info *edac_dev;
527
528 debugf0("MC: %s()\n", __func__);
529
530 lock_device_list();
531
532 if ((edac_dev = find_edac_device_by_dev(dev)) == NULL) {
533 unlock_device_list();
534 return NULL;
535 }
536
537 /* mark this instance as OFFLINE */
538 edac_dev->op_state = OP_OFFLINE;
539
540 /* clear workq processing on this instance */
541 edac_workq_teardown(edac_dev);
542
543 /* Tear down the sysfs entries for this instance */
544 edac_device_remove_sysfs(edac_dev);
545
546 /* deregister from global list */
547 del_edac_device_from_global_list(edac_dev);
548
549 unlock_device_list();
550
551 edac_printk(KERN_INFO, EDAC_MC,
552 "Removed device %d for %s %s: DEV %s\n",
553 edac_dev->dev_idx,
554 edac_dev->mod_name,
555 edac_dev->ctl_name,
556 dev_name(edac_dev->dev));
557
558 return edac_dev;
559}
560EXPORT_SYMBOL_GPL(edac_device_del_device);
561
562
563static inline int edac_device_get_log_ce(struct edac_device_ctl_info *edac_dev)
564{
565 return edac_dev->log_ce;
566}
567
568static inline int edac_device_get_log_ue(struct edac_device_ctl_info *edac_dev)
569{
570 return edac_dev->log_ue;
571}
572
573static inline int edac_device_get_panic_on_ue(
574 struct edac_device_ctl_info *edac_dev)
575{
576 return edac_dev->panic_on_ue;
577}
578
579/*
580 * edac_device_handle_ce
581 * perform a common output and handling of an 'edac_dev' CE event
582 */
583void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
584 int inst_nr, int block_nr, const char *msg)
585{
586 struct edac_device_instance *instance;
587 struct edac_device_block *block = NULL;
588
589 if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
590 edac_device_printk(edac_dev, KERN_ERR,
591 "INTERNAL ERROR: 'instance' out of range "
592 "(%d >= %d)\n", inst_nr, edac_dev->nr_instances);
593 return;
594 }
595
596 instance = edac_dev->instances + inst_nr;
597
598 if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
599 edac_device_printk(edac_dev, KERN_ERR,
600 "INTERNAL ERROR: instance %d 'block' out of range "
601 "(%d >= %d)\n", inst_nr, block_nr, instance->nr_blocks);
602 return;
603 }
604
605 if (instance->nr_blocks > 0) {
606 block = instance->blocks + block_nr;
607 block->counters.ce_count++;
608 }
609
610 /* Propogate the count up the 'totals' tree */
611 instance->counters.ce_count++;
612 edac_dev->counters.ce_count++;
613
614 if (edac_device_get_log_ce(edac_dev))
615 edac_device_printk(edac_dev, KERN_WARNING,
616 "CE ctl: %s, instance: %s, block: %s: %s\n",
617 edac_dev->ctl_name, instance->name,
618 block ? block->name : "N/A", msg);
619}
620EXPORT_SYMBOL_GPL(edac_device_handle_ce);
621
622/*
623 * edac_device_handle_ue
624 * perform a common output and handling of an 'edac_dev' UE event
625 */
626void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
627 int inst_nr, int block_nr, const char *msg)
628{
629 struct edac_device_instance *instance;
630 struct edac_device_block *block = NULL;
631
632 if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
633 edac_device_printk(edac_dev, KERN_ERR,
634 "INTERNAL ERROR: 'instance' out of range "
635 "(%d >= %d)\n", inst_nr, edac_dev->nr_instances);
636 return;
637 }
638
639 instance = edac_dev->instances + inst_nr;
640
641 if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
642 edac_device_printk(edac_dev, KERN_ERR,
643 "INTERNAL ERROR: instance %d 'block' out of range "
644 "(%d >= %d)\n", inst_nr, block_nr, instance->nr_blocks);
645 return;
646 }
647
648 if (instance->nr_blocks > 0) {
649 block = instance->blocks + block_nr;
650 block->counters.ue_count++;
651 }
652
653 /* Propogate the count up the 'totals' tree */
654 instance->counters.ue_count++;
655 edac_dev->counters.ue_count++;
656
657 if (edac_device_get_log_ue(edac_dev))
658 edac_device_printk(edac_dev, KERN_EMERG,
659 "UE ctl: %s, instance: %s, block: %s: %s\n",
660 edac_dev->ctl_name, instance->name,
661 block ? block->name : "N/A", msg);
662
663 if (edac_device_get_panic_on_ue(edac_dev))
664 panic("EDAC %s: UE instance: %s, block %s: %s\n",
665 edac_dev->ctl_name, instance->name,
666 block ? block->name : "N/A", msg);
667}
668EXPORT_SYMBOL_GPL(edac_device_handle_ue);
669