diff options
-rw-r--r-- | Documentation/edac.txt | 151 | ||||
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 117 |
2 files changed, 73 insertions, 195 deletions
diff --git a/Documentation/edac.txt b/Documentation/edac.txt index a5c36842ece..ced52738800 100644 --- a/Documentation/edac.txt +++ b/Documentation/edac.txt | |||
@@ -222,74 +222,9 @@ both csrow2 and csrow3 are populated, this indicates a dual ranked | |||
222 | set of DIMMs for channels 0 and 1. | 222 | set of DIMMs for channels 0 and 1. |
223 | 223 | ||
224 | 224 | ||
225 | Within each of the 'mc','mcX' and 'csrowX' directories are several | 225 | Within each of the 'mcX' and 'csrowX' directories are several |
226 | EDAC control and attribute files. | 226 | EDAC control and attribute files. |
227 | 227 | ||
228 | |||
229 | ============================================================================ | ||
230 | DIRECTORY 'mc' | ||
231 | |||
232 | In directory 'mc' are EDAC system overall control and attribute files: | ||
233 | |||
234 | |||
235 | Panic on UE control file: | ||
236 | |||
237 | 'edac_mc_panic_on_ue' | ||
238 | |||
239 | An uncorrectable error will cause a machine panic. This is usually | ||
240 | desirable. It is a bad idea to continue when an uncorrectable error | ||
241 | occurs - it is indeterminate what was uncorrected and the operating | ||
242 | system context might be so mangled that continuing will lead to further | ||
243 | corruption. If the kernel has MCE configured, then EDAC will never | ||
244 | notice the UE. | ||
245 | |||
246 | LOAD TIME: module/kernel parameter: panic_on_ue=[0|1] | ||
247 | |||
248 | RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue | ||
249 | |||
250 | |||
251 | Log UE control file: | ||
252 | |||
253 | 'edac_mc_log_ue' | ||
254 | |||
255 | Generate kernel messages describing uncorrectable errors. These errors | ||
256 | are reported through the system message log system. UE statistics | ||
257 | will be accumulated even when UE logging is disabled. | ||
258 | |||
259 | LOAD TIME: module/kernel parameter: log_ue=[0|1] | ||
260 | |||
261 | RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue | ||
262 | |||
263 | |||
264 | Log CE control file: | ||
265 | |||
266 | 'edac_mc_log_ce' | ||
267 | |||
268 | Generate kernel messages describing correctable errors. These | ||
269 | errors are reported through the system message log system. | ||
270 | CE statistics will be accumulated even when CE logging is disabled. | ||
271 | |||
272 | LOAD TIME: module/kernel parameter: log_ce=[0|1] | ||
273 | |||
274 | RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce | ||
275 | |||
276 | |||
277 | Polling period control file: | ||
278 | |||
279 | 'edac_mc_poll_msec' | ||
280 | |||
281 | The time period, in milliseconds, for polling for error information. | ||
282 | Too small a value wastes resources. Too large a value might delay | ||
283 | necessary handling of errors and might loose valuable information for | ||
284 | locating the error. 1000 milliseconds (once each second) is the current | ||
285 | default. Systems which require all the bandwidth they can get, may | ||
286 | increase this. | ||
287 | |||
288 | LOAD TIME: module/kernel parameter: poll_msec=[0|1] | ||
289 | |||
290 | RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec | ||
291 | |||
292 | |||
293 | ============================================================================ | 228 | ============================================================================ |
294 | 'mcX' DIRECTORIES | 229 | 'mcX' DIRECTORIES |
295 | 230 | ||
@@ -537,7 +472,6 @@ Channel 1 DIMM Label control file: | |||
537 | motherboard specific and determination of this information | 472 | motherboard specific and determination of this information |
538 | must occur in userland at this time. | 473 | must occur in userland at this time. |
539 | 474 | ||
540 | |||
541 | ============================================================================ | 475 | ============================================================================ |
542 | SYSTEM LOGGING | 476 | SYSTEM LOGGING |
543 | 477 | ||
@@ -570,7 +504,6 @@ error type, a notice of "no info" and then an optional, | |||
570 | driver-specific error message. | 504 | driver-specific error message. |
571 | 505 | ||
572 | 506 | ||
573 | |||
574 | ============================================================================ | 507 | ============================================================================ |
575 | PCI Bus Parity Detection | 508 | PCI Bus Parity Detection |
576 | 509 | ||
@@ -604,6 +537,74 @@ Enable/Disable PCI Parity checking control file: | |||
604 | echo "0" >/sys/devices/system/edac/pci/check_pci_parity | 537 | echo "0" >/sys/devices/system/edac/pci/check_pci_parity |
605 | 538 | ||
606 | 539 | ||
540 | Parity Count: | ||
541 | |||
542 | 'pci_parity_count' | ||
543 | |||
544 | This attribute file will display the number of parity errors that | ||
545 | have been detected. | ||
546 | |||
547 | |||
548 | ============================================================================ | ||
549 | MODULE PARAMETERS | ||
550 | |||
551 | Panic on UE control file: | ||
552 | |||
553 | 'edac_mc_panic_on_ue' | ||
554 | |||
555 | An uncorrectable error will cause a machine panic. This is usually | ||
556 | desirable. It is a bad idea to continue when an uncorrectable error | ||
557 | occurs - it is indeterminate what was uncorrected and the operating | ||
558 | system context might be so mangled that continuing will lead to further | ||
559 | corruption. If the kernel has MCE configured, then EDAC will never | ||
560 | notice the UE. | ||
561 | |||
562 | LOAD TIME: module/kernel parameter: edac_mc_panic_on_ue=[0|1] | ||
563 | |||
564 | RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue | ||
565 | |||
566 | |||
567 | Log UE control file: | ||
568 | |||
569 | 'edac_mc_log_ue' | ||
570 | |||
571 | Generate kernel messages describing uncorrectable errors. These errors | ||
572 | are reported through the system message log. UE statistics | ||
573 | will be accumulated even when UE logging is disabled. | ||
574 | |||
575 | LOAD TIME: module/kernel parameter: edac_mc_log_ue=[0|1] | ||
576 | |||
577 | RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue | ||
578 | |||
579 | |||
580 | Log CE control file: | ||
581 | |||
582 | 'edac_mc_log_ce' | ||
583 | |||
584 | Generate kernel messages describing correctable errors. These | ||
585 | errors are reported through the system message log. | ||
586 | CE statistics will be accumulated even when CE logging is disabled. | ||
587 | |||
588 | LOAD TIME: module/kernel parameter: edac_mc_log_ce=[0|1] | ||
589 | |||
590 | RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce | ||
591 | |||
592 | |||
593 | Polling period control file: | ||
594 | |||
595 | 'edac_mc_poll_msec' | ||
596 | |||
597 | The time period, in milliseconds, for polling for error information. | ||
598 | Too small a value wastes resources. Too large a value might delay | ||
599 | necessary handling of errors and might lose valuable information for | ||
600 | locating the error. 1000 milliseconds (once each second) is the current | ||
601 | default. Systems which require all the bandwidth they can get may | ||
602 | increase this. | ||
603 | |||
604 | LOAD TIME: module/kernel parameter: edac_mc_poll_msec=<milliseconds> | ||
605 | |||
606 | RUN TIME: echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec | ||
607 | |||
607 | 608 | ||
608 | Panic on PCI PARITY Error: | 609 | Panic on PCI PARITY Error: |
609 | 610 | ||
@@ -614,21 +615,13 @@ Panic on PCI PARITY Error: | |||
614 | error has been detected. | 615 | error has been detected. |
615 | 616 | ||
616 | 617 | ||
617 | module/kernel parameter: panic_on_pci_parity=[0|1] | 618 | module/kernel parameter: edac_panic_on_pci_pe=[0|1] |
618 | 619 | ||
619 | Enable: | 620 | Enable: |
620 | echo "1" >/sys/devices/system/edac/pci/panic_on_pci_parity | 621 | echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe |
621 | 622 | ||
622 | Disable: | 623 | Disable: |
623 | echo "0" >/sys/devices/system/edac/pci/panic_on_pci_parity | 624 | echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe |
624 | |||
625 | |||
626 | Parity Count: | ||
627 | |||
628 | 'pci_parity_count' | ||
629 | |||
630 | This attribute file will display the number of parity errors that | ||
631 | have been detected. | ||
632 | 625 | ||
633 | 626 | ||
634 | 627 | ||
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 7bb9c1532b9..cbe1a17e42f 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c | |||
@@ -123,16 +123,6 @@ static const char *edac_caps[] = { | |||
123 | 123 | ||
124 | 124 | ||
125 | 125 | ||
126 | /* | ||
127 | * /sys/devices/system/edac/mc; | ||
128 | * data structures and methods | ||
129 | */ | ||
130 | static ssize_t memctrl_int_show(void *ptr, char *buffer) | ||
131 | { | ||
132 | int *value = (int *)ptr; | ||
133 | return sprintf(buffer, "%u\n", *value); | ||
134 | } | ||
135 | |||
136 | static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) | 126 | static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) |
137 | { | 127 | { |
138 | int *value = (int *)ptr; | 128 | int *value = (int *)ptr; |
@@ -143,23 +133,6 @@ static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) | |||
143 | return count; | 133 | return count; |
144 | } | 134 | } |
145 | 135 | ||
146 | /* | ||
147 | * mc poll_msec time value | ||
148 | */ | ||
149 | static ssize_t poll_msec_int_store(void *ptr, const char *buffer, size_t count) | ||
150 | { | ||
151 | int *value = (int *)ptr; | ||
152 | |||
153 | if (isdigit(*buffer)) { | ||
154 | *value = simple_strtoul(buffer, NULL, 0); | ||
155 | |||
156 | /* notify edac_mc engine to reset the poll period */ | ||
157 | edac_mc_reset_delay_period(*value); | ||
158 | } | ||
159 | |||
160 | return count; | ||
161 | } | ||
162 | |||
163 | 136 | ||
164 | /* EDAC sysfs CSROW data structures and methods | 137 | /* EDAC sysfs CSROW data structures and methods |
165 | */ | 138 | */ |
@@ -669,98 +642,10 @@ static struct kobj_type ktype_mci = { | |||
669 | .default_attrs = (struct attribute **)mci_attr, | 642 | .default_attrs = (struct attribute **)mci_attr, |
670 | }; | 643 | }; |
671 | 644 | ||
672 | /* show/store, tables, etc for the MC kset */ | ||
673 | |||
674 | |||
675 | struct memctrl_dev_attribute { | ||
676 | struct attribute attr; | ||
677 | void *value; | ||
678 | ssize_t(*show) (void *, char *); | ||
679 | ssize_t(*store) (void *, const char *, size_t); | ||
680 | }; | ||
681 | |||
682 | /* Set of show/store abstract level functions for memory control object */ | ||
683 | static ssize_t memctrl_dev_show(struct kobject *kobj, | ||
684 | struct attribute *attr, char *buffer) | ||
685 | { | ||
686 | struct memctrl_dev_attribute *memctrl_dev; | ||
687 | memctrl_dev = (struct memctrl_dev_attribute *)attr; | ||
688 | |||
689 | if (memctrl_dev->show) | ||
690 | return memctrl_dev->show(memctrl_dev->value, buffer); | ||
691 | |||
692 | return -EIO; | ||
693 | } | ||
694 | |||
695 | static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr, | ||
696 | const char *buffer, size_t count) | ||
697 | { | ||
698 | struct memctrl_dev_attribute *memctrl_dev; | ||
699 | memctrl_dev = (struct memctrl_dev_attribute *)attr; | ||
700 | |||
701 | if (memctrl_dev->store) | ||
702 | return memctrl_dev->store(memctrl_dev->value, buffer, count); | ||
703 | |||
704 | return -EIO; | ||
705 | } | ||
706 | |||
707 | static struct sysfs_ops memctrlfs_ops = { | ||
708 | .show = memctrl_dev_show, | ||
709 | .store = memctrl_dev_store | ||
710 | }; | ||
711 | |||
712 | #define MEMCTRL_ATTR(_name, _mode, _show, _store) \ | ||
713 | static struct memctrl_dev_attribute attr_##_name = { \ | ||
714 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | ||
715 | .value = &_name, \ | ||
716 | .show = _show, \ | ||
717 | .store = _store, \ | ||
718 | }; | ||
719 | |||
720 | #define MEMCTRL_STRING_ATTR(_name, _data, _mode, _show, _store) \ | ||
721 | static struct memctrl_dev_attribute attr_##_name = { \ | ||
722 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | ||
723 | .value = _data, \ | ||
724 | .show = _show, \ | ||
725 | .store = _store, \ | ||
726 | }; | ||
727 | |||
728 | /* csrow<id> control files */ | ||
729 | MEMCTRL_ATTR(edac_mc_panic_on_ue, | ||
730 | S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); | ||
731 | |||
732 | MEMCTRL_ATTR(edac_mc_log_ue, | ||
733 | S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); | ||
734 | |||
735 | MEMCTRL_ATTR(edac_mc_log_ce, | ||
736 | S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); | ||
737 | |||
738 | MEMCTRL_ATTR(edac_mc_poll_msec, | ||
739 | S_IRUGO | S_IWUSR, memctrl_int_show, poll_msec_int_store); | ||
740 | |||
741 | /* Base Attributes of the memory ECC object */ | ||
742 | static struct memctrl_dev_attribute *memctrl_attr[] = { | ||
743 | &attr_edac_mc_panic_on_ue, | ||
744 | &attr_edac_mc_log_ue, | ||
745 | &attr_edac_mc_log_ce, | ||
746 | &attr_edac_mc_poll_msec, | ||
747 | NULL, | ||
748 | }; | ||
749 | |||
750 | |||
751 | /* the ktype for the mc_kset internal kobj */ | ||
752 | static struct kobj_type ktype_mc_set_attribs = { | ||
753 | .sysfs_ops = &memctrlfs_ops, | ||
754 | .default_attrs = (struct attribute **)memctrl_attr, | ||
755 | }; | ||
756 | |||
757 | /* EDAC memory controller sysfs kset: | 645 | /* EDAC memory controller sysfs kset: |
758 | * /sys/devices/system/edac/mc | 646 | * /sys/devices/system/edac/mc |
759 | */ | 647 | */ |
760 | static struct kset mc_kset = { | 648 | static struct kset mc_kset; |
761 | .kobj = {.ktype = &ktype_mc_set_attribs }, | ||
762 | }; | ||
763 | |||
764 | 649 | ||
765 | /* | 650 | /* |
766 | * edac_mc_register_sysfs_main_kobj | 651 | * edac_mc_register_sysfs_main_kobj |