diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-28 23:42:33 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-28 23:42:33 -0500 |
| commit | ad6c2c2eb34f234d6253292b9b3c047614fbfe7e (patch) | |
| tree | 8ceb00db9874c09f3002b5ca579f1f9146b30a28 /include/linux | |
| parent | 19cc90f58d4f2538b4cf5371681a057d2e5209f2 (diff) | |
| parent | b0769891ba7baa53f270dc70d71934748beb4c5b (diff) | |
Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
Pull EDAC fixes and ghes-edac from Mauro Carvalho Chehab:
"For:
- some fixes to EDAC drivers (i7core_edac, sb_edac, i3200_edac);
- error injection support for i5100, when EDAC debug is enabled;
- a fix for EDAC when it is built in (early init for the subsystem);
- a "Firmware First" EDAC driver, allowing ghes to report errors via
EDAC (ghes-edac).
With regard to ghes-edac, this fixes a longstanding BZ at Red Hat
that happens with Nehalem and Sandy Bridge CPUs: when both GHES and
i7core_edac or sb_edac are running, the error reports are
unpredictable, as both BIOS and OS race to access the registers. With
ghes-edac, the EDAC core will refuse to register any other concurrent
memory error driver.
This patchset moves the ghes struct definitions to a separate header
file (include/acpi/ghes.h) and adds 3 hooks to apei/ghes.c to
register/unregister and to report errors via ghes-edac. Those changes
were acked by the ghes driver maintainer (Huang)."
* 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (30 commits)
i5100_edac: convert to use simple_open()
ghes_edac: fix to use list_for_each_entry_safe() when delete list items
ghes_edac: Fix RAS tracing
ghes_edac: Make it compliant with UEFI spec 2.3.1
ghes_edac: Improve driver's printk messages
ghes_edac: Don't credit the same memory dimm twice
ghes_edac: do a better job of filling EDAC DIMM info
ghes_edac: add support for reporting errors via EDAC
ghes_edac: Register at EDAC core the BIOS report
ghes: add the needed hooks for EDAC error report
ghes: move structures/enum to a header file
edac: add support for error type "Info"
edac: add support for raw error reports
edac: reduce stack pressure by using a pre-allocated buffer
edac: lock module owner to avoid error report conflicts
edac: remove proc_name from mci structure
edac: add a new memory layer type
edac: initialize the core earlier
edac: better report error conditions in debug mode
i5100_edac: Remove two checkpatch warnings
...
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/edac.h | 79 | ||||
| -rw-r--r-- | include/linux/pci_ids.h | 1 |
2 files changed, 77 insertions, 3 deletions
diff --git a/include/linux/edac.h b/include/linux/edac.h index 1b8c02b36f76..4fd4999ccb5b 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h | |||
| @@ -14,7 +14,6 @@ | |||
| 14 | 14 | ||
| 15 | #include <linux/atomic.h> | 15 | #include <linux/atomic.h> |
| 16 | #include <linux/device.h> | 16 | #include <linux/device.h> |
| 17 | #include <linux/kobject.h> | ||
| 18 | #include <linux/completion.h> | 17 | #include <linux/completion.h> |
| 19 | #include <linux/workqueue.h> | 18 | #include <linux/workqueue.h> |
| 20 | #include <linux/debugfs.h> | 19 | #include <linux/debugfs.h> |
| @@ -48,8 +47,17 @@ static inline void opstate_init(void) | |||
| 48 | return; | 47 | return; |
| 49 | } | 48 | } |
| 50 | 49 | ||
| 50 | /* Max length of a DIMM label*/ | ||
| 51 | #define EDAC_MC_LABEL_LEN 31 | 51 | #define EDAC_MC_LABEL_LEN 31 |
| 52 | #define MC_PROC_NAME_MAX_LEN 7 | 52 | |
| 53 | /* Maximum size of the location string */ | ||
| 54 | #define LOCATION_SIZE 80 | ||
| 55 | |||
| 56 | /* Defines the maximum number of labels that can be reported */ | ||
| 57 | #define EDAC_MAX_LABELS 8 | ||
| 58 | |||
| 59 | /* String used to join two or more labels */ | ||
| 60 | #define OTHER_LABEL " or " | ||
| 53 | 61 | ||
| 54 | /** | 62 | /** |
| 55 | * enum dev_type - describe the type of memory DRAM chips used at the stick | 63 | * enum dev_type - describe the type of memory DRAM chips used at the stick |
| @@ -101,8 +109,24 @@ enum hw_event_mc_err_type { | |||
| 101 | HW_EVENT_ERR_CORRECTED, | 109 | HW_EVENT_ERR_CORRECTED, |
| 102 | HW_EVENT_ERR_UNCORRECTED, | 110 | HW_EVENT_ERR_UNCORRECTED, |
| 103 | HW_EVENT_ERR_FATAL, | 111 | HW_EVENT_ERR_FATAL, |
| 112 | HW_EVENT_ERR_INFO, | ||
| 104 | }; | 113 | }; |
| 105 | 114 | ||
| 115 | static inline char *mc_event_error_type(const unsigned int err_type) | ||
| 116 | { | ||
| 117 | switch (err_type) { | ||
| 118 | case HW_EVENT_ERR_CORRECTED: | ||
| 119 | return "Corrected"; | ||
| 120 | case HW_EVENT_ERR_UNCORRECTED: | ||
| 121 | return "Uncorrected"; | ||
| 122 | case HW_EVENT_ERR_FATAL: | ||
| 123 | return "Fatal"; | ||
| 124 | default: | ||
| 125 | case HW_EVENT_ERR_INFO: | ||
| 126 | return "Info"; | ||
| 127 | } | ||
| 128 | } | ||
| 129 | |||
| 106 | /** | 130 | /** |
| 107 | * enum mem_type - memory types. For a more detailed reference, please see | 131 | * enum mem_type - memory types. For a more detailed reference, please see |
| 108 | * http://en.wikipedia.org/wiki/DRAM | 132 | * http://en.wikipedia.org/wiki/DRAM |
| @@ -376,6 +400,9 @@ enum scrub_type { | |||
| 376 | * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" | 400 | * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" |
| 377 | * @EDAC_MC_LAYER_SLOT: memory layer is named "slot" | 401 | * @EDAC_MC_LAYER_SLOT: memory layer is named "slot" |
| 378 | * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select" | 402 | * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select" |
| 403 | * @EDAC_MC_LAYER_ALL_MEM: memory layout is unknown. All memory is mapped | ||
| 404 | * as a single memory area. This is used when | ||
| 405 | * retrieving errors from a firmware driven driver. | ||
| 379 | * | 406 | * |
| 380 | * This enum is used by the drivers to tell edac_mc_sysfs what name should | 407 | * This enum is used by the drivers to tell edac_mc_sysfs what name should |
| 381 | * be used when describing a memory stick location. | 408 | * be used when describing a memory stick location. |
| @@ -385,6 +412,7 @@ enum edac_mc_layer_type { | |||
| 385 | EDAC_MC_LAYER_CHANNEL, | 412 | EDAC_MC_LAYER_CHANNEL, |
| 386 | EDAC_MC_LAYER_SLOT, | 413 | EDAC_MC_LAYER_SLOT, |
| 387 | EDAC_MC_LAYER_CHIP_SELECT, | 414 | EDAC_MC_LAYER_CHIP_SELECT, |
| 415 | EDAC_MC_LAYER_ALL_MEM, | ||
| 388 | }; | 416 | }; |
| 389 | 417 | ||
| 390 | /** | 418 | /** |
| @@ -551,6 +579,46 @@ struct errcount_attribute_data { | |||
| 551 | int layer0, layer1, layer2; | 579 | int layer0, layer1, layer2; |
| 552 | }; | 580 | }; |
| 553 | 581 | ||
| 582 | /** | ||
| 583 | * edac_raw_error_desc - Raw error report structure | ||
| 584 | * @grain: minimum granularity for an error report, in bytes | ||
| 585 | * @error_count: number of errors of the same type | ||
| 586 | * @top_layer: top layer of the error (layer[0]) | ||
| 587 | * @mid_layer: middle layer of the error (layer[1]) | ||
| 588 | * @low_layer: low layer of the error (layer[2]) | ||
| 589 | * @page_frame_number: page where the error happened | ||
| 590 | * @offset_in_page: page offset | ||
| 591 | * @syndrome: syndrome of the error (or 0 if unknown or if | ||
| 592 | * the syndrome is not applicable) | ||
| 593 | * @msg: error message | ||
| 594 | * @location: location of the error | ||
| 595 | * @label: label of the affected DIMM(s) | ||
| 596 | * @other_detail: other driver-specific detail about the error | ||
| 597 | * @enable_per_layer_report: if false, the error affects all layers | ||
| 598 | * (typically, a memory controller error) | ||
| 599 | */ | ||
| 600 | struct edac_raw_error_desc { | ||
| 601 | /* | ||
| 602 | * NOTE: everything before grain won't be cleaned by | ||
| 603 | * edac_raw_error_desc_clean() | ||
| 604 | */ | ||
| 605 | char location[LOCATION_SIZE]; | ||
| 606 | char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; | ||
| 607 | long grain; | ||
| 608 | |||
| 609 | /* the vars below and grain will be cleaned on every new error report */ | ||
| 610 | u16 error_count; | ||
| 611 | int top_layer; | ||
| 612 | int mid_layer; | ||
| 613 | int low_layer; | ||
| 614 | unsigned long page_frame_number; | ||
| 615 | unsigned long offset_in_page; | ||
| 616 | unsigned long syndrome; | ||
| 617 | const char *msg; | ||
| 618 | const char *other_detail; | ||
| 619 | bool enable_per_layer_report; | ||
| 620 | }; | ||
| 621 | |||
| 554 | /* MEMORY controller information structure | 622 | /* MEMORY controller information structure |
| 555 | */ | 623 | */ |
| 556 | struct mem_ctl_info { | 624 | struct mem_ctl_info { |
| @@ -630,7 +698,6 @@ struct mem_ctl_info { | |||
| 630 | const char *mod_ver; | 698 | const char *mod_ver; |
| 631 | const char *ctl_name; | 699 | const char *ctl_name; |
| 632 | const char *dev_name; | 700 | const char *dev_name; |
| 633 | char proc_name[MC_PROC_NAME_MAX_LEN + 1]; | ||
| 634 | void *pvt_info; | 701 | void *pvt_info; |
| 635 | unsigned long start_time; /* mci load start time (in jiffies) */ | 702 | unsigned long start_time; /* mci load start time (in jiffies) */ |
| 636 | 703 | ||
| @@ -659,6 +726,12 @@ struct mem_ctl_info { | |||
| 659 | /* work struct for this MC */ | 726 | /* work struct for this MC */ |
| 660 | struct delayed_work work; | 727 | struct delayed_work work; |
| 661 | 728 | ||
| 729 | /* | ||
| 730 | * Used to report an error - by being at the global struct | ||
| 731 | * makes the memory allocated by the EDAC core | ||
| 732 | */ | ||
| 733 | struct edac_raw_error_desc error_desc; | ||
| 734 | |||
| 662 | /* the internal state of this controller instance */ | 735 | /* the internal state of this controller instance */ |
| 663 | int op_state; | 736 | int op_state; |
| 664 | 737 | ||
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 31717bd287fd..f11c1c2609d5 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h | |||
| @@ -2802,6 +2802,7 @@ | |||
| 2802 | #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0 | 2802 | #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0 |
| 2803 | #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f | 2803 | #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f |
| 2804 | #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 | 2804 | #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 |
| 2805 | #define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 | ||
| 2805 | #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 | 2806 | #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 |
| 2806 | #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 | 2807 | #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 |
| 2807 | #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 | 2808 | #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 |
