diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-28 23:42:33 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-28 23:42:33 -0500 |
commit | ad6c2c2eb34f234d6253292b9b3c047614fbfe7e (patch) | |
tree | 8ceb00db9874c09f3002b5ca579f1f9146b30a28 /drivers/edac | |
parent | 19cc90f58d4f2538b4cf5371681a057d2e5209f2 (diff) | |
parent | b0769891ba7baa53f270dc70d71934748beb4c5b (diff) |
Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
Pull EDAC fixes and ghes-edac from Mauro Carvalho Chehab:
"For:
- Some fixes to the EDAC drivers (i7core_edac, sb_edac, i3200_edac);
- error injection support for i5100, when EDAC debug is enabled;
- fix EDAC when it is built in (early init for the subsystem);
- a "Firmware First" EDAC driver, allowing ghes to report errors via
EDAC (ghes-edac).
With regard to ghes-edac, this fixes a longstanding BZ at Red Hat
that happens with Nehalem and Sandy Bridge CPUs: when both GHES and
i7core_edac or sb_edac are running, the error reports are
unpredictable, as the BIOS and the OS race to access the registers. With
ghes-edac, the EDAC core will refuse to register any other concurrent
memory error driver.
This patchset moves the ghes struct definitions to a separate header
file (include/acpi/ghes.h) and adds 3 hooks to apei/ghes.c to
register, unregister, and report errors via ghes-edac. Those changes
were acked by the ghes driver maintainer (Huang)."
* 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (30 commits)
i5100_edac: convert to use simple_open()
ghes_edac: fix to use list_for_each_entry_safe() when delete list items
ghes_edac: Fix RAS tracing
ghes_edac: Make it compliant with UEFI spec 2.3.1
ghes_edac: Improve driver's printk messages
ghes_edac: Don't credit the same memory dimm twice
ghes_edac: do a better job of filling EDAC DIMM info
ghes_edac: add support for reporting errors via EDAC
ghes_edac: Register at EDAC core the BIOS report
ghes: add the needed hooks for EDAC error report
ghes: move structures/enum to a header file
edac: add support for error type "Info"
edac: add support for raw error reports
edac: reduce stack pressure by using a pre-allocated buffer
edac: lock module owner to avoid error report conflicts
edac: remove proc_name from mci structure
edac: add a new memory layer type
edac: initialize the core earlier
edac: better report error conditions in debug mode
i5100_edac: Remove two checkpatch warnings
...
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 23 | ||||
-rw-r--r-- | drivers/edac/Makefile | 1 | ||||
-rw-r--r-- | drivers/edac/edac_core.h | 5 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 152 | ||||
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 36 | ||||
-rw-r--r-- | drivers/edac/edac_module.c | 2 | ||||
-rw-r--r-- | drivers/edac/edac_pci_sysfs.c | 2 | ||||
-rw-r--r-- | drivers/edac/ghes_edac.c | 537 | ||||
-rw-r--r-- | drivers/edac/i3200_edac.c | 37 | ||||
-rw-r--r-- | drivers/edac/i5100_edac.c | 178 | ||||
-rw-r--r-- | drivers/edac/i7core_edac.c | 8 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 2 |
12 files changed, 899 insertions, 84 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index acb709bfac0f..e443f2c1dfd1 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig | |||
@@ -80,6 +80,29 @@ config EDAC_MM_EDAC | |||
80 | occurred so that a particular failing memory module can be | 80 | occurred so that a particular failing memory module can be |
81 | replaced. If unsure, select 'Y'. | 81 | replaced. If unsure, select 'Y'. |
82 | 82 | ||
83 | config EDAC_GHES | ||
84 | bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" | ||
85 | depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) | ||
86 | default y | ||
87 | help | ||
88 | Not all machines support hardware-driven error report. Some of those | ||
89 | provide a BIOS-driven error report mechanism via ACPI, using the | ||
90 | APEI/GHES driver. By enabling this option, the error reports provided | ||
91 | by GHES are sent to userspace via the EDAC API. | ||
92 | |||
93 | When this option is enabled, it will disable the hardware-driven | ||
94 | mechanisms, if a GHES BIOS is detected, entering into the | ||
95 | "Firmware First" mode. | ||
96 | |||
97 | It should be noticed that keeping both GHES and a hardware-driven | ||
98 | error mechanism won't work well, as BIOS will race with OS, while | ||
99 | reading the error registers. So, if you want to not use "Firmware | ||
100 | first" GHES error mechanism, you should disable GHES either at | ||
101 | compilation time or by passing "ghes.disable=1" Kernel parameter | ||
102 | at boot time. | ||
103 | |||
104 | In doubt, say 'Y'. | ||
105 | |||
83 | config EDAC_AMD64 | 106 | config EDAC_AMD64 |
84 | tristate "AMD64 (Opteron, Athlon64) K8, F10h" | 107 | tristate "AMD64 (Opteron, Athlon64) K8, F10h" |
85 | depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE | 108 | depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE |
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 5608a9ba61b7..4154ed6a02c6 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile | |||
@@ -16,6 +16,7 @@ ifdef CONFIG_PCI | |||
16 | edac_core-y += edac_pci.o edac_pci_sysfs.o | 16 | edac_core-y += edac_pci.o edac_pci_sysfs.o |
17 | endif | 17 | endif |
18 | 18 | ||
19 | obj-$(CONFIG_EDAC_GHES) += ghes_edac.o | ||
19 | obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o | 20 | obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o |
20 | 21 | ||
21 | edac_mce_amd-y := mce_amd.o | 22 | edac_mce_amd-y := mce_amd.o |
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 23bb99fa44f1..3c2625e7980d 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h | |||
@@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); | |||
453 | extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); | 453 | extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); |
454 | extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, | 454 | extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, |
455 | unsigned long page); | 455 | unsigned long page); |
456 | |||
457 | void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, | ||
458 | struct mem_ctl_info *mci, | ||
459 | struct edac_raw_error_desc *e); | ||
460 | |||
456 | void edac_mc_handle_error(const enum hw_event_mc_err_type type, | 461 | void edac_mc_handle_error(const enum hw_event_mc_err_type type, |
457 | struct mem_ctl_info *mci, | 462 | struct mem_ctl_info *mci, |
458 | const u16 error_count, | 463 | const u16 error_count, |
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index d1e9eb191f2b..cdb81aa73ab7 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c | |||
@@ -42,6 +42,12 @@ | |||
42 | static DEFINE_MUTEX(mem_ctls_mutex); | 42 | static DEFINE_MUTEX(mem_ctls_mutex); |
43 | static LIST_HEAD(mc_devices); | 43 | static LIST_HEAD(mc_devices); |
44 | 44 | ||
45 | /* | ||
46 | * Used to lock EDAC MC to just one module, avoiding two drivers e. g. | ||
47 | * apei/ghes and i7core_edac to be used at the same time. | ||
48 | */ | ||
49 | static void const *edac_mc_owner; | ||
50 | |||
45 | unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, | 51 | unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, |
46 | unsigned len) | 52 | unsigned len) |
47 | { | 53 | { |
@@ -441,13 +447,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, | |||
441 | 447 | ||
442 | mci->op_state = OP_ALLOC; | 448 | mci->op_state = OP_ALLOC; |
443 | 449 | ||
444 | /* at this point, the root kobj is valid, and in order to | ||
445 | * 'free' the object, then the function: | ||
446 | * edac_mc_unregister_sysfs_main_kobj() must be called | ||
447 | * which will perform kobj unregistration and the actual free | ||
448 | * will occur during the kobject callback operation | ||
449 | */ | ||
450 | |||
451 | return mci; | 450 | return mci; |
452 | 451 | ||
453 | error: | 452 | error: |
@@ -666,9 +665,9 @@ fail1: | |||
666 | return 1; | 665 | return 1; |
667 | } | 666 | } |
668 | 667 | ||
669 | static void del_mc_from_global_list(struct mem_ctl_info *mci) | 668 | static int del_mc_from_global_list(struct mem_ctl_info *mci) |
670 | { | 669 | { |
671 | atomic_dec(&edac_handlers); | 670 | int handlers = atomic_dec_return(&edac_handlers); |
672 | list_del_rcu(&mci->link); | 671 | list_del_rcu(&mci->link); |
673 | 672 | ||
674 | /* these are for safe removal of devices from global list while | 673 | /* these are for safe removal of devices from global list while |
@@ -676,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) | |||
676 | */ | 675 | */ |
677 | synchronize_rcu(); | 676 | synchronize_rcu(); |
678 | INIT_LIST_HEAD(&mci->link); | 677 | INIT_LIST_HEAD(&mci->link); |
678 | |||
679 | return handlers; | ||
679 | } | 680 | } |
680 | 681 | ||
681 | /** | 682 | /** |
@@ -719,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find); | |||
719 | /* FIXME - should a warning be printed if no error detection? correction? */ | 720 | /* FIXME - should a warning be printed if no error detection? correction? */ |
720 | int edac_mc_add_mc(struct mem_ctl_info *mci) | 721 | int edac_mc_add_mc(struct mem_ctl_info *mci) |
721 | { | 722 | { |
723 | int ret = -EINVAL; | ||
722 | edac_dbg(0, "\n"); | 724 | edac_dbg(0, "\n"); |
723 | 725 | ||
724 | #ifdef CONFIG_EDAC_DEBUG | 726 | #ifdef CONFIG_EDAC_DEBUG |
@@ -749,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) | |||
749 | #endif | 751 | #endif |
750 | mutex_lock(&mem_ctls_mutex); | 752 | mutex_lock(&mem_ctls_mutex); |
751 | 753 | ||
754 | if (edac_mc_owner && edac_mc_owner != mci->mod_name) { | ||
755 | ret = -EPERM; | ||
756 | goto fail0; | ||
757 | } | ||
758 | |||
752 | if (add_mc_to_global_list(mci)) | 759 | if (add_mc_to_global_list(mci)) |
753 | goto fail0; | 760 | goto fail0; |
754 | 761 | ||
@@ -775,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) | |||
775 | edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" | 782 | edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" |
776 | " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); | 783 | " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); |
777 | 784 | ||
785 | edac_mc_owner = mci->mod_name; | ||
786 | |||
778 | mutex_unlock(&mem_ctls_mutex); | 787 | mutex_unlock(&mem_ctls_mutex); |
779 | return 0; | 788 | return 0; |
780 | 789 | ||
@@ -783,7 +792,7 @@ fail1: | |||
783 | 792 | ||
784 | fail0: | 793 | fail0: |
785 | mutex_unlock(&mem_ctls_mutex); | 794 | mutex_unlock(&mem_ctls_mutex); |
786 | return 1; | 795 | return ret; |
787 | } | 796 | } |
788 | EXPORT_SYMBOL_GPL(edac_mc_add_mc); | 797 | EXPORT_SYMBOL_GPL(edac_mc_add_mc); |
789 | 798 | ||
@@ -809,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) | |||
809 | return NULL; | 818 | return NULL; |
810 | } | 819 | } |
811 | 820 | ||
812 | del_mc_from_global_list(mci); | 821 | if (!del_mc_from_global_list(mci)) |
822 | edac_mc_owner = NULL; | ||
813 | mutex_unlock(&mem_ctls_mutex); | 823 | mutex_unlock(&mem_ctls_mutex); |
814 | 824 | ||
815 | /* flush workq processes */ | 825 | /* flush workq processes */ |
@@ -907,6 +917,7 @@ const char *edac_layer_name[] = { | |||
907 | [EDAC_MC_LAYER_CHANNEL] = "channel", | 917 | [EDAC_MC_LAYER_CHANNEL] = "channel", |
908 | [EDAC_MC_LAYER_SLOT] = "slot", | 918 | [EDAC_MC_LAYER_SLOT] = "slot", |
909 | [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", | 919 | [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", |
920 | [EDAC_MC_LAYER_ALL_MEM] = "memory", | ||
910 | }; | 921 | }; |
911 | EXPORT_SYMBOL_GPL(edac_layer_name); | 922 | EXPORT_SYMBOL_GPL(edac_layer_name); |
912 | 923 | ||
@@ -1054,7 +1065,46 @@ static void edac_ue_error(struct mem_ctl_info *mci, | |||
1054 | edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); | 1065 | edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); |
1055 | } | 1066 | } |
1056 | 1067 | ||
1057 | #define OTHER_LABEL " or " | 1068 | /** |
1069 | * edac_raw_mc_handle_error - reports a memory event to userspace without doing | ||
1070 | * anything to discover the error location | ||
1071 | * | ||
1072 | * @type: severity of the error (CE/UE/Fatal) | ||
1073 | * @mci: a struct mem_ctl_info pointer | ||
1074 | * @e: error description | ||
1075 | * | ||
1076 | * This raw function is used internally by edac_mc_handle_error(). It should | ||
1077 | * only be called directly when the hardware error come directly from BIOS, | ||
1078 | * like in the case of APEI GHES driver. | ||
1079 | */ | ||
1080 | void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, | ||
1081 | struct mem_ctl_info *mci, | ||
1082 | struct edac_raw_error_desc *e) | ||
1083 | { | ||
1084 | char detail[80]; | ||
1085 | int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; | ||
1086 | |||
1087 | /* Memory type dependent details about the error */ | ||
1088 | if (type == HW_EVENT_ERR_CORRECTED) { | ||
1089 | snprintf(detail, sizeof(detail), | ||
1090 | "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", | ||
1091 | e->page_frame_number, e->offset_in_page, | ||
1092 | e->grain, e->syndrome); | ||
1093 | edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, | ||
1094 | detail, e->other_detail, e->enable_per_layer_report, | ||
1095 | e->page_frame_number, e->offset_in_page, e->grain); | ||
1096 | } else { | ||
1097 | snprintf(detail, sizeof(detail), | ||
1098 | "page:0x%lx offset:0x%lx grain:%ld", | ||
1099 | e->page_frame_number, e->offset_in_page, e->grain); | ||
1100 | |||
1101 | edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, | ||
1102 | detail, e->other_detail, e->enable_per_layer_report); | ||
1103 | } | ||
1104 | |||
1105 | |||
1106 | } | ||
1107 | EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error); | ||
1058 | 1108 | ||
1059 | /** | 1109 | /** |
1060 | * edac_mc_handle_error - reports a memory event to userspace | 1110 | * edac_mc_handle_error - reports a memory event to userspace |
@@ -1086,19 +1136,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1086 | const char *msg, | 1136 | const char *msg, |
1087 | const char *other_detail) | 1137 | const char *other_detail) |
1088 | { | 1138 | { |
1089 | /* FIXME: too much for stack: move it to some pre-alocated area */ | ||
1090 | char detail[80], location[80]; | ||
1091 | char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; | ||
1092 | char *p; | 1139 | char *p; |
1093 | int row = -1, chan = -1; | 1140 | int row = -1, chan = -1; |
1094 | int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; | 1141 | int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; |
1095 | int i; | 1142 | int i, n_labels = 0; |
1096 | long grain; | ||
1097 | bool enable_per_layer_report = false; | ||
1098 | u8 grain_bits; | 1143 | u8 grain_bits; |
1144 | struct edac_raw_error_desc *e = &mci->error_desc; | ||
1099 | 1145 | ||
1100 | edac_dbg(3, "MC%d\n", mci->mc_idx); | 1146 | edac_dbg(3, "MC%d\n", mci->mc_idx); |
1101 | 1147 | ||
1148 | /* Fills the error report buffer */ | ||
1149 | memset(e, 0, sizeof (*e)); | ||
1150 | e->error_count = error_count; | ||
1151 | e->top_layer = top_layer; | ||
1152 | e->mid_layer = mid_layer; | ||
1153 | e->low_layer = low_layer; | ||
1154 | e->page_frame_number = page_frame_number; | ||
1155 | e->offset_in_page = offset_in_page; | ||
1156 | e->syndrome = syndrome; | ||
1157 | e->msg = msg; | ||
1158 | e->other_detail = other_detail; | ||
1159 | |||
1102 | /* | 1160 | /* |
1103 | * Check if the event report is consistent and if the memory | 1161 | * Check if the event report is consistent and if the memory |
1104 | * location is known. If it is known, enable_per_layer_report will be | 1162 | * location is known. If it is known, enable_per_layer_report will be |
@@ -1121,7 +1179,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1121 | pos[i] = -1; | 1179 | pos[i] = -1; |
1122 | } | 1180 | } |
1123 | if (pos[i] >= 0) | 1181 | if (pos[i] >= 0) |
1124 | enable_per_layer_report = true; | 1182 | e->enable_per_layer_report = true; |
1125 | } | 1183 | } |
1126 | 1184 | ||
1127 | /* | 1185 | /* |
@@ -1135,8 +1193,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1135 | * where each memory belongs to a separate channel within the same | 1193 | * where each memory belongs to a separate channel within the same |
1136 | * branch. | 1194 | * branch. |
1137 | */ | 1195 | */ |
1138 | grain = 0; | 1196 | p = e->label; |
1139 | p = label; | ||
1140 | *p = '\0'; | 1197 | *p = '\0'; |
1141 | 1198 | ||
1142 | for (i = 0; i < mci->tot_dimms; i++) { | 1199 | for (i = 0; i < mci->tot_dimms; i++) { |
@@ -1150,8 +1207,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1150 | continue; | 1207 | continue; |
1151 | 1208 | ||
1152 | /* get the max grain, over the error match range */ | 1209 | /* get the max grain, over the error match range */ |
1153 | if (dimm->grain > grain) | 1210 | if (dimm->grain > e->grain) |
1154 | grain = dimm->grain; | 1211 | e->grain = dimm->grain; |
1155 | 1212 | ||
1156 | /* | 1213 | /* |
1157 | * If the error is memory-controller wide, there's no need to | 1214 | * If the error is memory-controller wide, there's no need to |
@@ -1159,8 +1216,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1159 | * channel/memory controller/... may be affected. | 1216 | * channel/memory controller/... may be affected. |
1160 | * Also, don't show errors for empty DIMM slots. | 1217 | * Also, don't show errors for empty DIMM slots. |
1161 | */ | 1218 | */ |
1162 | if (enable_per_layer_report && dimm->nr_pages) { | 1219 | if (e->enable_per_layer_report && dimm->nr_pages) { |
1163 | if (p != label) { | 1220 | if (n_labels >= EDAC_MAX_LABELS) { |
1221 | e->enable_per_layer_report = false; | ||
1222 | break; | ||
1223 | } | ||
1224 | n_labels++; | ||
1225 | if (p != e->label) { | ||
1164 | strcpy(p, OTHER_LABEL); | 1226 | strcpy(p, OTHER_LABEL); |
1165 | p += strlen(OTHER_LABEL); | 1227 | p += strlen(OTHER_LABEL); |
1166 | } | 1228 | } |
@@ -1187,12 +1249,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1187 | } | 1249 | } |
1188 | } | 1250 | } |
1189 | 1251 | ||
1190 | if (!enable_per_layer_report) { | 1252 | if (!e->enable_per_layer_report) { |
1191 | strcpy(label, "any memory"); | 1253 | strcpy(e->label, "any memory"); |
1192 | } else { | 1254 | } else { |
1193 | edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); | 1255 | edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); |
1194 | if (p == label) | 1256 | if (p == e->label) |
1195 | strcpy(label, "unknown memory"); | 1257 | strcpy(e->label, "unknown memory"); |
1196 | if (type == HW_EVENT_ERR_CORRECTED) { | 1258 | if (type == HW_EVENT_ERR_CORRECTED) { |
1197 | if (row >= 0) { | 1259 | if (row >= 0) { |
1198 | mci->csrows[row]->ce_count += error_count; | 1260 | mci->csrows[row]->ce_count += error_count; |
@@ -1205,7 +1267,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1205 | } | 1267 | } |
1206 | 1268 | ||
1207 | /* Fill the RAM location data */ | 1269 | /* Fill the RAM location data */ |
1208 | p = location; | 1270 | p = e->location; |
1209 | 1271 | ||
1210 | for (i = 0; i < mci->n_layers; i++) { | 1272 | for (i = 0; i < mci->n_layers; i++) { |
1211 | if (pos[i] < 0) | 1273 | if (pos[i] < 0) |
@@ -1215,32 +1277,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1215 | edac_layer_name[mci->layers[i].type], | 1277 | edac_layer_name[mci->layers[i].type], |
1216 | pos[i]); | 1278 | pos[i]); |
1217 | } | 1279 | } |
1218 | if (p > location) | 1280 | if (p > e->location) |
1219 | *(p - 1) = '\0'; | 1281 | *(p - 1) = '\0'; |
1220 | 1282 | ||
1221 | /* Report the error via the trace interface */ | 1283 | /* Report the error via the trace interface */ |
1222 | grain_bits = fls_long(grain) + 1; | 1284 | grain_bits = fls_long(e->grain) + 1; |
1223 | trace_mc_event(type, msg, label, error_count, | 1285 | trace_mc_event(type, e->msg, e->label, e->error_count, |
1224 | mci->mc_idx, top_layer, mid_layer, low_layer, | 1286 | mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, |
1225 | PAGES_TO_MiB(page_frame_number) | offset_in_page, | 1287 | PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, |
1226 | grain_bits, syndrome, other_detail); | 1288 | grain_bits, e->syndrome, e->other_detail); |
1227 | 1289 | ||
1228 | /* Memory type dependent details about the error */ | 1290 | edac_raw_mc_handle_error(type, mci, e); |
1229 | if (type == HW_EVENT_ERR_CORRECTED) { | ||
1230 | snprintf(detail, sizeof(detail), | ||
1231 | "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", | ||
1232 | page_frame_number, offset_in_page, | ||
1233 | grain, syndrome); | ||
1234 | edac_ce_error(mci, error_count, pos, msg, location, label, | ||
1235 | detail, other_detail, enable_per_layer_report, | ||
1236 | page_frame_number, offset_in_page, grain); | ||
1237 | } else { | ||
1238 | snprintf(detail, sizeof(detail), | ||
1239 | "page:0x%lx offset:0x%lx grain:%ld", | ||
1240 | page_frame_number, offset_in_page, grain); | ||
1241 | |||
1242 | edac_ue_error(mci, error_count, pos, msg, location, label, | ||
1243 | detail, other_detail, enable_per_layer_report); | ||
1244 | } | ||
1245 | } | 1291 | } |
1246 | EXPORT_SYMBOL_GPL(edac_mc_handle_error); | 1292 | EXPORT_SYMBOL_GPL(edac_mc_handle_error); |
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 0ca1ca71157f..4f4b6137d74e 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c | |||
@@ -7,7 +7,7 @@ | |||
7 | * | 7 | * |
8 | * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com | 8 | * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com |
9 | * | 9 | * |
10 | * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com> | 10 | * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com> |
11 | * The entire API were re-written, and ported to use struct device | 11 | * The entire API were re-written, and ported to use struct device |
12 | * | 12 | * |
13 | */ | 13 | */ |
@@ -429,8 +429,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) | |||
429 | if (!nr_pages_per_csrow(csrow)) | 429 | if (!nr_pages_per_csrow(csrow)) |
430 | continue; | 430 | continue; |
431 | err = edac_create_csrow_object(mci, mci->csrows[i], i); | 431 | err = edac_create_csrow_object(mci, mci->csrows[i], i); |
432 | if (err < 0) | 432 | if (err < 0) { |
433 | edac_dbg(1, | ||
434 | "failure: create csrow objects for csrow %d\n", | ||
435 | i); | ||
433 | goto error; | 436 | goto error; |
437 | } | ||
434 | } | 438 | } |
435 | return 0; | 439 | return 0; |
436 | 440 | ||
@@ -677,9 +681,6 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev, | |||
677 | unsigned long bandwidth = 0; | 681 | unsigned long bandwidth = 0; |
678 | int new_bw = 0; | 682 | int new_bw = 0; |
679 | 683 | ||
680 | if (!mci->set_sdram_scrub_rate) | ||
681 | return -ENODEV; | ||
682 | |||
683 | if (strict_strtoul(data, 10, &bandwidth) < 0) | 684 | if (strict_strtoul(data, 10, &bandwidth) < 0) |
684 | return -EINVAL; | 685 | return -EINVAL; |
685 | 686 | ||
@@ -703,9 +704,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev, | |||
703 | struct mem_ctl_info *mci = to_mci(dev); | 704 | struct mem_ctl_info *mci = to_mci(dev); |
704 | int bandwidth = 0; | 705 | int bandwidth = 0; |
705 | 706 | ||
706 | if (!mci->get_sdram_scrub_rate) | ||
707 | return -ENODEV; | ||
708 | |||
709 | bandwidth = mci->get_sdram_scrub_rate(mci); | 707 | bandwidth = mci->get_sdram_scrub_rate(mci); |
710 | if (bandwidth < 0) { | 708 | if (bandwidth < 0) { |
711 | edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); | 709 | edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); |
@@ -866,8 +864,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); | |||
866 | DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); | 864 | DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); |
867 | 865 | ||
868 | /* memory scrubber attribute file */ | 866 | /* memory scrubber attribute file */ |
869 | DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, | 867 | DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL); |
870 | mci_sdram_scrub_rate_store); | ||
871 | 868 | ||
872 | static struct attribute *mci_attrs[] = { | 869 | static struct attribute *mci_attrs[] = { |
873 | &dev_attr_reset_counters.attr, | 870 | &dev_attr_reset_counters.attr, |
@@ -878,7 +875,6 @@ static struct attribute *mci_attrs[] = { | |||
878 | &dev_attr_ce_noinfo_count.attr, | 875 | &dev_attr_ce_noinfo_count.attr, |
879 | &dev_attr_ue_count.attr, | 876 | &dev_attr_ue_count.attr, |
880 | &dev_attr_ce_count.attr, | 877 | &dev_attr_ce_count.attr, |
881 | &dev_attr_sdram_scrub_rate.attr, | ||
882 | &dev_attr_max_location.attr, | 878 | &dev_attr_max_location.attr, |
883 | NULL | 879 | NULL |
884 | }; | 880 | }; |
@@ -1007,11 +1003,28 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) | |||
1007 | edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); | 1003 | edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); |
1008 | err = device_add(&mci->dev); | 1004 | err = device_add(&mci->dev); |
1009 | if (err < 0) { | 1005 | if (err < 0) { |
1006 | edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); | ||
1010 | bus_unregister(&mci->bus); | 1007 | bus_unregister(&mci->bus); |
1011 | kfree(mci->bus.name); | 1008 | kfree(mci->bus.name); |
1012 | return err; | 1009 | return err; |
1013 | } | 1010 | } |
1014 | 1011 | ||
1012 | if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) { | ||
1013 | if (mci->get_sdram_scrub_rate) { | ||
1014 | dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO; | ||
1015 | dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show; | ||
1016 | } | ||
1017 | if (mci->set_sdram_scrub_rate) { | ||
1018 | dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR; | ||
1019 | dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store; | ||
1020 | } | ||
1021 | err = device_create_file(&mci->dev, | ||
1022 | &dev_attr_sdram_scrub_rate); | ||
1023 | if (err) { | ||
1024 | edac_dbg(1, "failure: create sdram_scrub_rate\n"); | ||
1025 | goto fail2; | ||
1026 | } | ||
1027 | } | ||
1015 | /* | 1028 | /* |
1016 | * Create the dimm/rank devices | 1029 | * Create the dimm/rank devices |
1017 | */ | 1030 | */ |
@@ -1056,6 +1069,7 @@ fail: | |||
1056 | continue; | 1069 | continue; |
1057 | device_unregister(&dimm->dev); | 1070 | device_unregister(&dimm->dev); |
1058 | } | 1071 | } |
1072 | fail2: | ||
1059 | device_unregister(&mci->dev); | 1073 | device_unregister(&mci->dev); |
1060 | bus_unregister(&mci->bus); | 1074 | bus_unregister(&mci->bus); |
1061 | kfree(mci->bus.name); | 1075 | kfree(mci->bus.name); |
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 12c951a2c33d..a66941fea5a4 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c | |||
@@ -146,7 +146,7 @@ static void __exit edac_exit(void) | |||
146 | /* | 146 | /* |
147 | * Inform the kernel of our entry and exit points | 147 | * Inform the kernel of our entry and exit points |
148 | */ | 148 | */ |
149 | module_init(edac_init); | 149 | subsys_initcall(edac_init); |
150 | module_exit(edac_exit); | 150 | module_exit(edac_exit); |
151 | 151 | ||
152 | MODULE_LICENSE("GPL"); | 152 | MODULE_LICENSE("GPL"); |
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 0056c4dae9d5..e8658e451762 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c | |||
@@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void) | |||
429 | if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { | 429 | if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { |
430 | edac_dbg(0, "called kobject_put on main kobj\n"); | 430 | edac_dbg(0, "called kobject_put on main kobj\n"); |
431 | kobject_put(edac_pci_top_main_kobj); | 431 | kobject_put(edac_pci_top_main_kobj); |
432 | edac_put_sysfs_subsys(); | ||
432 | } | 433 | } |
433 | edac_put_sysfs_subsys(); | ||
434 | } | 434 | } |
435 | 435 | ||
436 | /* | 436 | /* |
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c new file mode 100644 index 000000000000..bb534670ec02 --- /dev/null +++ b/drivers/edac/ghes_edac.c | |||
@@ -0,0 +1,537 @@ | |||
1 | /* | ||
2 | * GHES/EDAC Linux driver | ||
3 | * | ||
4 | * This file may be distributed under the terms of the GNU General Public | ||
5 | * License version 2. | ||
6 | * | ||
7 | * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com> | ||
8 | * | ||
9 | * Red Hat Inc. http://www.redhat.com | ||
10 | */ | ||
11 | |||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | |||
14 | #include <acpi/ghes.h> | ||
15 | #include <linux/edac.h> | ||
16 | #include <linux/dmi.h> | ||
17 | #include "edac_core.h" | ||
18 | #include <ras/ras_event.h> | ||
19 | |||
20 | #define GHES_EDAC_REVISION " Ver: 1.0.0" | ||
21 | |||
22 | struct ghes_edac_pvt { | ||
23 | struct list_head list; | ||
24 | struct ghes *ghes; | ||
25 | struct mem_ctl_info *mci; | ||
26 | |||
27 | /* Buffers for the error handling routine */ | ||
28 | char detail_location[240]; | ||
29 | char other_detail[160]; | ||
30 | char msg[80]; | ||
31 | }; | ||
32 | |||
33 | static LIST_HEAD(ghes_reglist); | ||
34 | static DEFINE_MUTEX(ghes_edac_lock); | ||
35 | static int ghes_edac_mc_num; | ||
36 | |||
37 | |||
38 | /* Memory Device - Type 17 of SMBIOS spec */ | ||
39 | struct memdev_dmi_entry { | ||
40 | u8 type; | ||
41 | u8 length; | ||
42 | u16 handle; | ||
43 | u16 phys_mem_array_handle; | ||
44 | u16 mem_err_info_handle; | ||
45 | u16 total_width; | ||
46 | u16 data_width; | ||
47 | u16 size; | ||
48 | u8 form_factor; | ||
49 | u8 device_set; | ||
50 | u8 device_locator; | ||
51 | u8 bank_locator; | ||
52 | u8 memory_type; | ||
53 | u16 type_detail; | ||
54 | u16 speed; | ||
55 | u8 manufacturer; | ||
56 | u8 serial_number; | ||
57 | u8 asset_tag; | ||
58 | u8 part_number; | ||
59 | u8 attributes; | ||
60 | u32 extended_size; | ||
61 | u16 conf_mem_clk_speed; | ||
62 | } __attribute__((__packed__)); | ||
63 | |||
64 | struct ghes_edac_dimm_fill { | ||
65 | struct mem_ctl_info *mci; | ||
66 | unsigned count; | ||
67 | }; | ||
68 | |||
69 | char *memory_type[] = { | ||
70 | [MEM_EMPTY] = "EMPTY", | ||
71 | [MEM_RESERVED] = "RESERVED", | ||
72 | [MEM_UNKNOWN] = "UNKNOWN", | ||
73 | [MEM_FPM] = "FPM", | ||
74 | [MEM_EDO] = "EDO", | ||
75 | [MEM_BEDO] = "BEDO", | ||
76 | [MEM_SDR] = "SDR", | ||
77 | [MEM_RDR] = "RDR", | ||
78 | [MEM_DDR] = "DDR", | ||
79 | [MEM_RDDR] = "RDDR", | ||
80 | [MEM_RMBS] = "RMBS", | ||
81 | [MEM_DDR2] = "DDR2", | ||
82 | [MEM_FB_DDR2] = "FB_DDR2", | ||
83 | [MEM_RDDR2] = "RDDR2", | ||
84 | [MEM_XDR] = "XDR", | ||
85 | [MEM_DDR3] = "DDR3", | ||
86 | [MEM_RDDR3] = "RDDR3", | ||
87 | }; | ||
88 | |||
89 | static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) | ||
90 | { | ||
91 | int *num_dimm = arg; | ||
92 | |||
93 | if (dh->type == DMI_ENTRY_MEM_DEVICE) | ||
94 | (*num_dimm)++; | ||
95 | } | ||
96 | |||
97 | static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) | ||
98 | { | ||
99 | struct ghes_edac_dimm_fill *dimm_fill = arg; | ||
100 | struct mem_ctl_info *mci = dimm_fill->mci; | ||
101 | |||
102 | if (dh->type == DMI_ENTRY_MEM_DEVICE) { | ||
103 | struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; | ||
104 | struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, | ||
105 | mci->n_layers, | ||
106 | dimm_fill->count, 0, 0); | ||
107 | |||
108 | if (entry->size == 0xffff) { | ||
109 | pr_info("Can't get DIMM%i size\n", | ||
110 | dimm_fill->count); | ||
111 | dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ | ||
112 | } else if (entry->size == 0x7fff) { | ||
113 | dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); | ||
114 | } else { | ||
115 | if (entry->size & 1 << 15) | ||
116 | dimm->nr_pages = MiB_TO_PAGES((entry->size & | ||
117 | 0x7fff) << 10); | ||
118 | else | ||
119 | dimm->nr_pages = MiB_TO_PAGES(entry->size); | ||
120 | } | ||
121 | |||
122 | switch (entry->memory_type) { | ||
123 | case 0x12: | ||
124 | if (entry->type_detail & 1 << 13) | ||
125 | dimm->mtype = MEM_RDDR; | ||
126 | else | ||
127 | dimm->mtype = MEM_DDR; | ||
128 | break; | ||
129 | case 0x13: | ||
130 | if (entry->type_detail & 1 << 13) | ||
131 | dimm->mtype = MEM_RDDR2; | ||
132 | else | ||
133 | dimm->mtype = MEM_DDR2; | ||
134 | break; | ||
135 | case 0x14: | ||
136 | dimm->mtype = MEM_FB_DDR2; | ||
137 | break; | ||
138 | case 0x18: | ||
139 | if (entry->type_detail & 1 << 13) | ||
140 | dimm->mtype = MEM_RDDR3; | ||
141 | else | ||
142 | dimm->mtype = MEM_DDR3; | ||
143 | break; | ||
144 | default: | ||
145 | if (entry->type_detail & 1 << 6) | ||
146 | dimm->mtype = MEM_RMBS; | ||
147 | else if ((entry->type_detail & ((1 << 7) | (1 << 13))) | ||
148 | == ((1 << 7) | (1 << 13))) | ||
149 | dimm->mtype = MEM_RDR; | ||
150 | else if (entry->type_detail & 1 << 7) | ||
151 | dimm->mtype = MEM_SDR; | ||
152 | else if (entry->type_detail & 1 << 9) | ||
153 | dimm->mtype = MEM_EDO; | ||
154 | else | ||
155 | dimm->mtype = MEM_UNKNOWN; | ||
156 | } | ||
157 | |||
158 | /* | ||
159 | * Actually, we can only detect if the memory has bits for | ||
160 | * checksum or not | ||
161 | */ | ||
162 | if (entry->total_width == entry->data_width) | ||
163 | dimm->edac_mode = EDAC_NONE; | ||
164 | else | ||
165 | dimm->edac_mode = EDAC_SECDED; | ||
166 | |||
167 | dimm->dtype = DEV_UNKNOWN; | ||
168 | dimm->grain = 128; /* Likely, worst case */ | ||
169 | |||
170 | /* | ||
171 | * FIXME: It shouldn't be hard to also fill the DIMM labels | ||
172 | */ | ||
173 | |||
174 | if (dimm->nr_pages) { | ||
175 | edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", | ||
176 | dimm_fill->count, memory_type[dimm->mtype], | ||
177 | PAGES_TO_MiB(dimm->nr_pages), | ||
178 | (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); | ||
179 | edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", | ||
180 | entry->memory_type, entry->type_detail, | ||
181 | entry->total_width, entry->data_width); | ||
182 | } | ||
183 | |||
184 | dimm_fill->count++; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | void ghes_edac_report_mem_error(struct ghes *ghes, int sev, | ||
189 | struct cper_sec_mem_err *mem_err) | ||
190 | { | ||
191 | enum hw_event_mc_err_type type; | ||
192 | struct edac_raw_error_desc *e; | ||
193 | struct mem_ctl_info *mci; | ||
194 | struct ghes_edac_pvt *pvt = NULL; | ||
195 | char *p; | ||
196 | u8 grain_bits; | ||
197 | |||
198 | list_for_each_entry(pvt, &ghes_reglist, list) { | ||
199 | if (ghes == pvt->ghes) | ||
200 | break; | ||
201 | } | ||
202 | if (!pvt) { | ||
203 | pr_err("Internal error: Can't find EDAC structure\n"); | ||
204 | return; | ||
205 | } | ||
206 | mci = pvt->mci; | ||
207 | e = &mci->error_desc; | ||
208 | |||
209 | /* Cleans the error report buffer */ | ||
210 | memset(e, 0, sizeof (*e)); | ||
211 | e->error_count = 1; | ||
212 | strcpy(e->label, "unknown label"); | ||
213 | e->msg = pvt->msg; | ||
214 | e->other_detail = pvt->other_detail; | ||
215 | e->top_layer = -1; | ||
216 | e->mid_layer = -1; | ||
217 | e->low_layer = -1; | ||
218 | *pvt->other_detail = '\0'; | ||
219 | *pvt->msg = '\0'; | ||
220 | |||
221 | switch (sev) { | ||
222 | case GHES_SEV_CORRECTED: | ||
223 | type = HW_EVENT_ERR_CORRECTED; | ||
224 | break; | ||
225 | case GHES_SEV_RECOVERABLE: | ||
226 | type = HW_EVENT_ERR_UNCORRECTED; | ||
227 | break; | ||
228 | case GHES_SEV_PANIC: | ||
229 | type = HW_EVENT_ERR_FATAL; | ||
230 | break; | ||
231 | default: | ||
232 | case GHES_SEV_NO: | ||
233 | type = HW_EVENT_ERR_INFO; | ||
234 | } | ||
235 | |||
236 | edac_dbg(1, "error validation_bits: 0x%08llx\n", | ||
237 | (long long)mem_err->validation_bits); | ||
238 | |||
239 | /* Error type, mapped on e->msg */ | ||
240 | if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { | ||
241 | p = pvt->msg; | ||
242 | switch (mem_err->error_type) { | ||
243 | case 0: | ||
244 | p += sprintf(p, "Unknown"); | ||
245 | break; | ||
246 | case 1: | ||
247 | p += sprintf(p, "No error"); | ||
248 | break; | ||
249 | case 2: | ||
250 | p += sprintf(p, "Single-bit ECC"); | ||
251 | break; | ||
252 | case 3: | ||
253 | p += sprintf(p, "Multi-bit ECC"); | ||
254 | break; | ||
255 | case 4: | ||
256 | p += sprintf(p, "Single-symbol ChipKill ECC"); | ||
257 | break; | ||
258 | case 5: | ||
259 | p += sprintf(p, "Multi-symbol ChipKill ECC"); | ||
260 | break; | ||
261 | case 6: | ||
262 | p += sprintf(p, "Master abort"); | ||
263 | break; | ||
264 | case 7: | ||
265 | p += sprintf(p, "Target abort"); | ||
266 | break; | ||
267 | case 8: | ||
268 | p += sprintf(p, "Parity Error"); | ||
269 | break; | ||
270 | case 9: | ||
271 | p += sprintf(p, "Watchdog timeout"); | ||
272 | break; | ||
273 | case 10: | ||
274 | p += sprintf(p, "Invalid address"); | ||
275 | break; | ||
276 | case 11: | ||
277 | p += sprintf(p, "Mirror Broken"); | ||
278 | break; | ||
279 | case 12: | ||
280 | p += sprintf(p, "Memory Sparing"); | ||
281 | break; | ||
282 | case 13: | ||
283 | p += sprintf(p, "Scrub corrected error"); | ||
284 | break; | ||
285 | case 14: | ||
286 | p += sprintf(p, "Scrub uncorrected error"); | ||
287 | break; | ||
288 | case 15: | ||
289 | p += sprintf(p, "Physical Memory Map-out event"); | ||
290 | break; | ||
291 | default: | ||
292 | p += sprintf(p, "reserved error (%d)", | ||
293 | mem_err->error_type); | ||
294 | } | ||
295 | } else { | ||
296 | strcpy(pvt->msg, "unknown error"); | ||
297 | } | ||
298 | |||
299 | /* Error address */ | ||
300 | if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { | ||
301 | e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; | ||
302 | e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; | ||
303 | } | ||
304 | |||
305 | /* Error grain */ | ||
306 | if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) { | ||
307 | e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); | ||
308 | } | ||
309 | |||
310 | /* Memory error location, mapped on e->location */ | ||
311 | p = e->location; | ||
312 | if (mem_err->validation_bits & CPER_MEM_VALID_NODE) | ||
313 | p += sprintf(p, "node:%d ", mem_err->node); | ||
314 | if (mem_err->validation_bits & CPER_MEM_VALID_CARD) | ||
315 | p += sprintf(p, "card:%d ", mem_err->card); | ||
316 | if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) | ||
317 | p += sprintf(p, "module:%d ", mem_err->module); | ||
318 | if (mem_err->validation_bits & CPER_MEM_VALID_BANK) | ||
319 | p += sprintf(p, "bank:%d ", mem_err->bank); | ||
320 | if (mem_err->validation_bits & CPER_MEM_VALID_ROW) | ||
321 | p += sprintf(p, "row:%d ", mem_err->row); | ||
322 | if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) | ||
323 | p += sprintf(p, "col:%d ", mem_err->column); | ||
324 | if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) | ||
325 | p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); | ||
326 | if (p > e->location) | ||
327 | *(p - 1) = '\0'; | ||
328 | |||
329 | /* All other fields are mapped on e->other_detail */ | ||
330 | p = pvt->other_detail; | ||
331 | if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { | ||
332 | u64 status = mem_err->error_status; | ||
333 | |||
334 | p += sprintf(p, "status(0x%016llx): ", (long long)status); | ||
335 | switch ((status >> 8) & 0xff) { | ||
336 | case 1: | ||
337 | p += sprintf(p, "Error detected internal to the component "); | ||
338 | break; | ||
339 | case 16: | ||
340 | p += sprintf(p, "Error detected in the bus "); | ||
341 | break; | ||
342 | case 4: | ||
343 | p += sprintf(p, "Storage error in DRAM memory "); | ||
344 | break; | ||
345 | case 5: | ||
346 | p += sprintf(p, "Storage error in TLB "); | ||
347 | break; | ||
348 | case 6: | ||
349 | p += sprintf(p, "Storage error in cache "); | ||
350 | break; | ||
351 | case 7: | ||
352 | p += sprintf(p, "Error in one or more functional units "); | ||
353 | break; | ||
354 | case 8: | ||
355 | p += sprintf(p, "component failed self test "); | ||
356 | break; | ||
357 | case 9: | ||
358 | p += sprintf(p, "Overflow or undervalue of internal queue "); | ||
359 | break; | ||
360 | case 17: | ||
361 | p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); | ||
362 | break; | ||
363 | case 18: | ||
364 | p += sprintf(p, "Improper access error "); | ||
365 | break; | ||
366 | case 19: | ||
367 | p += sprintf(p, "Access to a memory address which is not mapped to any component "); | ||
368 | break; | ||
369 | case 20: | ||
370 | p += sprintf(p, "Loss of Lockstep "); | ||
371 | break; | ||
372 | case 21: | ||
373 | p += sprintf(p, "Response not associated with a request "); | ||
374 | break; | ||
375 | case 22: | ||
376 | p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); | ||
377 | break; | ||
378 | case 23: | ||
379 | p += sprintf(p, "Detection of a PATH_ERROR "); | ||
380 | break; | ||
381 | case 25: | ||
382 | p += sprintf(p, "Bus operation timeout "); | ||
383 | break; | ||
384 | case 26: | ||
385 | p += sprintf(p, "A read was issued to data that has been poisoned "); | ||
386 | break; | ||
387 | default: | ||
388 | p += sprintf(p, "reserved "); | ||
389 | break; | ||
390 | } | ||
391 | } | ||
392 | if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) | ||
393 | p += sprintf(p, "requestorID: 0x%016llx ", | ||
394 | (long long)mem_err->requestor_id); | ||
395 | if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) | ||
396 | p += sprintf(p, "responderID: 0x%016llx ", | ||
397 | (long long)mem_err->responder_id); | ||
398 | if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) | ||
399 | p += sprintf(p, "targetID: 0x%016llx ", | ||
400 | (long long)mem_err->responder_id); | ||
401 | if (p > pvt->other_detail) | ||
402 | *(p - 1) = '\0'; | ||
403 | |||
404 | /* Generate the trace event */ | ||
405 | grain_bits = fls_long(e->grain); | ||
406 | sprintf(pvt->detail_location, "APEI location: %s %s", | ||
407 | e->location, e->other_detail); | ||
408 | trace_mc_event(type, e->msg, e->label, e->error_count, | ||
409 | mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, | ||
410 | PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, | ||
411 | grain_bits, e->syndrome, pvt->detail_location); | ||
412 | |||
413 | /* Report the error via EDAC API */ | ||
414 | edac_raw_mc_handle_error(type, mci, e); | ||
415 | } | ||
416 | EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); | ||
417 | |||
418 | int ghes_edac_register(struct ghes *ghes, struct device *dev) | ||
419 | { | ||
420 | bool fake = false; | ||
421 | int rc, num_dimm = 0; | ||
422 | struct mem_ctl_info *mci; | ||
423 | struct edac_mc_layer layers[1]; | ||
424 | struct ghes_edac_pvt *pvt; | ||
425 | struct ghes_edac_dimm_fill dimm_fill; | ||
426 | |||
427 | /* Get the number of DIMMs */ | ||
428 | dmi_walk(ghes_edac_count_dimms, &num_dimm); | ||
429 | |||
430 | /* Check if we've got a bogus BIOS */ | ||
431 | if (num_dimm == 0) { | ||
432 | fake = true; | ||
433 | num_dimm = 1; | ||
434 | } | ||
435 | |||
436 | layers[0].type = EDAC_MC_LAYER_ALL_MEM; | ||
437 | layers[0].size = num_dimm; | ||
438 | layers[0].is_virt_csrow = true; | ||
439 | |||
440 | /* | ||
441 | * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), | ||
442 | * to avoid duplicated memory controller numbers | ||
443 | */ | ||
444 | mutex_lock(&ghes_edac_lock); | ||
445 | mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, | ||
446 | sizeof(*pvt)); | ||
447 | if (!mci) { | ||
448 | pr_info("Can't allocate memory for EDAC data\n"); | ||
449 | mutex_unlock(&ghes_edac_lock); | ||
450 | return -ENOMEM; | ||
451 | } | ||
452 | |||
453 | pvt = mci->pvt_info; | ||
454 | memset(pvt, 0, sizeof(*pvt)); | ||
455 | list_add_tail(&pvt->list, &ghes_reglist); | ||
456 | pvt->ghes = ghes; | ||
457 | pvt->mci = mci; | ||
458 | mci->pdev = dev; | ||
459 | |||
460 | mci->mtype_cap = MEM_FLAG_EMPTY; | ||
461 | mci->edac_ctl_cap = EDAC_FLAG_NONE; | ||
462 | mci->edac_cap = EDAC_FLAG_NONE; | ||
463 | mci->mod_name = "ghes_edac.c"; | ||
464 | mci->mod_ver = GHES_EDAC_REVISION; | ||
465 | mci->ctl_name = "ghes_edac"; | ||
466 | mci->dev_name = "ghes"; | ||
467 | |||
468 | if (!ghes_edac_mc_num) { | ||
469 | if (!fake) { | ||
470 | pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); | ||
471 | pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); | ||
472 | pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); | ||
473 | pr_info("If you find incorrect reports, please contact your hardware vendor\n"); | ||
474 | pr_info("to correct its BIOS.\n"); | ||
475 | pr_info("This system has %d DIMM sockets.\n", | ||
476 | num_dimm); | ||
477 | } else { | ||
478 | pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); | ||
479 | pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); | ||
480 | pr_info("work on such system. Use this driver with caution\n"); | ||
481 | } | ||
482 | } | ||
483 | |||
484 | if (!fake) { | ||
485 | /* | ||
486 | * Fill DIMM info from DMI for the memory controller #0 | ||
487 | * | ||
488 | * Keep it blank for the other memory controllers, as | ||
489 | * there's no reliable way to properly credit each DIMM to | ||
490 | * the memory controller, as different BIOSes fill the | ||
491 | * DMI bank location fields in different ways | ||
492 | */ | ||
493 | if (!ghes_edac_mc_num) { | ||
494 | dimm_fill.count = 0; | ||
495 | dimm_fill.mci = mci; | ||
496 | dmi_walk(ghes_edac_dmidecode, &dimm_fill); | ||
497 | } | ||
498 | } else { | ||
499 | struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, | ||
500 | mci->n_layers, 0, 0, 0); | ||
501 | |||
502 | dimm->nr_pages = 1; | ||
503 | dimm->grain = 128; | ||
504 | dimm->mtype = MEM_UNKNOWN; | ||
505 | dimm->dtype = DEV_UNKNOWN; | ||
506 | dimm->edac_mode = EDAC_SECDED; | ||
507 | } | ||
508 | |||
509 | rc = edac_mc_add_mc(mci); | ||
510 | if (rc < 0) { | ||
511 | pr_info("Can't register at EDAC core\n"); | ||
512 | edac_mc_free(mci); | ||
513 | mutex_unlock(&ghes_edac_lock); | ||
514 | return -ENODEV; | ||
515 | } | ||
516 | |||
517 | ghes_edac_mc_num++; | ||
518 | mutex_unlock(&ghes_edac_lock); | ||
519 | return 0; | ||
520 | } | ||
521 | EXPORT_SYMBOL_GPL(ghes_edac_register); | ||
522 | |||
523 | void ghes_edac_unregister(struct ghes *ghes) | ||
524 | { | ||
525 | struct mem_ctl_info *mci; | ||
526 | struct ghes_edac_pvt *pvt, *tmp; | ||
527 | |||
528 | list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) { | ||
529 | if (ghes == pvt->ghes) { | ||
530 | mci = pvt->mci; | ||
531 | edac_mc_del_mc(mci->pdev); | ||
532 | edac_mc_free(mci); | ||
533 | list_del(&pvt->list); | ||
534 | } | ||
535 | } | ||
536 | } | ||
537 | EXPORT_SYMBOL_GPL(ghes_edac_unregister); | ||
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 4e8337602e78..aa44c1718f50 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c | |||
@@ -106,16 +106,26 @@ static int nr_channels; | |||
106 | 106 | ||
107 | static int how_many_channels(struct pci_dev *pdev) | 107 | static int how_many_channels(struct pci_dev *pdev) |
108 | { | 108 | { |
109 | int n_channels; | ||
110 | |||
109 | unsigned char capid0_8b; /* 8th byte of CAPID0 */ | 111 | unsigned char capid0_8b; /* 8th byte of CAPID0 */ |
110 | 112 | ||
111 | pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); | 113 | pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); |
114 | |||
112 | if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ | 115 | if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ |
113 | edac_dbg(0, "In single channel mode\n"); | 116 | edac_dbg(0, "In single channel mode\n"); |
114 | return 1; | 117 | n_channels = 1; |
115 | } else { | 118 | } else { |
116 | edac_dbg(0, "In dual channel mode\n"); | 119 | edac_dbg(0, "In dual channel mode\n"); |
117 | return 2; | 120 | n_channels = 2; |
118 | } | 121 | } |
122 | |||
123 | if (capid0_8b & 0x10) /* check if both channels are filled */ | ||
124 | edac_dbg(0, "2 DIMMS per channel disabled\n"); | ||
125 | else | ||
126 | edac_dbg(0, "2 DIMMS per channel enabled\n"); | ||
127 | |||
128 | return n_channels; | ||
119 | } | 129 | } |
120 | 130 | ||
121 | static unsigned long eccerrlog_syndrome(u64 log) | 131 | static unsigned long eccerrlog_syndrome(u64 log) |
@@ -290,6 +300,8 @@ static void i3200_get_drbs(void __iomem *window, | |||
290 | for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { | 300 | for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { |
291 | drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; | 301 | drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; |
292 | drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; | 302 | drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; |
303 | |||
304 | edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]); | ||
293 | } | 305 | } |
294 | } | 306 | } |
295 | 307 | ||
@@ -311,6 +323,9 @@ static unsigned long drb_to_nr_pages( | |||
311 | int n; | 323 | int n; |
312 | 324 | ||
313 | n = drbs[channel][rank]; | 325 | n = drbs[channel][rank]; |
326 | if (!n) | ||
327 | return 0; | ||
328 | |||
314 | if (rank > 0) | 329 | if (rank > 0) |
315 | n -= drbs[channel][rank - 1]; | 330 | n -= drbs[channel][rank - 1]; |
316 | if (stacked && (channel == 1) && | 331 | if (stacked && (channel == 1) && |
@@ -377,19 +392,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) | |||
377 | * cumulative; the last one will contain the total memory | 392 | * cumulative; the last one will contain the total memory |
378 | * contained in all ranks. | 393 | * contained in all ranks. |
379 | */ | 394 | */ |
380 | for (i = 0; i < mci->nr_csrows; i++) { | 395 | for (i = 0; i < I3200_DIMMS; i++) { |
381 | unsigned long nr_pages; | 396 | unsigned long nr_pages; |
382 | struct csrow_info *csrow = mci->csrows[i]; | ||
383 | 397 | ||
384 | nr_pages = drb_to_nr_pages(drbs, stacked, | 398 | for (j = 0; j < nr_channels; j++) { |
385 | i / I3200_RANKS_PER_CHANNEL, | 399 | struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, |
386 | i % I3200_RANKS_PER_CHANNEL); | 400 | mci->n_layers, i, j, 0); |
387 | 401 | ||
388 | if (nr_pages == 0) | 402 | nr_pages = drb_to_nr_pages(drbs, stacked, j, i); |
389 | continue; | 403 | if (nr_pages == 0) |
404 | continue; | ||
390 | 405 | ||
391 | for (j = 0; j < nr_channels; j++) { | 406 | edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, |
392 | struct dimm_info *dimm = csrow->channels[j]->dimm; | 407 | stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); |
393 | 408 | ||
394 | dimm->nr_pages = nr_pages; | 409 | dimm->nr_pages = nr_pages; |
395 | dimm->grain = nr_pages << PAGE_SHIFT; | 410 | dimm->grain = nr_pages << PAGE_SHIFT; |
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index d6955b2cc99f..1b635178cc44 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/edac.h> | 27 | #include <linux/edac.h> |
28 | #include <linux/delay.h> | 28 | #include <linux/delay.h> |
29 | #include <linux/mmzone.h> | 29 | #include <linux/mmzone.h> |
30 | #include <linux/debugfs.h> | ||
30 | 31 | ||
31 | #include "edac_core.h" | 32 | #include "edac_core.h" |
32 | 33 | ||
@@ -68,6 +69,14 @@ | |||
68 | I5100_FERR_NF_MEM_M1ERR_MASK) | 69 | I5100_FERR_NF_MEM_M1ERR_MASK) |
69 | #define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ | 70 | #define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ |
70 | #define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ | 71 | #define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ |
72 | #define I5100_MEM0EINJMSK0 0x200 /* Injection Mask0 Register Channel 0 */ | ||
73 | #define I5100_MEM1EINJMSK0 0x208 /* Injection Mask0 Register Channel 1 */ | ||
74 | #define I5100_MEMXEINJMSK0_EINJEN (1 << 27) | ||
75 | #define I5100_MEM0EINJMSK1 0x204 /* Injection Mask1 Register Channel 0 */ | ||
76 | #define I5100_MEM1EINJMSK1 0x206 /* Injection Mask1 Register Channel 1 */ | ||
77 | |||
78 | /* Device 19, Function 0 */ | ||
79 | #define I5100_DINJ0 0x9a | ||
71 | 80 | ||
72 | /* device 21 and 22, func 0 */ | 81 | /* device 21 and 22, func 0 */ |
73 | #define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ | 82 | #define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ |
@@ -338,13 +347,26 @@ struct i5100_priv { | |||
338 | unsigned ranksperchan; /* number of ranks per channel */ | 347 | unsigned ranksperchan; /* number of ranks per channel */ |
339 | 348 | ||
340 | struct pci_dev *mc; /* device 16 func 1 */ | 349 | struct pci_dev *mc; /* device 16 func 1 */ |
350 | struct pci_dev *einj; /* device 19 func 0 */ | ||
341 | struct pci_dev *ch0mm; /* device 21 func 0 */ | 351 | struct pci_dev *ch0mm; /* device 21 func 0 */ |
342 | struct pci_dev *ch1mm; /* device 22 func 0 */ | 352 | struct pci_dev *ch1mm; /* device 22 func 0 */ |
343 | 353 | ||
344 | struct delayed_work i5100_scrubbing; | 354 | struct delayed_work i5100_scrubbing; |
345 | int scrub_enable; | 355 | int scrub_enable; |
356 | |||
357 | /* Error injection */ | ||
358 | u8 inject_channel; | ||
359 | u8 inject_hlinesel; | ||
360 | u8 inject_deviceptr1; | ||
361 | u8 inject_deviceptr2; | ||
362 | u16 inject_eccmask1; | ||
363 | u16 inject_eccmask2; | ||
364 | |||
365 | struct dentry *debugfs; | ||
346 | }; | 366 | }; |
347 | 367 | ||
368 | static struct dentry *i5100_debugfs; | ||
369 | |||
348 | /* map a rank/chan to a slot number on the mainboard */ | 370 | /* map a rank/chan to a slot number on the mainboard */ |
349 | static int i5100_rank_to_slot(const struct mem_ctl_info *mci, | 371 | static int i5100_rank_to_slot(const struct mem_ctl_info *mci, |
350 | int chan, int rank) | 372 | int chan, int rank) |
@@ -863,13 +885,126 @@ static void i5100_init_csrows(struct mem_ctl_info *mci) | |||
863 | } | 885 | } |
864 | } | 886 | } |
865 | 887 | ||
888 | /**************************************************************************** | ||
889 | * Error injection routines | ||
890 | ****************************************************************************/ | ||
891 | |||
892 | static void i5100_do_inject(struct mem_ctl_info *mci) | ||
893 | { | ||
894 | struct i5100_priv *priv = mci->pvt_info; | ||
895 | u32 mask0; | ||
896 | u16 mask1; | ||
897 | |||
898 | /* MEM[1:0]EINJMSK0 | ||
899 | * 31 - ADDRMATCHEN | ||
900 | * 29:28 - HLINESEL | ||
901 | * 00 Reserved | ||
902 | * 01 Lower half of cache line | ||
903 | * 10 Upper half of cache line | ||
904 | * 11 Both upper and lower parts of cache line | ||
905 | * 27 - EINJEN | ||
906 | * 25:19 - XORMASK1 for deviceptr1 | ||
907 | * 9:5 - SEC2RAM for deviceptr2 | ||
908 | * 4:0 - FIR2RAM for deviceptr1 | ||
909 | */ | ||
910 | mask0 = ((priv->inject_hlinesel & 0x3) << 28) | | ||
911 | I5100_MEMXEINJMSK0_EINJEN | | ||
912 | ((priv->inject_eccmask1 & 0xffff) << 10) | | ||
913 | ((priv->inject_deviceptr2 & 0x1f) << 5) | | ||
914 | (priv->inject_deviceptr1 & 0x1f); | ||
915 | |||
916 | /* MEM[1:0]EINJMSK1 | ||
917 | * 15:0 - XORMASK2 for deviceptr2 | ||
918 | */ | ||
919 | mask1 = priv->inject_eccmask2; | ||
920 | |||
921 | if (priv->inject_channel == 0) { | ||
922 | pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0); | ||
923 | pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1); | ||
924 | } else { | ||
925 | pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0); | ||
926 | pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1); | ||
927 | } | ||
928 | |||
929 | /* Error Injection Response Function | ||
930 | * Intel 5100 Memory Controller Hub Chipset (318378) datasheet | ||
931 | * hints about this register but carries no data about it. All | ||
932 | * data regarding device 19 is based on experimentation and the | ||
933 | * Intel 7300 Chipset Memory Controller Hub (318082) datasheet | ||
934 | * which appears to be accurate for the i5100 in this area. | ||
935 | * | ||
936 | * The injection code doesn't work without setting this register. | ||
937 | * The register needs to be flipped off then on else the hardware | ||
938 | * will only perform the first injection. | ||
939 | * | ||
940 | * Stop condition bits 7:4 | ||
941 | * 1010 - Stop after one injection | ||
942 | * 1011 - Never stop injecting faults | ||
943 | * | ||
944 | * Start condition bits 3:0 | ||
945 | * 1010 - Never start | ||
946 | * 1011 - Start immediately | ||
947 | */ | ||
948 | pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa); | ||
949 | pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab); | ||
950 | } | ||
951 | |||
952 | #define to_mci(k) container_of(k, struct mem_ctl_info, dev) | ||
953 | static ssize_t inject_enable_write(struct file *file, const char __user *data, | ||
954 | size_t count, loff_t *ppos) | ||
955 | { | ||
956 | struct device *dev = file->private_data; | ||
957 | struct mem_ctl_info *mci = to_mci(dev); | ||
958 | |||
959 | i5100_do_inject(mci); | ||
960 | |||
961 | return count; | ||
962 | } | ||
963 | |||
964 | static const struct file_operations i5100_inject_enable_fops = { | ||
965 | .open = simple_open, | ||
966 | .write = inject_enable_write, | ||
967 | .llseek = generic_file_llseek, | ||
968 | }; | ||
969 | |||
970 | static int i5100_setup_debugfs(struct mem_ctl_info *mci) | ||
971 | { | ||
972 | struct i5100_priv *priv = mci->pvt_info; | ||
973 | |||
974 | if (!i5100_debugfs) | ||
975 | return -ENODEV; | ||
976 | |||
977 | priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); | ||
978 | |||
979 | if (!priv->debugfs) | ||
980 | return -ENOMEM; | ||
981 | |||
982 | debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, | ||
983 | &priv->inject_channel); | ||
984 | debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, | ||
985 | &priv->inject_hlinesel); | ||
986 | debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, | ||
987 | &priv->inject_deviceptr1); | ||
988 | debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, | ||
989 | &priv->inject_deviceptr2); | ||
990 | debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, | ||
991 | &priv->inject_eccmask1); | ||
992 | debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, | ||
993 | &priv->inject_eccmask2); | ||
994 | debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, | ||
995 | &mci->dev, &i5100_inject_enable_fops); | ||
996 | |||
997 | return 0; | ||
998 | |||
999 | } | ||
1000 | |||
866 | static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) | 1001 | static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) |
867 | { | 1002 | { |
868 | int rc; | 1003 | int rc; |
869 | struct mem_ctl_info *mci; | 1004 | struct mem_ctl_info *mci; |
870 | struct edac_mc_layer layers[2]; | 1005 | struct edac_mc_layer layers[2]; |
871 | struct i5100_priv *priv; | 1006 | struct i5100_priv *priv; |
872 | struct pci_dev *ch0mm, *ch1mm; | 1007 | struct pci_dev *ch0mm, *ch1mm, *einj; |
873 | int ret = 0; | 1008 | int ret = 0; |
874 | u32 dw; | 1009 | u32 dw; |
875 | int ranksperch; | 1010 | int ranksperch; |
@@ -941,6 +1076,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) | |||
941 | goto bail_disable_ch1; | 1076 | goto bail_disable_ch1; |
942 | } | 1077 | } |
943 | 1078 | ||
1079 | |||
1080 | /* device 19, func 0, Error injection */ | ||
1081 | einj = pci_get_device_func(PCI_VENDOR_ID_INTEL, | ||
1082 | PCI_DEVICE_ID_INTEL_5100_19, 0); | ||
1083 | if (!einj) { | ||
1084 | ret = -ENODEV; | ||
1085 | goto bail_einj; | ||
1086 | } | ||
1087 | |||
1088 | rc = pci_enable_device(einj); | ||
1089 | if (rc < 0) { | ||
1090 | ret = rc; | ||
1091 | goto bail_disable_einj; | ||
1092 | } | ||
1093 | |||
1094 | |||
944 | mci->pdev = &pdev->dev; | 1095 | mci->pdev = &pdev->dev; |
945 | 1096 | ||
946 | priv = mci->pvt_info; | 1097 | priv = mci->pvt_info; |
@@ -948,6 +1099,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) | |||
948 | priv->mc = pdev; | 1099 | priv->mc = pdev; |
949 | priv->ch0mm = ch0mm; | 1100 | priv->ch0mm = ch0mm; |
950 | priv->ch1mm = ch1mm; | 1101 | priv->ch1mm = ch1mm; |
1102 | priv->einj = einj; | ||
951 | 1103 | ||
952 | INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); | 1104 | INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); |
953 | 1105 | ||
@@ -975,6 +1127,13 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) | |||
975 | mci->set_sdram_scrub_rate = i5100_set_scrub_rate; | 1127 | mci->set_sdram_scrub_rate = i5100_set_scrub_rate; |
976 | mci->get_sdram_scrub_rate = i5100_get_scrub_rate; | 1128 | mci->get_sdram_scrub_rate = i5100_get_scrub_rate; |
977 | 1129 | ||
1130 | priv->inject_channel = 0; | ||
1131 | priv->inject_hlinesel = 0; | ||
1132 | priv->inject_deviceptr1 = 0; | ||
1133 | priv->inject_deviceptr2 = 0; | ||
1134 | priv->inject_eccmask1 = 0; | ||
1135 | priv->inject_eccmask2 = 0; | ||
1136 | |||
978 | i5100_init_csrows(mci); | 1137 | i5100_init_csrows(mci); |
979 | 1138 | ||
980 | /* this strange construction seems to be in every driver, dunno why */ | 1139 | /* this strange construction seems to be in every driver, dunno why */ |
@@ -992,6 +1151,8 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) | |||
992 | goto bail_scrub; | 1151 | goto bail_scrub; |
993 | } | 1152 | } |
994 | 1153 | ||
1154 | i5100_setup_debugfs(mci); | ||
1155 | |||
995 | return ret; | 1156 | return ret; |
996 | 1157 | ||
997 | bail_scrub: | 1158 | bail_scrub: |
@@ -999,6 +1160,12 @@ bail_scrub: | |||
999 | cancel_delayed_work_sync(&(priv->i5100_scrubbing)); | 1160 | cancel_delayed_work_sync(&(priv->i5100_scrubbing)); |
1000 | edac_mc_free(mci); | 1161 | edac_mc_free(mci); |
1001 | 1162 | ||
1163 | bail_disable_einj: | ||
1164 | pci_disable_device(einj); | ||
1165 | |||
1166 | bail_einj: | ||
1167 | pci_dev_put(einj); | ||
1168 | |||
1002 | bail_disable_ch1: | 1169 | bail_disable_ch1: |
1003 | pci_disable_device(ch1mm); | 1170 | pci_disable_device(ch1mm); |
1004 | 1171 | ||
@@ -1030,14 +1197,18 @@ static void i5100_remove_one(struct pci_dev *pdev) | |||
1030 | 1197 | ||
1031 | priv = mci->pvt_info; | 1198 | priv = mci->pvt_info; |
1032 | 1199 | ||
1200 | debugfs_remove_recursive(priv->debugfs); | ||
1201 | |||
1033 | priv->scrub_enable = 0; | 1202 | priv->scrub_enable = 0; |
1034 | cancel_delayed_work_sync(&(priv->i5100_scrubbing)); | 1203 | cancel_delayed_work_sync(&(priv->i5100_scrubbing)); |
1035 | 1204 | ||
1036 | pci_disable_device(pdev); | 1205 | pci_disable_device(pdev); |
1037 | pci_disable_device(priv->ch0mm); | 1206 | pci_disable_device(priv->ch0mm); |
1038 | pci_disable_device(priv->ch1mm); | 1207 | pci_disable_device(priv->ch1mm); |
1208 | pci_disable_device(priv->einj); | ||
1039 | pci_dev_put(priv->ch0mm); | 1209 | pci_dev_put(priv->ch0mm); |
1040 | pci_dev_put(priv->ch1mm); | 1210 | pci_dev_put(priv->ch1mm); |
1211 | pci_dev_put(priv->einj); | ||
1041 | 1212 | ||
1042 | edac_mc_free(mci); | 1213 | edac_mc_free(mci); |
1043 | } | 1214 | } |
@@ -1060,13 +1231,16 @@ static int __init i5100_init(void) | |||
1060 | { | 1231 | { |
1061 | int pci_rc; | 1232 | int pci_rc; |
1062 | 1233 | ||
1063 | pci_rc = pci_register_driver(&i5100_driver); | 1234 | i5100_debugfs = debugfs_create_dir("i5100_edac", NULL); |
1064 | 1235 | ||
1236 | pci_rc = pci_register_driver(&i5100_driver); | ||
1065 | return (pci_rc < 0) ? pci_rc : 0; | 1237 | return (pci_rc < 0) ? pci_rc : 0; |
1066 | } | 1238 | } |
1067 | 1239 | ||
1068 | static void __exit i5100_exit(void) | 1240 | static void __exit i5100_exit(void) |
1069 | { | 1241 | { |
1242 | debugfs_remove(i5100_debugfs); | ||
1243 | |||
1070 | pci_unregister_driver(&i5100_driver); | 1244 | pci_unregister_driver(&i5100_driver); |
1071 | } | 1245 | } |
1072 | 1246 | ||
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index e213d030b0dd..0ec3e95a12cd 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c | |||
@@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms) | |||
420 | 420 | ||
421 | static inline int numrank(u32 rank) | 421 | static inline int numrank(u32 rank) |
422 | { | 422 | { |
423 | static int ranks[4] = { 1, 2, 4, -EINVAL }; | 423 | static const int ranks[] = { 1, 2, 4, -EINVAL }; |
424 | 424 | ||
425 | return ranks[rank & 0x3]; | 425 | return ranks[rank & 0x3]; |
426 | } | 426 | } |
427 | 427 | ||
428 | static inline int numbank(u32 bank) | 428 | static inline int numbank(u32 bank) |
429 | { | 429 | { |
430 | static int banks[4] = { 4, 8, 16, -EINVAL }; | 430 | static const int banks[] = { 4, 8, 16, -EINVAL }; |
431 | 431 | ||
432 | return banks[bank & 0x3]; | 432 | return banks[bank & 0x3]; |
433 | } | 433 | } |
434 | 434 | ||
435 | static inline int numrow(u32 row) | 435 | static inline int numrow(u32 row) |
436 | { | 436 | { |
437 | static int rows[8] = { | 437 | static const int rows[] = { |
438 | 1 << 12, 1 << 13, 1 << 14, 1 << 15, | 438 | 1 << 12, 1 << 13, 1 << 14, 1 << 15, |
439 | 1 << 16, -EINVAL, -EINVAL, -EINVAL, | 439 | 1 << 16, -EINVAL, -EINVAL, -EINVAL, |
440 | }; | 440 | }; |
@@ -444,7 +444,7 @@ static inline int numrow(u32 row) | |||
444 | 444 | ||
445 | static inline int numcol(u32 col) | 445 | static inline int numcol(u32 col) |
446 | { | 446 | { |
447 | static int cols[8] = { | 447 | static const int cols[] = { |
448 | 1 << 10, 1 << 11, 1 << 12, -EINVAL, | 448 | 1 << 10, 1 << 11, 1 << 12, -EINVAL, |
449 | }; | 449 | }; |
450 | return cols[col & 0x3]; | 450 | return cols[col & 0x3]; |
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index da7e2986e3d5..57244f995614 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c | |||
@@ -639,7 +639,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) | |||
639 | tmp_mb = (1 + pvt->tohm) >> 20; | 639 | tmp_mb = (1 + pvt->tohm) >> 20; |
640 | 640 | ||
641 | mb = div_u64_rem(tmp_mb, 1000, &kb); | 641 | mb = div_u64_rem(tmp_mb, 1000, &kb); |
642 | edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm); | 642 | edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); |
643 | 643 | ||
644 | /* | 644 | /* |
645 | * Step 2) Get SAD range and SAD Interleave list | 645 | * Step 2) Get SAD range and SAD Interleave list |