aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHuang Ying <ying.huang@intel.com>2010-12-06 21:22:30 -0500
committerLen Brown <len.brown@intel.com>2010-12-13 23:42:12 -0500
commitf59c55d04b43bd72df8efa692dd07224fe94d1ac (patch)
tree31bcecf6a49230ae8735ba0f12ab4c30639c9094
parentc9aa308fd5c373faeda588cfb02b04f116904613 (diff)
ACPI, APEI, Add APEI generic error status printing support
In APEI, hardware error information reported by firmware to the Linux kernel is in the data structure of APEI generic error status (struct acpi_hest_generic_status). The Linux kernel currently uses printk to report hardware error information to user space. So, this patch adds printing support for the data structure, so that the corresponding hardware error information can be reported to user space via printk. PCIe AER information printing is not implemented yet. Will refactor the original PCIe AER information printing code to avoid code duplication. The output format is as follows: <error record> := APEI generic hardware error status severity: <integer>, <severity string> section: <integer>, severity: <integer>, <severity string> flags: <integer> <section flags strings> fru_id: <uuid string> fru_text: <string> section_type: <section type string> <section data> <severity string>* := recoverable | fatal | corrected | info <section flags strings># := [primary][, containment warning][, reset][, threshold exceeded]\ [, resource not accessible][, latent error] <section type string> := generic processor error | memory error | \ PCIe error | unknown, <uuid string> <section data> := <generic processor section data> | <memory section data> | \ <pcie section data> | <null> <generic processor section data> := [processor_type: <integer>, <proc type string>] [processor_isa: <integer>, <proc isa string>] [error_type: <integer> <proc error type strings>] [operation: <integer>, <proc operation string>] [flags: <integer> <proc flags strings>] [level: <integer>] [version_info: <integer>] [processor_id: <integer>] [target_address: <integer>] [requestor_id: <integer>] [responder_id: <integer>] [IP: <integer>] <proc type string>* := IA32/X64 | IA64 <proc isa string>* := IA32 | IA64 | X64 <proc error type strings># := [cache error][, TLB error][, bus error][, micro-architectural error] <proc operation string>* := unknown or generic | data read | data write | \ instruction execution 
<proc flags strings># := [restartable][, precise IP][, overflow][, corrected] <memory section data> := [error_status: <integer>] [physical_address: <integer>] [physical_address_mask: <integer>] [node: <integer>] [card: <integer>] [module: <integer>] [bank: <integer>] [device: <integer>] [row: <integer>] [column: <integer>] [bit_position: <integer>] [requestor_id: <integer>] [responder_id: <integer>] [target_id: <integer>] [error_type: <integer>, <mem error type string>] <mem error type string>* := unknown | no error | single-bit ECC | multi-bit ECC | \ single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \ target abort | parity error | watchdog timeout | invalid address | \ mirror Broken | memory sparing | scrub corrected error | \ scrub uncorrected error <pcie section data> := [port_type: <integer>, <pcie port type string>] [version: <integer>.<integer>] [command: <integer>, status: <integer>] [device_id: <integer>:<integer>:<integer>.<integer> slot: <integer> secondary_bus: <integer> vendor_id: <integer>, device_id: <integer> class_code: <integer>] [serial number: <integer>, <integer>] [bridge: secondary_status: <integer>, control: <integer>] <pcie port type string>* := PCIe end point | legacy PCI end point | \ unknown | unknown | root port | upstream switch port | \ downstream switch port | PCIe to PCI/PCI-X bridge | \ PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \ root complex event collector Where, [] designate corresponding content is optional All <field string> description with * has the following format: field: <integer>, <field string> Where value of <integer> should be the position of "string" in <field string> description. Otherwise, <field string> will be "unknown". All <field strings> description with # has the following format: field: <integer> <field strings> Where each string in <fields strings> corresponding to one set bit of <integer>. 
The bit position is the position of "string" in <field strings> description. For more detailed explanation of every field, please refer to UEFI specification version 2.3 or later, section Appendix N: Common Platform Error Record. Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: Len Brown <len.brown@intel.com>
-rw-r--r--Documentation/acpi/apei/output_format.txt122
-rw-r--r--drivers/acpi/apei/apei-internal.h2
-rw-r--r--drivers/acpi/apei/cper.c311
3 files changed, 435 insertions, 0 deletions
diff --git a/Documentation/acpi/apei/output_format.txt b/Documentation/acpi/apei/output_format.txt
new file mode 100644
index 000000000000..9146952c612a
--- /dev/null
+++ b/Documentation/acpi/apei/output_format.txt
@@ -0,0 +1,122 @@
1 APEI output format
2 ~~~~~~~~~~~~~~~~~~
3
4APEI uses printk as its hardware error reporting interface. The output
5format is as follows.
6
7<error record> :=
8APEI generic hardware error status
9severity: <integer>, <severity string>
10section: <integer>, severity: <integer>, <severity string>
11flags: <integer>
12<section flags strings>
13fru_id: <uuid string>
14fru_text: <string>
15section_type: <section type string>
16<section data>
17
18<severity string>* := recoverable | fatal | corrected | info
19
20<section flags strings># :=
21[primary][, containment warning][, reset][, threshold exceeded]\
22[, resource not accessible][, latent error]
23
24<section type string> := generic processor error | memory error | \
25PCIe error | unknown, <uuid string>
26
27<section data> :=
28<generic processor section data> | <memory section data> | \
29<pcie section data> | <null>
30
31<generic processor section data> :=
32[processor_type: <integer>, <proc type string>]
33[processor_isa: <integer>, <proc isa string>]
34[error_type: <integer>
35<proc error type strings>]
36[operation: <integer>, <proc operation string>]
37[flags: <integer>
38<proc flags strings>]
39[level: <integer>]
40[version_info: <integer>]
41[processor_id: <integer>]
42[target_address: <integer>]
43[requestor_id: <integer>]
44[responder_id: <integer>]
45[IP: <integer>]
46
47<proc type string>* := IA32/X64 | IA64
48
49<proc isa string>* := IA32 | IA64 | X64
50
51<proc error type strings># :=
52[cache error][, TLB error][, bus error][, micro-architectural error]
53
54<proc operation string>* := unknown or generic | data read | data write | \
55instruction execution
56
57<proc flags strings># :=
58[restartable][, precise IP][, overflow][, corrected]
59
60<memory section data> :=
61[error_status: <integer>]
62[physical_address: <integer>]
63[physical_address_mask: <integer>]
64[node: <integer>]
65[card: <integer>]
66[module: <integer>]
67[bank: <integer>]
68[device: <integer>]
69[row: <integer>]
70[column: <integer>]
71[bit_position: <integer>]
72[requestor_id: <integer>]
73[responder_id: <integer>]
74[target_id: <integer>]
75[error_type: <integer>, <mem error type string>]
76
77<mem error type string>* :=
78unknown | no error | single-bit ECC | multi-bit ECC | \
79single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \
80target abort | parity error | watchdog timeout | invalid address | \
81mirror Broken | memory sparing | scrub corrected error | \
82scrub uncorrected error
83
84<pcie section data> :=
85[port_type: <integer>, <pcie port type string>]
86[version: <integer>.<integer>]
87[command: <integer>, status: <integer>]
88[device_id: <integer>:<integer>:<integer>.<integer>
89slot: <integer>
90secondary_bus: <integer>
91vendor_id: <integer>, device_id: <integer>
92class_code: <integer>]
93[serial number: <integer>, <integer>]
94[bridge: secondary_status: <integer>, control: <integer>]
95
96<pcie port type string>* := PCIe end point | legacy PCI end point | \
97unknown | unknown | root port | upstream switch port | \
98downstream switch port | PCIe to PCI/PCI-X bridge | \
99PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
100root complex event collector
101
102Where [] designates that the corresponding content is optional.
103
104Every <field string> description marked with * has the following format:
105
106field: <integer>, <field string>
107
108where the value of <integer> is the position (counting from 0) of "string"
109in the <field string> description. For any other value, <field string> is "unknown".
110
111Every <field strings> description marked with # has the following format:
112
113field: <integer>
114<field strings>
115
116where each string in <field strings> corresponds to one set bit of
117<integer>. The bit position is the position (counting from 0) of "string"
118in the <field strings> description.
119
120For more detailed explanation of every field, please refer to UEFI
121specification version 2.3 or later, section Appendix N: Common
122Platform Error Record.
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index 18df1e940276..ef0581f2094d 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -109,6 +109,8 @@ static inline u32 apei_estatus_len(struct acpi_hest_generic_status *estatus)
109 return sizeof(*estatus) + estatus->data_length; 109 return sizeof(*estatus) + estatus->data_length;
110} 110}
111 111
112void apei_estatus_print(const char *pfx,
113 const struct acpi_hest_generic_status *estatus);
112int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus); 114int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
113int apei_estatus_check(const struct acpi_hest_generic_status *estatus); 115int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
114#endif 116#endif
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index f4cf2fc4c8c1..31464a006d76 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -46,6 +46,317 @@ u64 cper_next_record_id(void)
46} 46}
47EXPORT_SYMBOL_GPL(cper_next_record_id); 47EXPORT_SYMBOL_GPL(cper_next_record_id);
48 48
/*
 * Severity names, indexed by the numeric severity value that
 * cper_severity_str() receives.
 */
static const char *cper_severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};
55
56static const char *cper_severity_str(unsigned int severity)
57{
58 return severity < ARRAY_SIZE(cper_severity_strs) ?
59 cper_severity_strs[severity] : "unknown";
60}
61
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * Strings are joined with ", ".  When appending the next string would
 * make the line longer than 80 characters, the pending line is printed
 * and a new one is started; @pfx is printed at the beginning of each
 * line.
 *
 * Set bits that have no string are skipped: that is, a NULL entry in
 * @strs, or a bit position at or beyond @strs_size.  Entries of @strs
 * beyond the width of @bits are never looked at.
 */
static void cper_print_bits(const char *pfx, unsigned int bits,
			    const char *strs[], unsigned int strs_size)
{
	/* Shifting 1U by the width of @bits or more is undefined. */
	const unsigned int nr_bits = sizeof(bits) * 8;
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	if (strs_size > nr_bits)
		strs_size = nr_bits;

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush first if ", " plus str would pass column 80. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
96
/* Processor type names, indexed by the proc_type field value. */
static const char *cper_proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Processor ISA names, indexed by the proc_isa field value. */
static const char *cper_proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Processor error type names, indexed by bit position in proc_error_type. */
static const char *cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field value. */
static const char *cper_proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position in the flags field. */
static const char *cper_proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
128
129static void cper_print_proc_generic(const char *pfx,
130 const struct cper_sec_proc_generic *proc)
131{
132 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
133 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
134 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
135 cper_proc_type_strs[proc->proc_type] : "unknown");
136 if (proc->validation_bits & CPER_PROC_VALID_ISA)
137 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
138 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
139 cper_proc_isa_strs[proc->proc_isa] : "unknown");
140 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
141 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
142 cper_print_bits(pfx, proc->proc_error_type,
143 cper_proc_error_type_strs,
144 ARRAY_SIZE(cper_proc_error_type_strs));
145 }
146 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
147 printk("%s""operation: %d, %s\n", pfx, proc->operation,
148 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
149 cper_proc_op_strs[proc->operation] : "unknown");
150 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
151 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
152 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
153 ARRAY_SIZE(cper_proc_flag_strs));
154 }
155 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
156 printk("%s""level: %d\n", pfx, proc->level);
157 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
158 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
159 if (proc->validation_bits & CPER_PROC_VALID_ID)
160 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
161 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
162 printk("%s""target_address: 0x%016llx\n",
163 pfx, proc->target_addr);
164 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
165 printk("%s""requestor_id: 0x%016llx\n",
166 pfx, proc->requestor_id);
167 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
168 printk("%s""responder_id: 0x%016llx\n",
169 pfx, proc->responder_id);
170 if (proc->validation_bits & CPER_PROC_VALID_IP)
171 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
172}
173
/* Memory error type names, indexed by the error_type field value. */
static const char *cper_mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
};
191
192static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
193{
194 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
195 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
196 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
197 printk("%s""physical_address: 0x%016llx\n",
198 pfx, mem->physical_addr);
199 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
200 printk("%s""physical_address_mask: 0x%016llx\n",
201 pfx, mem->physical_addr_mask);
202 if (mem->validation_bits & CPER_MEM_VALID_NODE)
203 printk("%s""node: %d\n", pfx, mem->node);
204 if (mem->validation_bits & CPER_MEM_VALID_CARD)
205 printk("%s""card: %d\n", pfx, mem->card);
206 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
207 printk("%s""module: %d\n", pfx, mem->module);
208 if (mem->validation_bits & CPER_MEM_VALID_BANK)
209 printk("%s""bank: %d\n", pfx, mem->bank);
210 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
211 printk("%s""device: %d\n", pfx, mem->device);
212 if (mem->validation_bits & CPER_MEM_VALID_ROW)
213 printk("%s""row: %d\n", pfx, mem->row);
214 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
215 printk("%s""column: %d\n", pfx, mem->column);
216 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
217 printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
218 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
219 printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
220 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
221 printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
222 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
223 printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
224 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
225 u8 etype = mem->error_type;
226 printk("%s""error_type: %d, %s\n", pfx, etype,
227 etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
228 cper_mem_err_type_strs[etype] : "unknown");
229 }
230}
231
/*
 * PCIe port type names, indexed by the port_type field value.  Values 2
 * and 3 have no specific name in this table and map to "unknown".
 */
static const char *cper_pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
245
246static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
247{
248 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
249 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
250 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
251 cper_pcie_port_type_strs[pcie->port_type] : "unknown");
252 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
253 printk("%s""version: %d.%d\n", pfx,
254 pcie->version.major, pcie->version.minor);
255 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
256 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
257 pcie->command, pcie->status);
258 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
259 const __u8 *p;
260 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
261 pcie->device_id.segment, pcie->device_id.bus,
262 pcie->device_id.device, pcie->device_id.function);
263 printk("%s""slot: %d\n", pfx,
264 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
265 printk("%s""secondary_bus: 0x%02x\n", pfx,
266 pcie->device_id.secondary_bus);
267 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
268 pcie->device_id.vendor_id, pcie->device_id.device_id);
269 p = pcie->device_id.class_code;
270 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
271 }
272 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
273 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
274 pcie->serial_number.lower, pcie->serial_number.upper);
275 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
276 printk(
277 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
278 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
279}
280
/* Section flag names, indexed by bit position in the section flags field. */
static const char *apei_estatus_section_flag_strs[] = {
	[0] = "primary",
	[1] = "containment warning",
	[2] = "reset",
	[3] = "threshold exceeded",
	[4] = "resource not accessible",
	[5] = "latent error",
};
289
290static void apei_estatus_print_section(
291 const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
292{
293 uuid_le *sec_type = (uuid_le *)gdata->section_type;
294 __u16 severity;
295
296 severity = gdata->error_severity;
297 printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
298 cper_severity_str(severity));
299 printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
300 cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
301 ARRAY_SIZE(apei_estatus_section_flag_strs));
302 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
303 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
304 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
305 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
306
307 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
308 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
309 printk("%s""section_type: general processor error\n", pfx);
310 if (gdata->error_data_length >= sizeof(*proc_err))
311 cper_print_proc_generic(pfx, proc_err);
312 else
313 goto err_section_too_small;
314 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
315 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
316 printk("%s""section_type: memory error\n", pfx);
317 if (gdata->error_data_length >= sizeof(*mem_err))
318 cper_print_mem(pfx, mem_err);
319 else
320 goto err_section_too_small;
321 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
322 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
323 printk("%s""section_type: PCIe error\n", pfx);
324 if (gdata->error_data_length >= sizeof(*pcie))
325 cper_print_pcie(pfx, pcie);
326 else
327 goto err_section_too_small;
328 } else
329 printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
330
331 return;
332
333err_section_too_small:
334 pr_err(FW_WARN "error section length is too small\n");
335}
336
337void apei_estatus_print(const char *pfx,
338 const struct acpi_hest_generic_status *estatus)
339{
340 struct acpi_hest_generic_data *gdata;
341 unsigned int data_len, gedata_len;
342 int sec_no = 0;
343 __u16 severity;
344
345 printk("%s""APEI generic hardware error status\n", pfx);
346 severity = estatus->error_severity;
347 printk("%s""severity: %d, %s\n", pfx, severity,
348 cper_severity_str(severity));
349 data_len = estatus->data_length;
350 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
351 while (data_len > sizeof(*gdata)) {
352 gedata_len = gdata->error_data_length;
353 apei_estatus_print_section(pfx, gdata, sec_no);
354 data_len -= gedata_len + sizeof(*gdata);
355 sec_no++;
356 }
357}
358EXPORT_SYMBOL_GPL(apei_estatus_print);
359
49int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus) 360int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
50{ 361{
51 if (estatus->data_length && 362 if (estatus->data_length &&