aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2013-01-24 08:49:10 -0500
committerIngo Molnar <mingo@kernel.org>2013-01-24 08:49:10 -0500
commit7c3c867f8d044c539ab577f2d134054099d0e0bf (patch)
treefd4a5f153ee69456780b737eb080daa2bb1b827f
parent2a1337599b0b5629d3ff163f803659f658bb4a14 (diff)
parent2cced2d95961acd318e9395578a60ee424d9db80 (diff)
Merge tag 'please-pull-aer-trace' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into perf/core
Use perf/event tracing to report PCI Express advanced errors, by Tony Luck. Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--drivers/acpi/apei/cper.c19
-rw-r--r--drivers/pci/pcie/aer/aerdrv_errprint.c63
-rw-r--r--include/linux/aer.h4
-rw-r--r--include/trace/events/ras.h77
4 files changed, 129 insertions, 34 deletions
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index e6defd86b424..1e5d8a40101e 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -29,6 +29,7 @@
29#include <linux/time.h> 29#include <linux/time.h>
30#include <linux/cper.h> 30#include <linux/cper.h>
31#include <linux/acpi.h> 31#include <linux/acpi.h>
32#include <linux/pci.h>
32#include <linux/aer.h> 33#include <linux/aer.h>
33 34
34/* 35/*
@@ -249,6 +250,10 @@ static const char *cper_pcie_port_type_strs[] = {
249static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, 250static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
250 const struct acpi_hest_generic_data *gdata) 251 const struct acpi_hest_generic_data *gdata)
251{ 252{
253#ifdef CONFIG_ACPI_APEI_PCIEAER
254 struct pci_dev *dev;
255#endif
256
252 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) 257 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
253 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, 258 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
254 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? 259 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
@@ -281,10 +286,18 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
281 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", 286 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
282 pfx, pcie->bridge.secondary_status, pcie->bridge.control); 287 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
283#ifdef CONFIG_ACPI_APEI_PCIEAER 288#ifdef CONFIG_ACPI_APEI_PCIEAER
284 if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) { 289 dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
285 struct aer_capability_regs *aer_regs = (void *)pcie->aer_info; 290 pcie->device_id.bus, pcie->device_id.function);
286 cper_print_aer(pfx, gdata->error_severity, aer_regs); 291 if (!dev) {
292 pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
293 pcie->device_id.segment, pcie->device_id.bus,
294 pcie->device_id.slot, pcie->device_id.function);
295 return;
287 } 296 }
297 if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
298 cper_print_aer(pfx, dev, gdata->error_severity,
299 (struct aer_capability_regs *) pcie->aer_info);
300 pci_dev_put(dev);
288#endif 301#endif
289} 302}
290 303
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 3ea51736f18d..5ab14251839d 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -23,6 +23,9 @@
23 23
24#include "aerdrv.h" 24#include "aerdrv.h"
25 25
26#define CREATE_TRACE_POINTS
27#include <trace/events/ras.h>
28
26#define AER_AGENT_RECEIVER 0 29#define AER_AGENT_RECEIVER 0
27#define AER_AGENT_REQUESTER 1 30#define AER_AGENT_REQUESTER 1
28#define AER_AGENT_COMPLETER 2 31#define AER_AGENT_COMPLETER 2
@@ -121,12 +124,11 @@ static const char *aer_agent_string[] = {
121 "Transmitter ID" 124 "Transmitter ID"
122}; 125};
123 126
124static void __aer_print_error(const char *prefix, 127static void __aer_print_error(struct pci_dev *dev,
125 struct aer_err_info *info) 128 struct aer_err_info *info)
126{ 129{
127 int i, status; 130 int i, status;
128 const char *errmsg = NULL; 131 const char *errmsg = NULL;
129
130 status = (info->status & ~info->mask); 132 status = (info->status & ~info->mask);
131 133
132 for (i = 0; i < 32; i++) { 134 for (i = 0; i < 32; i++) {
@@ -141,26 +143,22 @@ static void __aer_print_error(const char *prefix,
141 aer_uncorrectable_error_string[i] : NULL; 143 aer_uncorrectable_error_string[i] : NULL;
142 144
143 if (errmsg) 145 if (errmsg)
144 printk("%s"" [%2d] %-22s%s\n", prefix, i, errmsg, 146 dev_err(&dev->dev, " [%2d] %-22s%s\n", i, errmsg,
145 info->first_error == i ? " (First)" : ""); 147 info->first_error == i ? " (First)" : "");
146 else 148 else
147 printk("%s"" [%2d] Unknown Error Bit%s\n", prefix, i, 149 dev_err(&dev->dev, " [%2d] Unknown Error Bit%s\n",
148 info->first_error == i ? " (First)" : ""); 150 i, info->first_error == i ? " (First)" : "");
149 } 151 }
150} 152}
151 153
152void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) 154void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
153{ 155{
154 int id = ((dev->bus->number << 8) | dev->devfn); 156 int id = ((dev->bus->number << 8) | dev->devfn);
155 char prefix[44];
156
157 snprintf(prefix, sizeof(prefix), "%s%s %s: ",
158 (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
159 dev_driver_string(&dev->dev), dev_name(&dev->dev));
160 157
161 if (info->status == 0) { 158 if (info->status == 0) {
162 printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, " 159 dev_err(&dev->dev,
163 "id=%04x(Unregistered Agent ID)\n", prefix, 160 "PCIe Bus Error: severity=%s, type=Unaccessible, "
161 "id=%04x(Unregistered Agent ID)\n",
164 aer_error_severity_string[info->severity], id); 162 aer_error_severity_string[info->severity], id);
165 } else { 163 } else {
166 int layer, agent; 164 int layer, agent;
@@ -168,22 +166,24 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
168 layer = AER_GET_LAYER_ERROR(info->severity, info->status); 166 layer = AER_GET_LAYER_ERROR(info->severity, info->status);
169 agent = AER_GET_AGENT(info->severity, info->status); 167 agent = AER_GET_AGENT(info->severity, info->status);
170 168
171 printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", 169 dev_err(&dev->dev,
172 prefix, aer_error_severity_string[info->severity], 170 "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
171 aer_error_severity_string[info->severity],
173 aer_error_layer[layer], id, aer_agent_string[agent]); 172 aer_error_layer[layer], id, aer_agent_string[agent]);
174 173
175 printk("%s"" device [%04x:%04x] error status/mask=%08x/%08x\n", 174 dev_err(&dev->dev,
176 prefix, dev->vendor, dev->device, 175 " device [%04x:%04x] error status/mask=%08x/%08x\n",
176 dev->vendor, dev->device,
177 info->status, info->mask); 177 info->status, info->mask);
178 178
179 __aer_print_error(prefix, info); 179 __aer_print_error(dev, info);
180 180
181 if (info->tlp_header_valid) { 181 if (info->tlp_header_valid) {
182 unsigned char *tlp = (unsigned char *) &info->tlp; 182 unsigned char *tlp = (unsigned char *) &info->tlp;
183 printk("%s"" TLP Header:" 183 dev_err(&dev->dev, " TLP Header:"
184 " %02x%02x%02x%02x %02x%02x%02x%02x" 184 " %02x%02x%02x%02x %02x%02x%02x%02x"
185 " %02x%02x%02x%02x %02x%02x%02x%02x\n", 185 " %02x%02x%02x%02x %02x%02x%02x%02x\n",
186 prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, 186 *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
187 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), 187 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
188 *(tlp + 11), *(tlp + 10), *(tlp + 9), 188 *(tlp + 11), *(tlp + 10), *(tlp + 9),
189 *(tlp + 8), *(tlp + 15), *(tlp + 14), 189 *(tlp + 8), *(tlp + 15), *(tlp + 14),
@@ -192,8 +192,11 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
192 } 192 }
193 193
194 if (info->id && info->error_dev_num > 1 && info->id == id) 194 if (info->id && info->error_dev_num > 1 && info->id == id)
195 printk("%s"" Error of this Agent(%04x) is reported first\n", 195 dev_err(&dev->dev,
196 prefix, id); 196 " Error of this Agent(%04x) is reported first\n",
197 id);
198 trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
199 info->severity);
197} 200}
198 201
199void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) 202void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -217,7 +220,7 @@ int cper_severity_to_aer(int cper_severity)
217} 220}
218EXPORT_SYMBOL_GPL(cper_severity_to_aer); 221EXPORT_SYMBOL_GPL(cper_severity_to_aer);
219 222
220void cper_print_aer(const char *prefix, int cper_severity, 223void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
221 struct aer_capability_regs *aer) 224 struct aer_capability_regs *aer)
222{ 225{
223 int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0; 226 int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
@@ -239,25 +242,27 @@ void cper_print_aer(const char *prefix, int cper_severity,
239 } 242 }
240 layer = AER_GET_LAYER_ERROR(aer_severity, status); 243 layer = AER_GET_LAYER_ERROR(aer_severity, status);
241 agent = AER_GET_AGENT(aer_severity, status); 244 agent = AER_GET_AGENT(aer_severity, status);
242 printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n", 245 dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
243 prefix, status, mask); 246 status, mask);
244 cper_print_bits(prefix, status, status_strs, status_strs_size); 247 cper_print_bits(prefix, status, status_strs, status_strs_size);
245 printk("%s""aer_layer=%s, aer_agent=%s\n", prefix, 248 dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n",
246 aer_error_layer[layer], aer_agent_string[agent]); 249 aer_error_layer[layer], aer_agent_string[agent]);
247 if (aer_severity != AER_CORRECTABLE) 250 if (aer_severity != AER_CORRECTABLE)
248 printk("%s""aer_uncor_severity: 0x%08x\n", 251 dev_err(&dev->dev, "aer_uncor_severity: 0x%08x\n",
249 prefix, aer->uncor_severity); 252 aer->uncor_severity);
250 if (tlp_header_valid) { 253 if (tlp_header_valid) {
251 const unsigned char *tlp; 254 const unsigned char *tlp;
252 tlp = (const unsigned char *)&aer->header_log; 255 tlp = (const unsigned char *)&aer->header_log;
253 printk("%s""aer_tlp_header:" 256 dev_err(&dev->dev, "aer_tlp_header:"
254 " %02x%02x%02x%02x %02x%02x%02x%02x" 257 " %02x%02x%02x%02x %02x%02x%02x%02x"
255 " %02x%02x%02x%02x %02x%02x%02x%02x\n", 258 " %02x%02x%02x%02x %02x%02x%02x%02x\n",
256 prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, 259 *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
257 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), 260 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
258 *(tlp + 11), *(tlp + 10), *(tlp + 9), 261 *(tlp + 11), *(tlp + 10), *(tlp + 9),
259 *(tlp + 8), *(tlp + 15), *(tlp + 14), 262 *(tlp + 8), *(tlp + 15), *(tlp + 14),
260 *(tlp + 13), *(tlp + 12)); 263 *(tlp + 13), *(tlp + 12));
261 } 264 }
265 trace_aer_event(dev_name(&dev->dev), (status & ~mask),
266 aer_severity);
262} 267}
263#endif 268#endif
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 544abdb2238c..ec10e1b24c1c 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
49} 49}
50#endif 50#endif
51 51
52extern void cper_print_aer(const char *prefix, int cper_severity, 52extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
53 struct aer_capability_regs *aer); 53 int cper_severity, struct aer_capability_regs *aer);
54extern int cper_severity_to_aer(int cper_severity); 54extern int cper_severity_to_aer(int cper_severity);
55extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, 55extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
56 int severity); 56 int severity);
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
new file mode 100644
index 000000000000..88b878383797
--- /dev/null
+++ b/include/trace/events/ras.h
@@ -0,0 +1,77 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ras
3
4#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_AER_H
6
7#include <linux/tracepoint.h>
8#include <linux/edac.h>
9
10
11/*
12 * PCIe AER Trace event
13 *
14 * These events are generated when hardware detects a corrected or
15 * uncorrected event on a PCIe device. The event report has
16 * the following structure:
17 *
18 * char * dev_name - The name of the slot where the device resides
19 * ([domain:]bus:device.function).
20 * u32 status - Either the correctable or uncorrectable register
21 * indicating what error or errors have been seen
22 * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
23 */
24
25#define aer_correctable_errors \
26 {BIT(0), "Receiver Error"}, \
27 {BIT(6), "Bad TLP"}, \
28 {BIT(7), "Bad DLLP"}, \
29 {BIT(8), "RELAY_NUM Rollover"}, \
30 {BIT(12), "Replay Timer Timeout"}, \
31 {BIT(13), "Advisory Non-Fatal"}
32
33#define aer_uncorrectable_errors \
34 {BIT(4), "Data Link Protocol"}, \
35 {BIT(12), "Poisoned TLP"}, \
36 {BIT(13), "Flow Control Protocol"}, \
37 {BIT(14), "Completion Timeout"}, \
38 {BIT(15), "Completer Abort"}, \
39 {BIT(16), "Unexpected Completion"}, \
40 {BIT(17), "Receiver Overflow"}, \
41 {BIT(18), "Malformed TLP"}, \
42 {BIT(19), "ECRC"}, \
43 {BIT(20), "Unsupported Request"}
44
45TRACE_EVENT(aer_event,
46 TP_PROTO(const char *dev_name,
47 const u32 status,
48 const u8 severity),
49
50 TP_ARGS(dev_name, status, severity),
51
52 TP_STRUCT__entry(
53 __string( dev_name, dev_name )
54 __field( u32, status )
55 __field( u8, severity )
56 ),
57
58 TP_fast_assign(
59 __assign_str(dev_name, dev_name);
60 __entry->status = status;
61 __entry->severity = severity;
62 ),
63
64 TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
65 __get_str(dev_name),
66 __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" :
67 __entry->severity == HW_EVENT_ERR_FATAL ?
68 "Fatal" : "Uncorrected",
69 __entry->severity == HW_EVENT_ERR_CORRECTED ?
70 __print_flags(__entry->status, "|", aer_correctable_errors) :
71 __print_flags(__entry->status, "|", aer_uncorrectable_errors))
72);
73
74#endif /* _TRACE_AER_H */
75
76/* This part must be outside protection */
77#include <trace/define_trace.h>