diff options
author | Ingo Molnar <mingo@kernel.org> | 2013-01-24 08:49:10 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-01-24 08:49:10 -0500 |
commit | 7c3c867f8d044c539ab577f2d134054099d0e0bf (patch) | |
tree | fd4a5f153ee69456780b737eb080daa2bb1b827f | |
parent | 2a1337599b0b5629d3ff163f803659f658bb4a14 (diff) | |
parent | 2cced2d95961acd318e9395578a60ee424d9db80 (diff) |
Merge tag 'please-pull-aer-trace' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into perf/core
Use perf/event tracing to report PCI Express advanced errors, by
Tony Luck.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | drivers/acpi/apei/cper.c | 19 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_errprint.c | 63 | ||||
-rw-r--r-- | include/linux/aer.h | 4 | ||||
-rw-r--r-- | include/trace/events/ras.h | 77 |
4 files changed, 129 insertions, 34 deletions
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c index e6defd86b424..1e5d8a40101e 100644 --- a/drivers/acpi/apei/cper.c +++ b/drivers/acpi/apei/cper.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/time.h> | 29 | #include <linux/time.h> |
30 | #include <linux/cper.h> | 30 | #include <linux/cper.h> |
31 | #include <linux/acpi.h> | 31 | #include <linux/acpi.h> |
32 | #include <linux/pci.h> | ||
32 | #include <linux/aer.h> | 33 | #include <linux/aer.h> |
33 | 34 | ||
34 | /* | 35 | /* |
@@ -249,6 +250,10 @@ static const char *cper_pcie_port_type_strs[] = { | |||
249 | static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, | 250 | static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, |
250 | const struct acpi_hest_generic_data *gdata) | 251 | const struct acpi_hest_generic_data *gdata) |
251 | { | 252 | { |
253 | #ifdef CONFIG_ACPI_APEI_PCIEAER | ||
254 | struct pci_dev *dev; | ||
255 | #endif | ||
256 | |||
252 | if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) | 257 | if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) |
253 | printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, | 258 | printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, |
254 | pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? | 259 | pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? |
@@ -281,10 +286,18 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, | |||
281 | "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", | 286 | "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", |
282 | pfx, pcie->bridge.secondary_status, pcie->bridge.control); | 287 | pfx, pcie->bridge.secondary_status, pcie->bridge.control); |
283 | #ifdef CONFIG_ACPI_APEI_PCIEAER | 288 | #ifdef CONFIG_ACPI_APEI_PCIEAER |
284 | if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) { | 289 | dev = pci_get_domain_bus_and_slot(pcie->device_id.segment, |
285 | struct aer_capability_regs *aer_regs = (void *)pcie->aer_info; | 290 | pcie->device_id.bus, pcie->device_id.function); |
286 | cper_print_aer(pfx, gdata->error_severity, aer_regs); | 291 | if (!dev) { |
292 | pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n", | ||
293 | pcie->device_id.segment, pcie->device_id.bus, | ||
294 | pcie->device_id.slot, pcie->device_id.function); | ||
295 | return; | ||
287 | } | 296 | } |
297 | if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) | ||
298 | cper_print_aer(pfx, dev, gdata->error_severity, | ||
299 | (struct aer_capability_regs *) pcie->aer_info); | ||
300 | pci_dev_put(dev); | ||
288 | #endif | 301 | #endif |
289 | } | 302 | } |
290 | 303 | ||
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 3ea51736f18d..5ab14251839d 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c | |||
@@ -23,6 +23,9 @@ | |||
23 | 23 | ||
24 | #include "aerdrv.h" | 24 | #include "aerdrv.h" |
25 | 25 | ||
26 | #define CREATE_TRACE_POINTS | ||
27 | #include <trace/events/ras.h> | ||
28 | |||
26 | #define AER_AGENT_RECEIVER 0 | 29 | #define AER_AGENT_RECEIVER 0 |
27 | #define AER_AGENT_REQUESTER 1 | 30 | #define AER_AGENT_REQUESTER 1 |
28 | #define AER_AGENT_COMPLETER 2 | 31 | #define AER_AGENT_COMPLETER 2 |
@@ -121,12 +124,11 @@ static const char *aer_agent_string[] = { | |||
121 | "Transmitter ID" | 124 | "Transmitter ID" |
122 | }; | 125 | }; |
123 | 126 | ||
124 | static void __aer_print_error(const char *prefix, | 127 | static void __aer_print_error(struct pci_dev *dev, |
125 | struct aer_err_info *info) | 128 | struct aer_err_info *info) |
126 | { | 129 | { |
127 | int i, status; | 130 | int i, status; |
128 | const char *errmsg = NULL; | 131 | const char *errmsg = NULL; |
129 | |||
130 | status = (info->status & ~info->mask); | 132 | status = (info->status & ~info->mask); |
131 | 133 | ||
132 | for (i = 0; i < 32; i++) { | 134 | for (i = 0; i < 32; i++) { |
@@ -141,26 +143,22 @@ static void __aer_print_error(const char *prefix, | |||
141 | aer_uncorrectable_error_string[i] : NULL; | 143 | aer_uncorrectable_error_string[i] : NULL; |
142 | 144 | ||
143 | if (errmsg) | 145 | if (errmsg) |
144 | printk("%s"" [%2d] %-22s%s\n", prefix, i, errmsg, | 146 | dev_err(&dev->dev, " [%2d] %-22s%s\n", i, errmsg, |
145 | info->first_error == i ? " (First)" : ""); | 147 | info->first_error == i ? " (First)" : ""); |
146 | else | 148 | else |
147 | printk("%s"" [%2d] Unknown Error Bit%s\n", prefix, i, | 149 | dev_err(&dev->dev, " [%2d] Unknown Error Bit%s\n", |
148 | info->first_error == i ? " (First)" : ""); | 150 | i, info->first_error == i ? " (First)" : ""); |
149 | } | 151 | } |
150 | } | 152 | } |
151 | 153 | ||
152 | void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) | 154 | void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) |
153 | { | 155 | { |
154 | int id = ((dev->bus->number << 8) | dev->devfn); | 156 | int id = ((dev->bus->number << 8) | dev->devfn); |
155 | char prefix[44]; | ||
156 | |||
157 | snprintf(prefix, sizeof(prefix), "%s%s %s: ", | ||
158 | (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR, | ||
159 | dev_driver_string(&dev->dev), dev_name(&dev->dev)); | ||
160 | 157 | ||
161 | if (info->status == 0) { | 158 | if (info->status == 0) { |
162 | printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, " | 159 | dev_err(&dev->dev, |
163 | "id=%04x(Unregistered Agent ID)\n", prefix, | 160 | "PCIe Bus Error: severity=%s, type=Unaccessible, " |
161 | "id=%04x(Unregistered Agent ID)\n", | ||
164 | aer_error_severity_string[info->severity], id); | 162 | aer_error_severity_string[info->severity], id); |
165 | } else { | 163 | } else { |
166 | int layer, agent; | 164 | int layer, agent; |
@@ -168,22 +166,24 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) | |||
168 | layer = AER_GET_LAYER_ERROR(info->severity, info->status); | 166 | layer = AER_GET_LAYER_ERROR(info->severity, info->status); |
169 | agent = AER_GET_AGENT(info->severity, info->status); | 167 | agent = AER_GET_AGENT(info->severity, info->status); |
170 | 168 | ||
171 | printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", | 169 | dev_err(&dev->dev, |
172 | prefix, aer_error_severity_string[info->severity], | 170 | "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", |
171 | aer_error_severity_string[info->severity], | ||
173 | aer_error_layer[layer], id, aer_agent_string[agent]); | 172 | aer_error_layer[layer], id, aer_agent_string[agent]); |
174 | 173 | ||
175 | printk("%s"" device [%04x:%04x] error status/mask=%08x/%08x\n", | 174 | dev_err(&dev->dev, |
176 | prefix, dev->vendor, dev->device, | 175 | " device [%04x:%04x] error status/mask=%08x/%08x\n", |
176 | dev->vendor, dev->device, | ||
177 | info->status, info->mask); | 177 | info->status, info->mask); |
178 | 178 | ||
179 | __aer_print_error(prefix, info); | 179 | __aer_print_error(dev, info); |
180 | 180 | ||
181 | if (info->tlp_header_valid) { | 181 | if (info->tlp_header_valid) { |
182 | unsigned char *tlp = (unsigned char *) &info->tlp; | 182 | unsigned char *tlp = (unsigned char *) &info->tlp; |
183 | printk("%s"" TLP Header:" | 183 | dev_err(&dev->dev, " TLP Header:" |
184 | " %02x%02x%02x%02x %02x%02x%02x%02x" | 184 | " %02x%02x%02x%02x %02x%02x%02x%02x" |
185 | " %02x%02x%02x%02x %02x%02x%02x%02x\n", | 185 | " %02x%02x%02x%02x %02x%02x%02x%02x\n", |
186 | prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, | 186 | *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, |
187 | *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), | 187 | *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), |
188 | *(tlp + 11), *(tlp + 10), *(tlp + 9), | 188 | *(tlp + 11), *(tlp + 10), *(tlp + 9), |
189 | *(tlp + 8), *(tlp + 15), *(tlp + 14), | 189 | *(tlp + 8), *(tlp + 15), *(tlp + 14), |
@@ -192,8 +192,11 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) | |||
192 | } | 192 | } |
193 | 193 | ||
194 | if (info->id && info->error_dev_num > 1 && info->id == id) | 194 | if (info->id && info->error_dev_num > 1 && info->id == id) |
195 | printk("%s"" Error of this Agent(%04x) is reported first\n", | 195 | dev_err(&dev->dev, |
196 | prefix, id); | 196 | " Error of this Agent(%04x) is reported first\n", |
197 | id); | ||
198 | trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), | ||
199 | info->severity); | ||
197 | } | 200 | } |
198 | 201 | ||
199 | void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) | 202 | void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) |
@@ -217,7 +220,7 @@ int cper_severity_to_aer(int cper_severity) | |||
217 | } | 220 | } |
218 | EXPORT_SYMBOL_GPL(cper_severity_to_aer); | 221 | EXPORT_SYMBOL_GPL(cper_severity_to_aer); |
219 | 222 | ||
220 | void cper_print_aer(const char *prefix, int cper_severity, | 223 | void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity, |
221 | struct aer_capability_regs *aer) | 224 | struct aer_capability_regs *aer) |
222 | { | 225 | { |
223 | int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0; | 226 | int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0; |
@@ -239,25 +242,27 @@ void cper_print_aer(const char *prefix, int cper_severity, | |||
239 | } | 242 | } |
240 | layer = AER_GET_LAYER_ERROR(aer_severity, status); | 243 | layer = AER_GET_LAYER_ERROR(aer_severity, status); |
241 | agent = AER_GET_AGENT(aer_severity, status); | 244 | agent = AER_GET_AGENT(aer_severity, status); |
242 | printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n", | 245 | dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", |
243 | prefix, status, mask); | 246 | status, mask); |
244 | cper_print_bits(prefix, status, status_strs, status_strs_size); | 247 | cper_print_bits(prefix, status, status_strs, status_strs_size); |
245 | printk("%s""aer_layer=%s, aer_agent=%s\n", prefix, | 248 | dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n", |
246 | aer_error_layer[layer], aer_agent_string[agent]); | 249 | aer_error_layer[layer], aer_agent_string[agent]); |
247 | if (aer_severity != AER_CORRECTABLE) | 250 | if (aer_severity != AER_CORRECTABLE) |
248 | printk("%s""aer_uncor_severity: 0x%08x\n", | 251 | dev_err(&dev->dev, "aer_uncor_severity: 0x%08x\n", |
249 | prefix, aer->uncor_severity); | 252 | aer->uncor_severity); |
250 | if (tlp_header_valid) { | 253 | if (tlp_header_valid) { |
251 | const unsigned char *tlp; | 254 | const unsigned char *tlp; |
252 | tlp = (const unsigned char *)&aer->header_log; | 255 | tlp = (const unsigned char *)&aer->header_log; |
253 | printk("%s""aer_tlp_header:" | 256 | dev_err(&dev->dev, "aer_tlp_header:" |
254 | " %02x%02x%02x%02x %02x%02x%02x%02x" | 257 | " %02x%02x%02x%02x %02x%02x%02x%02x" |
255 | " %02x%02x%02x%02x %02x%02x%02x%02x\n", | 258 | " %02x%02x%02x%02x %02x%02x%02x%02x\n", |
256 | prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, | 259 | *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, |
257 | *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), | 260 | *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), |
258 | *(tlp + 11), *(tlp + 10), *(tlp + 9), | 261 | *(tlp + 11), *(tlp + 10), *(tlp + 9), |
259 | *(tlp + 8), *(tlp + 15), *(tlp + 14), | 262 | *(tlp + 8), *(tlp + 15), *(tlp + 14), |
260 | *(tlp + 13), *(tlp + 12)); | 263 | *(tlp + 13), *(tlp + 12)); |
261 | } | 264 | } |
265 | trace_aer_event(dev_name(&dev->dev), (status & ~mask), | ||
266 | aer_severity); | ||
262 | } | 267 | } |
263 | #endif | 268 | #endif |
diff --git a/include/linux/aer.h b/include/linux/aer.h index 544abdb2238c..ec10e1b24c1c 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h | |||
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) | |||
49 | } | 49 | } |
50 | #endif | 50 | #endif |
51 | 51 | ||
52 | extern void cper_print_aer(const char *prefix, int cper_severity, | 52 | extern void cper_print_aer(const char *prefix, struct pci_dev *dev, |
53 | struct aer_capability_regs *aer); | 53 | int cper_severity, struct aer_capability_regs *aer); |
54 | extern int cper_severity_to_aer(int cper_severity); | 54 | extern int cper_severity_to_aer(int cper_severity); |
55 | extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, | 55 | extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, |
56 | int severity); | 56 | int severity); |
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h new file mode 100644 index 000000000000..88b878383797 --- /dev/null +++ b/include/trace/events/ras.h | |||
@@ -0,0 +1,77 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM ras | ||
3 | |||
4 | #if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_AER_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | #include <linux/edac.h> | ||
9 | |||
10 | |||
11 | /* | ||
12 | * PCIe AER Trace event | ||
13 | * | ||
14 | * These events are generated when hardware detects a corrected or | ||
15 | * uncorrected event on a PCIe device. The event report has | ||
16 | * the following structure: | ||
17 | * | ||
18 | * char * dev_name - The name of the slot where the device resides | ||
19 | * ([domain:]bus:device.function). | ||
20 | * u32 status - Either the correctable or uncorrectable register | ||
21 | * indicating what error or errors have been seen | ||
22 | * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED | ||
23 | */ | ||
24 | |||
25 | #define aer_correctable_errors \ | ||
26 | {BIT(0), "Receiver Error"}, \ | ||
27 | {BIT(6), "Bad TLP"}, \ | ||
28 | {BIT(7), "Bad DLLP"}, \ | ||
29 | {BIT(8), "RELAY_NUM Rollover"}, \ | ||
30 | {BIT(12), "Replay Timer Timeout"}, \ | ||
31 | {BIT(13), "Advisory Non-Fatal"} | ||
32 | |||
33 | #define aer_uncorrectable_errors \ | ||
34 | {BIT(4), "Data Link Protocol"}, \ | ||
35 | {BIT(12), "Poisoned TLP"}, \ | ||
36 | {BIT(13), "Flow Control Protocol"}, \ | ||
37 | {BIT(14), "Completion Timeout"}, \ | ||
38 | {BIT(15), "Completer Abort"}, \ | ||
39 | {BIT(16), "Unexpected Completion"}, \ | ||
40 | {BIT(17), "Receiver Overflow"}, \ | ||
41 | {BIT(18), "Malformed TLP"}, \ | ||
42 | {BIT(19), "ECRC"}, \ | ||
43 | {BIT(20), "Unsupported Request"} | ||
44 | |||
45 | TRACE_EVENT(aer_event, | ||
46 | TP_PROTO(const char *dev_name, | ||
47 | const u32 status, | ||
48 | const u8 severity), | ||
49 | |||
50 | TP_ARGS(dev_name, status, severity), | ||
51 | |||
52 | TP_STRUCT__entry( | ||
53 | __string( dev_name, dev_name ) | ||
54 | __field( u32, status ) | ||
55 | __field( u8, severity ) | ||
56 | ), | ||
57 | |||
58 | TP_fast_assign( | ||
59 | __assign_str(dev_name, dev_name); | ||
60 | __entry->status = status; | ||
61 | __entry->severity = severity; | ||
62 | ), | ||
63 | |||
64 | TP_printk("%s PCIe Bus Error: severity=%s, %s\n", | ||
65 | __get_str(dev_name), | ||
66 | __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" : | ||
67 | __entry->severity == HW_EVENT_ERR_FATAL ? | ||
68 | "Fatal" : "Uncorrected", | ||
69 | __entry->severity == HW_EVENT_ERR_CORRECTED ? | ||
70 | __print_flags(__entry->status, "|", aer_correctable_errors) : | ||
71 | __print_flags(__entry->status, "|", aer_uncorrectable_errors)) | ||
72 | ); | ||
73 | |||
74 | #endif /* _TRACE_AER_H */ | ||
75 | |||
76 | /* This part must be outside protection */ | ||
77 | #include <trace/define_trace.h> | ||