diff options
author | Roland Dreier <rolandd@cisco.com> | 2005-10-27 14:03:38 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2005-10-27 14:03:38 -0400 |
commit | 3d155f8cd0d077938d271225d26ee52f8eb26082 (patch) | |
tree | 28f65ed2dc5e9f12884daf2a97f50367f145d8a0 /drivers/infiniband/hw/mthca/mthca_catas.c | |
parent | 7cc656efb560cda66b5ed48444cad7556ea4fe99 (diff) |
[IB] mthca: first pass at catastrophic error reporting
Add some initial support for detecting and reporting catastrophic
errors reported by Mellanox HCAs. We start a periodic timer which
polls the catastrophic error reporting buffer in device memory. If an
error is detected, we dump the contents of the buffer for post-mortem
debugging, and report a fatal asynchronous error to higher levels.
In the future we can try to recover from these errors by resetting the
device, but this will require some work in higher-level code as well.
Let's get this in now, so that we at least get catastrophic errors
reported in logs.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/mthca/mthca_catas.c')
-rw-r--r-- | drivers/infiniband/hw/mthca/mthca_catas.c | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c new file mode 100644 index 000000000000..7ac52af43b99 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_catas.c | |||
@@ -0,0 +1,153 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005 Cisco Systems. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | * $Id$ | ||
33 | */ | ||
34 | |||
35 | #include "mthca_dev.h" | ||
36 | |||
37 | enum { | ||
38 | MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, | ||
39 | |||
40 | MTHCA_CATAS_TYPE_INTERNAL = 0, | ||
41 | MTHCA_CATAS_TYPE_UPLINK = 3, | ||
42 | MTHCA_CATAS_TYPE_DDR = 4, | ||
43 | MTHCA_CATAS_TYPE_PARITY = 5, | ||
44 | }; | ||
45 | |||
46 | static DEFINE_SPINLOCK(catas_lock); | ||
47 | |||
48 | static void handle_catas(struct mthca_dev *dev) | ||
49 | { | ||
50 | struct ib_event event; | ||
51 | const char *type; | ||
52 | int i; | ||
53 | |||
54 | event.device = &dev->ib_dev; | ||
55 | event.event = IB_EVENT_DEVICE_FATAL; | ||
56 | event.element.port_num = 0; | ||
57 | |||
58 | ib_dispatch_event(&event); | ||
59 | |||
60 | switch (swab32(readl(dev->catas_err.map)) >> 24) { | ||
61 | case MTHCA_CATAS_TYPE_INTERNAL: | ||
62 | type = "internal error"; | ||
63 | break; | ||
64 | case MTHCA_CATAS_TYPE_UPLINK: | ||
65 | type = "uplink bus error"; | ||
66 | break; | ||
67 | case MTHCA_CATAS_TYPE_DDR: | ||
68 | type = "DDR data error"; | ||
69 | break; | ||
70 | case MTHCA_CATAS_TYPE_PARITY: | ||
71 | type = "internal parity error"; | ||
72 | break; | ||
73 | default: | ||
74 | type = "unknown error"; | ||
75 | break; | ||
76 | } | ||
77 | |||
78 | mthca_err(dev, "Catastrophic error detected: %s\n", type); | ||
79 | for (i = 0; i < dev->catas_err.size; ++i) | ||
80 | mthca_err(dev, " buf[%02x]: %08x\n", | ||
81 | i, swab32(readl(dev->catas_err.map + i))); | ||
82 | } | ||
83 | |||
84 | static void poll_catas(unsigned long dev_ptr) | ||
85 | { | ||
86 | struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; | ||
87 | unsigned long flags; | ||
88 | int i; | ||
89 | |||
90 | for (i = 0; i < dev->catas_err.size; ++i) | ||
91 | if (readl(dev->catas_err.map + i)) { | ||
92 | handle_catas(dev); | ||
93 | return; | ||
94 | } | ||
95 | |||
96 | spin_lock_irqsave(&catas_lock, flags); | ||
97 | if (dev->catas_err.stop) | ||
98 | mod_timer(&dev->catas_err.timer, | ||
99 | jiffies + MTHCA_CATAS_POLL_INTERVAL); | ||
100 | spin_unlock_irqrestore(&catas_lock, flags); | ||
101 | |||
102 | return; | ||
103 | } | ||
104 | |||
105 | void mthca_start_catas_poll(struct mthca_dev *dev) | ||
106 | { | ||
107 | unsigned long addr; | ||
108 | |||
109 | init_timer(&dev->catas_err.timer); | ||
110 | dev->catas_err.stop = 0; | ||
111 | dev->catas_err.map = NULL; | ||
112 | |||
113 | addr = pci_resource_start(dev->pdev, 0) + | ||
114 | ((pci_resource_len(dev->pdev, 0) - 1) & | ||
115 | dev->catas_err.addr); | ||
116 | |||
117 | if (!request_mem_region(addr, dev->catas_err.size * 4, | ||
118 | DRV_NAME)) { | ||
119 | mthca_warn(dev, "couldn't request catastrophic error region " | ||
120 | "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); | ||
121 | return; | ||
122 | } | ||
123 | |||
124 | dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); | ||
125 | if (!dev->catas_err.map) { | ||
126 | mthca_warn(dev, "couldn't map catastrophic error region " | ||
127 | "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); | ||
128 | release_mem_region(addr, dev->catas_err.size * 4); | ||
129 | return; | ||
130 | } | ||
131 | |||
132 | dev->catas_err.timer.data = (unsigned long) dev; | ||
133 | dev->catas_err.timer.function = poll_catas; | ||
134 | dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; | ||
135 | add_timer(&dev->catas_err.timer); | ||
136 | } | ||
137 | |||
138 | void mthca_stop_catas_poll(struct mthca_dev *dev) | ||
139 | { | ||
140 | spin_lock_irq(&catas_lock); | ||
141 | dev->catas_err.stop = 1; | ||
142 | spin_unlock_irq(&catas_lock); | ||
143 | |||
144 | del_timer_sync(&dev->catas_err.timer); | ||
145 | |||
146 | if (dev->catas_err.map) { | ||
147 | iounmap(dev->catas_err.map); | ||
148 | release_mem_region(pci_resource_start(dev->pdev, 0) + | ||
149 | ((pci_resource_len(dev->pdev, 0) - 1) & | ||
150 | dev->catas_err.addr), | ||
151 | dev->catas_err.size * 4); | ||
152 | } | ||
153 | } | ||