aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/mlx4/catas.c
diff options
context:
space:
mode:
author Jack Morgenstein <jackm@dev.mellanox.co.il> 2007-07-12 10:50:45 -0400
committer Roland Dreier <rolandd@cisco.com> 2007-07-17 21:37:42 -0400
commit ee49bd9397cd2b8fe7a1962505d81c1d0a1366fc (patch)
tree 064800f1ebcf6c2586f6727aa03c85e875b96289 /drivers/net/mlx4/catas.c
parent 41179e2de6962b46d1d9f2b4437243ac740efdec (diff)
mlx4_core: Reset device when internal error is detected
Reset the device when an internal error is detected. Also, detect errors by polling the error buffer rather than using interrupts. This is more robust and doesn't depend on MSI-X. Remove the old interrupt handler entirely, since we don't want to support two mechanisms for detecting internal errors.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/net/mlx4/catas.c')
-rw-r--r-- drivers/net/mlx4/catas.c 106
1 files changed, 99 insertions, 7 deletions
diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c
index 1bb088aeaf71..6b32ec94b3a8 100644
--- a/drivers/net/mlx4/catas.c
+++ b/drivers/net/mlx4/catas.c
@@ -30,41 +30,133 @@
30 * SOFTWARE. 30 * SOFTWARE.
31 */ 31 */
32 32
33#include <linux/workqueue.h>
34
33#include "mlx4.h" 35#include "mlx4.h"
34 36
35void mlx4_handle_catas_err(struct mlx4_dev *dev) 37enum {
38 MLX4_CATAS_POLL_INTERVAL = 5 * HZ,
39};
40
41static DEFINE_SPINLOCK(catas_lock);
42
43static LIST_HEAD(catas_list);
44static struct workqueue_struct *catas_wq;
45static struct work_struct catas_work;
46
47static int internal_err_reset = 1;
48module_param(internal_err_reset, int, 0644);
49MODULE_PARM_DESC(internal_err_reset,
50 "Reset device on internal errors if non-zero (default 1)");
51
52static void dump_err_buf(struct mlx4_dev *dev)
36{ 53{
37 struct mlx4_priv *priv = mlx4_priv(dev); 54 struct mlx4_priv *priv = mlx4_priv(dev);
38 55
39 int i; 56 int i;
40 57
41 mlx4_err(dev, "Catastrophic error detected:\n"); 58 mlx4_err(dev, "Internal error detected:\n");
42 for (i = 0; i < priv->fw.catas_size; ++i) 59 for (i = 0; i < priv->fw.catas_size; ++i)
43 mlx4_err(dev, " buf[%02x]: %08x\n", 60 mlx4_err(dev, " buf[%02x]: %08x\n",
44 i, swab32(readl(priv->catas_err.map + i))); 61 i, swab32(readl(priv->catas_err.map + i)));
62}
45 63
46 mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); 64static void poll_catas(unsigned long dev_ptr)
65{
66 struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
67 struct mlx4_priv *priv = mlx4_priv(dev);
68
69 if (readl(priv->catas_err.map)) {
70 dump_err_buf(dev);
71
72 mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
73
74 if (internal_err_reset) {
75 spin_lock(&catas_lock);
76 list_add(&priv->catas_err.list, &catas_list);
77 spin_unlock(&catas_lock);
78
79 queue_work(catas_wq, &catas_work);
80 }
81 } else
82 mod_timer(&priv->catas_err.timer,
83 round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
47} 84}
48 85
49void mlx4_map_catas_buf(struct mlx4_dev *dev) 86static void catas_reset(struct work_struct *work)
87{
88 struct mlx4_priv *priv, *tmppriv;
89 struct mlx4_dev *dev;
90
91 LIST_HEAD(tlist);
92 int ret;
93
94 spin_lock_irq(&catas_lock);
95 list_splice_init(&catas_list, &tlist);
96 spin_unlock_irq(&catas_lock);
97
98 list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
99 ret = mlx4_restart_one(priv->dev.pdev);
100 dev = &priv->dev;
101 if (ret)
102 mlx4_err(dev, "Reset failed (%d)\n", ret);
103 else
104 mlx4_dbg(dev, "Reset succeeded\n");
105 }
106}
107
108void mlx4_start_catas_poll(struct mlx4_dev *dev)
50{ 109{
51 struct mlx4_priv *priv = mlx4_priv(dev); 110 struct mlx4_priv *priv = mlx4_priv(dev);
52 unsigned long addr; 111 unsigned long addr;
53 112
113 INIT_LIST_HEAD(&priv->catas_err.list);
114 init_timer(&priv->catas_err.timer);
115 priv->catas_err.map = NULL;
116
54 addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + 117 addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
55 priv->fw.catas_offset; 118 priv->fw.catas_offset;
56 119
57 priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); 120 priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
58 if (!priv->catas_err.map) 121 if (!priv->catas_err.map) {
59 mlx4_warn(dev, "Failed to map catastrophic error buffer at 0x%lx\n", 122 mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
60 addr); 123 addr);
124 return;
125 }
61 126
127 priv->catas_err.timer.data = (unsigned long) dev;
128 priv->catas_err.timer.function = poll_catas;
129 priv->catas_err.timer.expires =
130 round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
131 add_timer(&priv->catas_err.timer);
62} 132}
63 133
64void mlx4_unmap_catas_buf(struct mlx4_dev *dev) 134void mlx4_stop_catas_poll(struct mlx4_dev *dev)
65{ 135{
66 struct mlx4_priv *priv = mlx4_priv(dev); 136 struct mlx4_priv *priv = mlx4_priv(dev);
67 137
138 del_timer_sync(&priv->catas_err.timer);
139
68 if (priv->catas_err.map) 140 if (priv->catas_err.map)
69 iounmap(priv->catas_err.map); 141 iounmap(priv->catas_err.map);
142
143 spin_lock_irq(&catas_lock);
144 list_del(&priv->catas_err.list);
145 spin_unlock_irq(&catas_lock);
146}
147
148int __init mlx4_catas_init(void)
149{
150 INIT_WORK(&catas_work, catas_reset);
151
152 catas_wq = create_singlethread_workqueue("mlx4_err");
153 if (!catas_wq)
154 return -ENOMEM;
155
156 return 0;
157}
158
159void mlx4_catas_cleanup(void)
160{
161 destroy_workqueue(catas_wq);
70} 162}