aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorJack Morgenstein <jackm@dev.mellanox.co.il>2012-08-03 04:26:45 -0400
committerRoland Dreier <roland@purestorage.com>2012-08-10 16:02:24 -0400
commitdf7fba66471c6bbbaebb55e1bb3658eb7ce00a9b (patch)
treeb15170a422813baeaeacf25ca401b994fe02b675 /drivers/infiniband
parent0d7614f09c1ebdbaa1599a5aba7593f147bf96ee (diff)
IB/mlx4: Fix possible deadlock on sm_lock spinlock
The sm_lock spinlock is taken in the process context by mlx4_ib_modify_device, and in the interrupt context by update_sm_ah, so we need to take that spinlock with irqsave, and release it with irqrestore. Lockdeps reports this as follows: [ INFO: inconsistent lock state ] 3.5.0+ #20 Not tainted inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. swapper/0/0 [HC1[1]:SC0[0]:HE0:SE1] takes: (&(&ibdev->sm_lock)->rlock){?.+...}, at: [<ffffffffa028af1d>] update_sm_ah+0xad/0x100 [mlx4_ib] {HARDIRQ-ON-W} state was registered at: [<ffffffff810b84a0>] mark_irqflags+0x120/0x190 [<ffffffff810b9ce7>] __lock_acquire+0x307/0x4c0 [<ffffffff810b9f51>] lock_acquire+0xb1/0x150 [<ffffffff815523b1>] _raw_spin_lock+0x41/0x50 [<ffffffffa028d563>] mlx4_ib_modify_device+0x63/0x240 [mlx4_ib] [<ffffffffa026d1fc>] ib_modify_device+0x1c/0x20 [ib_core] [<ffffffffa026c353>] set_node_desc+0x83/0xc0 [ib_core] [<ffffffff8136a150>] dev_attr_store+0x20/0x30 [<ffffffff81201fd6>] sysfs_write_file+0xe6/0x170 [<ffffffff8118da38>] vfs_write+0xc8/0x190 [<ffffffff8118dc01>] sys_write+0x51/0x90 [<ffffffff8155b869>] system_call_fastpath+0x16/0x1b ... *** DEADLOCK *** 1 lock held by swapper/0/0: stack backtrace: Pid: 0, comm: swapper/0 Not tainted 3.5.0+ #20 Call Trace: <IRQ> [<ffffffff810b7bea>] print_usage_bug+0x18a/0x190 [<ffffffff810b7370>] ? print_irq_inversion_bug+0x210/0x210 [<ffffffff810b7fb2>] mark_lock_irq+0xf2/0x280 [<ffffffff810b8290>] mark_lock+0x150/0x240 [<ffffffff810b84ef>] mark_irqflags+0x16f/0x190 [<ffffffff810b9ce7>] __lock_acquire+0x307/0x4c0 [<ffffffffa028af1d>] ? update_sm_ah+0xad/0x100 [mlx4_ib] [<ffffffff810b9f51>] lock_acquire+0xb1/0x150 [<ffffffffa028af1d>] ? update_sm_ah+0xad/0x100 [mlx4_ib] [<ffffffff815523b1>] _raw_spin_lock+0x41/0x50 [<ffffffffa028af1d>] ? update_sm_ah+0xad/0x100 [mlx4_ib] [<ffffffffa026b2fa>] ? ib_create_ah+0x1a/0x40 [ib_core] [<ffffffffa028af1d>] update_sm_ah+0xad/0x100 [mlx4_ib] [<ffffffff810c27c3>] ? is_module_address+0x23/0x30 [<ffffffffa028b05b>] handle_port_mgmt_change_event+0xeb/0x150 [mlx4_ib] [<ffffffffa028c177>] mlx4_ib_event+0x117/0x160 [mlx4_ib] [<ffffffff81552501>] ? _raw_spin_lock_irqsave+0x61/0x70 [<ffffffffa022718c>] mlx4_dispatch_event+0x6c/0x90 [mlx4_core] [<ffffffffa0221b40>] mlx4_eq_int+0x500/0x950 [mlx4_core] Reported by: Or Gerlitz <ogerlitz@mellanox.com> Tested-by: Bart Van Assche <bvanassche@acm.org> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c16
-rw-r--r--drivers/infiniband/hw/mlx4/main.c5
2 files changed, 13 insertions, 8 deletions
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index c27141fef1ab..9c2ae7efd00f 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -125,6 +125,7 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
125{ 125{
126 struct ib_ah *new_ah; 126 struct ib_ah *new_ah;
127 struct ib_ah_attr ah_attr; 127 struct ib_ah_attr ah_attr;
128 unsigned long flags;
128 129
129 if (!dev->send_agent[port_num - 1][0]) 130 if (!dev->send_agent[port_num - 1][0])
130 return; 131 return;
@@ -139,11 +140,11 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
139 if (IS_ERR(new_ah)) 140 if (IS_ERR(new_ah))
140 return; 141 return;
141 142
142 spin_lock(&dev->sm_lock); 143 spin_lock_irqsave(&dev->sm_lock, flags);
143 if (dev->sm_ah[port_num - 1]) 144 if (dev->sm_ah[port_num - 1])
144 ib_destroy_ah(dev->sm_ah[port_num - 1]); 145 ib_destroy_ah(dev->sm_ah[port_num - 1]);
145 dev->sm_ah[port_num - 1] = new_ah; 146 dev->sm_ah[port_num - 1] = new_ah;
146 spin_unlock(&dev->sm_lock); 147 spin_unlock_irqrestore(&dev->sm_lock, flags);
147} 148}
148 149
149/* 150/*
@@ -197,13 +198,15 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
197static void node_desc_override(struct ib_device *dev, 198static void node_desc_override(struct ib_device *dev,
198 struct ib_mad *mad) 199 struct ib_mad *mad)
199{ 200{
201 unsigned long flags;
202
200 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || 203 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
201 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && 204 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
202 mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && 205 mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
203 mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { 206 mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
204 spin_lock(&to_mdev(dev)->sm_lock); 207 spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags);
205 memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); 208 memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
206 spin_unlock(&to_mdev(dev)->sm_lock); 209 spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags);
207 } 210 }
208} 211}
209 212
@@ -213,6 +216,7 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
213 struct ib_mad_send_buf *send_buf; 216 struct ib_mad_send_buf *send_buf;
214 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; 217 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
215 int ret; 218 int ret;
219 unsigned long flags;
216 220
217 if (agent) { 221 if (agent) {
218 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, 222 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
@@ -225,13 +229,13 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
225 * wrong following the IB spec strictly, but we know 229 * wrong following the IB spec strictly, but we know
226 * it's OK for our devices). 230 * it's OK for our devices).
227 */ 231 */
228 spin_lock(&dev->sm_lock); 232 spin_lock_irqsave(&dev->sm_lock, flags);
229 memcpy(send_buf->mad, mad, sizeof *mad); 233 memcpy(send_buf->mad, mad, sizeof *mad);
230 if ((send_buf->ah = dev->sm_ah[port_num - 1])) 234 if ((send_buf->ah = dev->sm_ah[port_num - 1]))
231 ret = ib_post_send_mad(send_buf, NULL); 235 ret = ib_post_send_mad(send_buf, NULL);
232 else 236 else
233 ret = -EINVAL; 237 ret = -EINVAL;
234 spin_unlock(&dev->sm_lock); 238 spin_unlock_irqrestore(&dev->sm_lock, flags);
235 239
236 if (ret) 240 if (ret)
237 ib_free_send_mad(send_buf); 241 ib_free_send_mad(send_buf);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index fe2088cfa6ee..cc05579ebce7 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -423,6 +423,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
423 struct ib_device_modify *props) 423 struct ib_device_modify *props)
424{ 424{
425 struct mlx4_cmd_mailbox *mailbox; 425 struct mlx4_cmd_mailbox *mailbox;
426 unsigned long flags;
426 427
427 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) 428 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
428 return -EOPNOTSUPP; 429 return -EOPNOTSUPP;
@@ -430,9 +431,9 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
430 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) 431 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
431 return 0; 432 return 0;
432 433
433 spin_lock(&to_mdev(ibdev)->sm_lock); 434 spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
434 memcpy(ibdev->node_desc, props->node_desc, 64); 435 memcpy(ibdev->node_desc, props->node_desc, 64);
435 spin_unlock(&to_mdev(ibdev)->sm_lock); 436 spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
436 437
437 /* 438 /*
438 * If possible, pass node desc to FW, so it can generate 439 * If possible, pass node desc to FW, so it can generate