aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jack Morgenstein <jackm@dev.mellanox.co.il> 2014-08-21 07:28:41 -0400
committer: Roland Dreier <roland@purestorage.com> 2014-09-22 12:46:52 -0400
commit: dba3ad2addcd74ec850e510f3b8a9d046cc24ef3 (patch)
tree: ff6f8208ef3ddd0d7e274b2de0373f8a1c4ab2d4
parent: bccb84f1dfab92ed180adf09c76cfa9ddc90edb9 (diff)
IB/mlx4: Fix lockdep splat for the iboe lock
Chuck Lever reported the following stack trace:

=================================
[ INFO: inconsistent lock state ]
3.16.0-rc2-00024-g2e78883 #17 Tainted: G E
---------------------------------
inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
swapper/0/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
 (&(&iboe->lock)->rlock){+.?...}, at: [<ffffffffa065f68b>] mlx4_ib_addr_event+0xdb/0x1a0 [mlx4_ib]
{SOFTIRQ-ON-W} state was registered at:
  [<ffffffff810b3110>] mark_irqflags+0x110/0x170
  [<ffffffff810b4806>] __lock_acquire+0x2c6/0x5b0
  [<ffffffff810b4bd9>] lock_acquire+0xe9/0x120
  [<ffffffff815f7f6e>] _raw_spin_lock+0x3e/0x80
  [<ffffffffa0661084>] mlx4_ib_scan_netdevs+0x34/0x260 [mlx4_ib]
  [<ffffffffa06612db>] mlx4_ib_netdev_event+0x2b/0x40 [mlx4_ib]
  [<ffffffff81522219>] register_netdevice_notifier+0x99/0x1e0
  [<ffffffffa06626e3>] mlx4_ib_add+0x743/0xbc0 [mlx4_ib]
  [<ffffffffa05ec168>] mlx4_add_device+0x48/0xa0 [mlx4_core]
  [<ffffffffa05ec2c3>] mlx4_register_interface+0x73/0xb0 [mlx4_core]
  [<ffffffffa05c505e>] cm_req_handler+0x13e/0x460 [ib_cm]
  [<ffffffff810002e2>] do_one_initcall+0x112/0x1c0
  [<ffffffff810e8264>] do_init_module+0x34/0x190
  [<ffffffff810ea62f>] load_module+0x5cf/0x740
  [<ffffffff810ea939>] SyS_init_module+0x99/0xd0
  [<ffffffff815f8fd2>] system_call_fastpath+0x16/0x1b
irq event stamp: 336142
hardirqs last enabled at (336142): [<ffffffff810612f5>] __local_bh_enable_ip+0xb5/0xc0
hardirqs last disabled at (336141): [<ffffffff81061296>] __local_bh_enable_ip+0x56/0xc0
softirqs last enabled at (336004): [<ffffffff8106123a>] _local_bh_enable+0x4a/0x50
softirqs last disabled at (336005): [<ffffffff810617a4>] irq_exit+0x44/0xd0

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&iboe->lock)->rlock);
  <Interrupt>
    lock(&(&iboe->lock)->rlock);

 *** DEADLOCK ***

The above problem was caused by the spin lock being taken both in the
process context and in a soft-irq context (in a netdev notifier handler).
The required fix is to use spin_lock/unlock_bh() instead of spin_lock/unlock
on the iboe lock.

Reported-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c 24
1 files changed, 12 insertions, 12 deletions
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d404a2eafa79..c231112396b2 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -360,7 +360,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
360 props->state = IB_PORT_DOWN; 360 props->state = IB_PORT_DOWN;
361 props->phys_state = state_to_phys_state(props->state); 361 props->phys_state = state_to_phys_state(props->state);
362 props->active_mtu = IB_MTU_256; 362 props->active_mtu = IB_MTU_256;
363 spin_lock(&iboe->lock); 363 spin_lock_bh(&iboe->lock);
364 ndev = iboe->netdevs[port - 1]; 364 ndev = iboe->netdevs[port - 1];
365 if (!ndev) 365 if (!ndev)
366 goto out_unlock; 366 goto out_unlock;
@@ -372,7 +372,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
372 IB_PORT_ACTIVE : IB_PORT_DOWN; 372 IB_PORT_ACTIVE : IB_PORT_DOWN;
373 props->phys_state = state_to_phys_state(props->state); 373 props->phys_state = state_to_phys_state(props->state);
374out_unlock: 374out_unlock:
375 spin_unlock(&iboe->lock); 375 spin_unlock_bh(&iboe->lock);
376out: 376out:
377 mlx4_free_cmd_mailbox(mdev->dev, mailbox); 377 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
378 return err; 378 return err;
@@ -814,11 +814,11 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
814 if (!mqp->port) 814 if (!mqp->port)
815 return 0; 815 return 0;
816 816
817 spin_lock(&mdev->iboe.lock); 817 spin_lock_bh(&mdev->iboe.lock);
818 ndev = mdev->iboe.netdevs[mqp->port - 1]; 818 ndev = mdev->iboe.netdevs[mqp->port - 1];
819 if (ndev) 819 if (ndev)
820 dev_hold(ndev); 820 dev_hold(ndev);
821 spin_unlock(&mdev->iboe.lock); 821 spin_unlock_bh(&mdev->iboe.lock);
822 822
823 if (ndev) { 823 if (ndev) {
824 ret = 1; 824 ret = 1;
@@ -1265,11 +1265,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1265 mutex_lock(&mqp->mutex); 1265 mutex_lock(&mqp->mutex);
1266 ge = find_gid_entry(mqp, gid->raw); 1266 ge = find_gid_entry(mqp, gid->raw);
1267 if (ge) { 1267 if (ge) {
1268 spin_lock(&mdev->iboe.lock); 1268 spin_lock_bh(&mdev->iboe.lock);
1269 ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; 1269 ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
1270 if (ndev) 1270 if (ndev)
1271 dev_hold(ndev); 1271 dev_hold(ndev);
1272 spin_unlock(&mdev->iboe.lock); 1272 spin_unlock_bh(&mdev->iboe.lock);
1273 if (ndev) 1273 if (ndev)
1274 dev_put(ndev); 1274 dev_put(ndev);
1275 list_del(&ge->list); 1275 list_del(&ge->list);
@@ -1554,7 +1554,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
1554 return 0; 1554 return 0;
1555 1555
1556 iboe = &ibdev->iboe; 1556 iboe = &ibdev->iboe;
1557 spin_lock(&iboe->lock); 1557 spin_lock_bh(&iboe->lock);
1558 1558
1559 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) 1559 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
1560 if ((netif_is_bond_master(real_dev) && 1560 if ((netif_is_bond_master(real_dev) &&
@@ -1564,7 +1564,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
1564 update_gid_table(ibdev, port, gid, 1564 update_gid_table(ibdev, port, gid,
1565 event == NETDEV_DOWN, 0); 1565 event == NETDEV_DOWN, 0);
1566 1566
1567 spin_unlock(&iboe->lock); 1567 spin_unlock_bh(&iboe->lock);
1568 return 0; 1568 return 0;
1569 1569
1570} 1570}
@@ -1742,7 +1742,7 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
1742 } 1742 }
1743 1743
1744 read_lock(&dev_base_lock); 1744 read_lock(&dev_base_lock);
1745 spin_lock(&iboe->lock); 1745 spin_lock_bh(&iboe->lock);
1746 1746
1747 for_each_netdev(&init_net, dev) { 1747 for_each_netdev(&init_net, dev) {
1748 u8 port = mlx4_ib_get_dev_port(dev, ibdev); 1748 u8 port = mlx4_ib_get_dev_port(dev, ibdev);
@@ -1753,7 +1753,7 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
1753 } 1753 }
1754 } 1754 }
1755 1755
1756 spin_unlock(&iboe->lock); 1756 spin_unlock_bh(&iboe->lock);
1757 read_unlock(&dev_base_lock); 1757 read_unlock(&dev_base_lock);
1758out: 1758out:
1759 return err; 1759 return err;
@@ -1770,7 +1770,7 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
1770 1770
1771 iboe = &ibdev->iboe; 1771 iboe = &ibdev->iboe;
1772 1772
1773 spin_lock(&iboe->lock); 1773 spin_lock_bh(&iboe->lock);
1774 mlx4_foreach_ib_transport_port(port, ibdev->dev) { 1774 mlx4_foreach_ib_transport_port(port, ibdev->dev) {
1775 enum ib_port_state port_state = IB_PORT_NOP; 1775 enum ib_port_state port_state = IB_PORT_NOP;
1776 struct net_device *old_master = iboe->masters[port - 1]; 1776 struct net_device *old_master = iboe->masters[port - 1];
@@ -1842,7 +1842,7 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
1842 } 1842 }
1843 } 1843 }
1844 1844
1845 spin_unlock(&iboe->lock); 1845 spin_unlock_bh(&iboe->lock);
1846 1846
1847 if (update_qps_port > 0) 1847 if (update_qps_port > 0)
1848 mlx4_ib_update_qps(ibdev, dev, update_qps_port); 1848 mlx4_ib_update_qps(ibdev, dev, update_qps_port);