author    Roland Dreier <rdreier@cisco.com>  2009-09-23 14:10:15 -0400
committer Roland Dreier <rolandd@cisco.com>  2009-09-23 14:10:15 -0400
commit    0e442afd92fcdde2cc63b6f25556b8934e42b7d2 (patch)
tree      49b2f52ee849e7888473eb228f1abe3c27ed9434
parent    86d710146fb9975f04c505ec78caa43d227c1018 (diff)
IB/mad: Fix lock-lock-timer deadlock in RMPP code
Holding agent->lock across cancel_delayed_work() (which does del_timer_sync())
in ib_cancel_rmpp_recvs() leads to lockdep reports of possible lock-timer
deadlocks if a consumer ever does something that connects agent->lock to a
lock taken in IRQ context (cf http://marc.info/?l=linux-rdma&m=125243699026045).

Fix this by changing the list items to a new state "CANCELING" while holding
the lock, and then canceling the delayed work without holding the lock.  If
the delayed work runs after the lock is dropped, it will see the state is
CANCELING and return immediately, so the list will stay stable while we
traverse it with the lock not held.

Reviewed-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
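For context, the cancellation pattern the patch adopts can be shown in isolation:
mark each entry CANCELING under the lock, drop the lock, cancel the delayed work,
and have the work handler bail out when it sees CANCELING instead of unlinking
the entry.  The sketch below is illustrative only and is not part of the patch;
struct item, item_list, item_lock, cancel_all_items() and item_work_handler()
are made-up names, while the list/spinlock/workqueue calls are the standard
kernel APIs the patch itself uses.

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

enum item_state { ITEM_ACTIVE, ITEM_CANCELING };

struct item {
	struct list_head	list;
	enum item_state		state;
	struct delayed_work	dwork;
};

static LIST_HEAD(item_list);
static DEFINE_SPINLOCK(item_lock);

static void cancel_all_items(void)
{
	struct item *it;
	unsigned long flags;

	/* Pass 1: mark every entry CANCELING while holding the lock. */
	spin_lock_irqsave(&item_lock, flags);
	list_for_each_entry(it, &item_list, list)
		it->state = ITEM_CANCELING;
	spin_unlock_irqrestore(&item_lock, flags);

	/*
	 * Pass 2: cancel the delayed work with the lock dropped, so we never
	 * wait for a timer while holding a lock the handler may need.  The
	 * list stays stable because the handler returns early on
	 * ITEM_CANCELING instead of unlinking the entry.
	 */
	list_for_each_entry(it, &item_list, list)
		cancel_delayed_work(&it->dwork);
}

static void item_work_handler(struct work_struct *work)
{
	struct item *it = container_of(work, struct item, dwork.work);
	unsigned long flags;

	spin_lock_irqsave(&item_lock, flags);
	if (it->state == ITEM_CANCELING) {
		/* cancel_all_items() owns this entry now; leave the list alone. */
		spin_unlock_irqrestore(&item_lock, flags);
		return;
	}
	list_del(&it->list);
	spin_unlock_irqrestore(&item_lock, flags);
	/* ... free the item outside the lock ... */
}

The two passes matter because cancel_delayed_work() here may end up in
del_timer_sync(), which must not be called while holding a lock that the timer
handler (or anything lockdep connects it to) can take.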
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c  17
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 57a3c6f947b2..4e0f2829e0e5 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -37,7 +37,8 @@
 enum rmpp_state {
 	RMPP_STATE_ACTIVE,
 	RMPP_STATE_TIMEOUT,
-	RMPP_STATE_COMPLETE
+	RMPP_STATE_COMPLETE,
+	RMPP_STATE_CANCELING
 };
 
 struct mad_rmpp_recv {
@@ -87,18 +88,22 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
 
 	spin_lock_irqsave(&agent->lock, flags);
 	list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+		if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+			ib_free_recv_mad(rmpp_recv->rmpp_wc);
+		rmpp_recv->state = RMPP_STATE_CANCELING;
+	}
+	spin_unlock_irqrestore(&agent->lock, flags);
+
+	list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
 		cancel_delayed_work(&rmpp_recv->timeout_work);
 		cancel_delayed_work(&rmpp_recv->cleanup_work);
 	}
-	spin_unlock_irqrestore(&agent->lock, flags);
 
 	flush_workqueue(agent->qp_info->port_priv->wq);
 
 	list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
 				 &agent->rmpp_list, list) {
 		list_del(&rmpp_recv->list);
-		if (rmpp_recv->state != RMPP_STATE_COMPLETE)
-			ib_free_recv_mad(rmpp_recv->rmpp_wc);
 		destroy_rmpp_recv(rmpp_recv);
 	}
 }
@@ -260,6 +265,10 @@ static void recv_cleanup_handler(struct work_struct *work)
 	unsigned long flags;
 
 	spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
+	if (rmpp_recv->state == RMPP_STATE_CANCELING) {
+		spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
+		return;
+	}
 	list_del(&rmpp_recv->list);
 	spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
 	destroy_rmpp_recv(rmpp_recv);