diff options
author | Michael S. Tsirkin <mst@mellanox.co.il> | 2006-03-30 08:52:54 -0500 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2006-04-02 17:39:19 -0400 |
commit | 37289efe3ee0c0a00b5d8302df9a2b007e65c187 (patch) | |
tree | 92abe35ea148d32c735c7a675fdb9640fe9cfb7f | |
parent | f27f0a045b79de5729d064497e21a70871f1d6fe (diff) |
IB/mad: fix oops in cancel_mads
We have seen the following oops in cancel_mads when restarting opensm
multiple times:
Call Trace:
[<c010549b>] show_stack+0x9b/0xb0
[<c01055ec>] show_registers+0x11c/0x190
[<c01057cd>] die+0xed/0x160
[<c031b966>] do_page_fault+0x3f6/0x5d0
[<c010511f>] error_code+0x4f/0x60
[<f8ac4e38>] cancel_mads+0x128/0x150 [ib_mad]
[<f8ac2811>] unregister_mad_agent+0x11/0x130 [ib_mad]
[<f8ac2a12>] ib_unregister_mad_agent+0x12/0x20 [ib_mad]
[<f8b10f23>] ib_umad_close+0xf3/0x130 [ib_umad]
[<c0162937>] __fput+0x187/0x1c0
[<c01627a9>] fput+0x19/0x20
[<c0160f7a>] filp_close+0x3a/0x60
[<c0121ca8>] put_files_struct+0x68/0xa0
[<c0103cf7>] do_signal+0x47/0x100
[<c0103ded>] do_notify_resume+0x3d/0x40
[<c0103f9e>] work_notifysig+0x13/0x25
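We traced this back to local_completions unlocking mad_agent_priv->lock
while still keeping a pointer into local_list. With the lock dropped,
cancel_mads could find the entry still on local_list and free it, so the
later call to list_del(&local->completion_list) in local_completions
would then corrupt the list.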
To fix this, remove the entry from local_list after looking it up but
before releasing mad_agent_priv->lock, to prevent cancel_mads from
finding and freeing it.
Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- | drivers/infiniband/core/mad.c | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index ba54c856b0e5..3a702da83e41 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c | |||
@@ -2311,6 +2311,7 @@ static void local_completions(void *data) | |||
2311 | local = list_entry(mad_agent_priv->local_list.next, | 2311 | local = list_entry(mad_agent_priv->local_list.next, |
2312 | struct ib_mad_local_private, | 2312 | struct ib_mad_local_private, |
2313 | completion_list); | 2313 | completion_list); |
2314 | list_del(&local->completion_list); | ||
2314 | spin_unlock_irqrestore(&mad_agent_priv->lock, flags); | 2315 | spin_unlock_irqrestore(&mad_agent_priv->lock, flags); |
2315 | if (local->mad_priv) { | 2316 | if (local->mad_priv) { |
2316 | recv_mad_agent = local->recv_mad_agent; | 2317 | recv_mad_agent = local->recv_mad_agent; |
@@ -2362,7 +2363,6 @@ local_send_completion: | |||
2362 | &mad_send_wc); | 2363 | &mad_send_wc); |
2363 | 2364 | ||
2364 | spin_lock_irqsave(&mad_agent_priv->lock, flags); | 2365 | spin_lock_irqsave(&mad_agent_priv->lock, flags); |
2365 | list_del(&local->completion_list); | ||
2366 | atomic_dec(&mad_agent_priv->refcount); | 2366 | atomic_dec(&mad_agent_priv->refcount); |
2367 | if (!recv) | 2367 | if (!recv) |
2368 | kmem_cache_free(ib_mad_cache, local->mad_priv); | 2368 | kmem_cache_free(ib_mad_cache, local->mad_priv); |