diff options
author | piaojun <piaojun@huawei.com> | 2016-08-02 17:02:13 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-08-02 17:31:41 -0400 |
commit | 86b652b93adb57d8fed8edd532ed2eb8a791950d (patch) | |
tree | c1ffef69a234dc18a177652ac46daee05cebba0c /fs/ocfs2/dlm | |
parent | 2070ad1aebfff2c26190188844c38e55d2df2ae2 (diff) |
ocfs2/dlm: disable BUG_ON when DLM_LOCK_RES_DROPPING_REF is cleared before dlm_deref_lockres_done_handler
We found a situation, described below, in which DLM_LOCK_RES_DROPPING_REF
is cleared unexpectedly and a BUG_ON is triggered. To solve the bug, we
replace the BUG_ON with a check that logs a notice and returns, and purge
the lockres in dlm_do_local_recovery_cleanup.
Node 1                                  Node 2(master)
dlm_purge_lockres
                                        dlm_deref_lockres_handler
                                        DLM_LOCK_RES_SETREF_INPROG is set
                                        response DLM_DEREF_RESPONSE_INPROG
receive DLM_DEREF_RESPONSE_INPROG
stop purging in dlm_purge_lockres
and wait for DLM_DEREF_RESPONSE_DONE
                                        dispatch dlm_deref_lockres_worker
                                        response DLM_DEREF_RESPONSE_DONE
receive DLM_DEREF_RESPONSE_DONE and
prepare to purge lockres
                                        Node 2 goes down
find Node2 down and do local
clean up for Node2:
dlm_do_local_recovery_cleanup
  -> clear DLM_LOCK_RES_DROPPING_REF
when purging lockres, BUG_ON happens
because DLM_LOCK_RES_DROPPING_REF is clear:
dlm_deref_lockres_done_handler
  -> BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF));
[akpm@linux-foundation.org: fix duplicated write to `ret']
Fixes: 60d663cb5273 ("ocfs2/dlm: add DEREF_DONE message")
Link: http://lkml.kernel.org/r/57845055.9080702@huawei.com
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
Reviewed-by: Jiufei Xue <xuejiufei@huawei.com>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 13719d3f35f8..525dc06468c4 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -2416,7 +2416,17 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2416 | } | 2416 | } |
2417 | 2417 | ||
2418 | spin_lock(&res->spinlock); | 2418 | spin_lock(&res->spinlock); |
2419 | BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF)); | 2419 | if (!(res->state & DLM_LOCK_RES_DROPPING_REF)) { |
2420 | spin_unlock(&res->spinlock); | ||
2421 | spin_unlock(&dlm->spinlock); | ||
2422 | mlog(ML_NOTICE, "%s:%.*s: node %u sends deref done " | ||
2423 | "but it is already derefed!\n", dlm->name, | ||
2424 | res->lockname.len, res->lockname.name, node); | ||
2425 | dlm_lockres_put(res); | ||
2426 | ret = 0; | ||
2427 | goto done; | ||
2428 | } | ||
2429 | |||
2420 | if (!list_empty(&res->purge)) { | 2430 | if (!list_empty(&res->purge)) { |
2421 | mlog(0, "%s: Removing res %.*s from purgelist\n", | 2431 | mlog(0, "%s: Removing res %.*s from purgelist\n", |
2422 | dlm->name, res->lockname.len, res->lockname.name); | 2432 | dlm->name, res->lockname.len, res->lockname.name); |
@@ -2456,7 +2466,6 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2456 | spin_unlock(&dlm->spinlock); | 2466 | spin_unlock(&dlm->spinlock); |
2457 | 2467 | ||
2458 | ret = 0; | 2468 | ret = 0; |
2459 | |||
2460 | done: | 2469 | done: |
2461 | dlm_put(dlm); | 2470 | dlm_put(dlm); |
2462 | return ret; | 2471 | return ret; |