aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlm
diff options
context:
space:
mode:
authorpiaojun <piaojun@huawei.com>2016-08-02 17:02:13 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-08-02 17:31:41 -0400
commit86b652b93adb57d8fed8edd532ed2eb8a791950d (patch)
treec1ffef69a234dc18a177652ac46daee05cebba0c /fs/ocfs2/dlm
parent2070ad1aebfff2c26190188844c38e55d2df2ae2 (diff)
ocfs2/dlm: disable BUG_ON when DLM_LOCK_RES_DROPPING_REF is cleared before dlm_deref_lockres_done_handler
We found a BUG situation in which DLM_LOCK_RES_DROPPING_REF is cleared unexpected that described below. To solve the bug, we disable the BUG_ON and purge lockres in dlm_do_local_recovery_cleanup. Node 1 Node 2(master) dlm_purge_lockres dlm_deref_lockres_handler DLM_LOCK_RES_SETREF_INPROG is set response DLM_DEREF_RESPONSE_INPROG receive DLM_DEREF_RESPONSE_INPROG stop puring in dlm_purge_lockres and wait for DLM_DEREF_RESPONSE_DONE dispatch dlm_deref_lockres_worker response DLM_DEREF_RESPONSE_DONE receive DLM_DEREF_RESPONSE_DONE and prepare to purge lockres Node 2 goes down find Node2 down and do local clean up for Node2: dlm_do_local_recovery_cleanup -> clear DLM_LOCK_RES_DROPPING_REF when purging lockres, BUG_ON happens because DLM_LOCK_RES_DROPPING_REF is clear: dlm_deref_lockres_done_handler ->BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF)); [akpm@linux-foundation.org: fix duplicated write to `ret'] Fixes: 60d663cb5273 ("ocfs2/dlm: add DEREF_DONE message") Link: http://lkml.kernel.org/r/57845055.9080702@huawei.com Signed-off-by: Jun Piao <piaojun@huawei.com> Reviewed-by: Joseph Qi <joseph.qi@huawei.com> Reviewed-by: Jiufei Xue <xuejiufei@huawei.com> Reviewed-by: Mark Fasheh <mfasheh@suse.de> Cc: Joel Becker <jlbec@evilplan.org> Cc: Junxiao Bi <junxiao.bi@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c13
1 files changed, 11 insertions, 2 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 13719d3f35f8..525dc06468c4 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2416,7 +2416,17 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
2416 } 2416 }
2417 2417
2418 spin_lock(&res->spinlock); 2418 spin_lock(&res->spinlock);
2419 BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF)); 2419 if (!(res->state & DLM_LOCK_RES_DROPPING_REF)) {
2420 spin_unlock(&res->spinlock);
2421 spin_unlock(&dlm->spinlock);
2422 mlog(ML_NOTICE, "%s:%.*s: node %u sends deref done "
2423 "but it is already derefed!\n", dlm->name,
2424 res->lockname.len, res->lockname.name, node);
2425 dlm_lockres_put(res);
2426 ret = 0;
2427 goto done;
2428 }
2429
2420 if (!list_empty(&res->purge)) { 2430 if (!list_empty(&res->purge)) {
2421 mlog(0, "%s: Removing res %.*s from purgelist\n", 2431 mlog(0, "%s: Removing res %.*s from purgelist\n",
2422 dlm->name, res->lockname.len, res->lockname.name); 2432 dlm->name, res->lockname.len, res->lockname.name);
@@ -2456,7 +2466,6 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
2456 spin_unlock(&dlm->spinlock); 2466 spin_unlock(&dlm->spinlock);
2457 2467
2458 ret = 0; 2468 ret = 0;
2459
2460done: 2469done:
2461 dlm_put(dlm); 2470 dlm_put(dlm);
2462 return ret; 2471 return ret;