aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlm/dlmconvert.c
diff options
context:
space:
mode:
author: Joseph Qi <joseph.qi@huawei.com> 2016-03-25 17:21:26 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-03-25 19:37:42 -0400
commit: ac7cf246dfdbec3d8fed296c7bf30e16f5099dac (patch)
tree: 685e1bf6903a2fbf67911fc6f349e3c3e81ce773 /fs/ocfs2/dlm/dlmconvert.c
parent: 28888681b4f641ce9a96478ce4683577cd3abbff (diff)
ocfs2/dlm: fix race between convert and recovery
There is a race window between dlmconvert_remote and
dlm_move_lockres_to_recovery_list which can leave a lock with
OCFS2_LOCK_BUSY set on the granted list, causing the system to hang.

dlmconvert_remote
{
        spin_lock(&res->spinlock);
        list_move_tail(&lock->list, &res->converting);
        lock->convert_pending = 1;
        spin_unlock(&res->spinlock);

        status = dlm_send_remote_convert_request();
        >>>>>> race window: the master has queued the ast and returned
               DLM_NORMAL, and then goes down before sending the ast.
               This node detects that the master is down and calls
               dlm_move_lockres_to_recovery_list, which reverts the
               lock to the granted list. OCFS2_LOCK_BUSY then won't be
               cleared, because the new master won't send the ast any
               more: it thinks the convert has already been authorized.

        spin_lock(&res->spinlock);
        lock->convert_pending = 0;
        if (status != DLM_NORMAL)
                dlm_revert_pending_convert(res, lock);
        spin_unlock(&res->spinlock);
}

In this case, check whether res->state has the DLM_LOCK_RES_RECOVERING
bit set (res is still recovering) or the res master has changed (the new
master has finished recovery). If so, reset the status to
DLM_RECOVERING so that the convert will be retried.

Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Tariq Saeed <tariq.x.saeed@oracle.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/dlm/dlmconvert.c')
-rw-r--r-- fs/ocfs2/dlm/dlmconvert.c 11
1 files changed, 10 insertions, 1 deletions
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index e36d63ff1783..84de55ed865a 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -262,6 +262,7 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
262 struct dlm_lock *lock, int flags, int type) 262 struct dlm_lock *lock, int flags, int type)
263{ 263{
264 enum dlm_status status; 264 enum dlm_status status;
265 u8 old_owner = res->owner;
265 266
266 mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, 267 mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
267 lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); 268 lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -316,11 +317,19 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
316 spin_lock(&res->spinlock); 317 spin_lock(&res->spinlock);
317 res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 318 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
318 lock->convert_pending = 0; 319 lock->convert_pending = 0;
319 /* if it failed, move it back to granted queue */ 320 /* if it failed, move it back to granted queue.
321 * if master returns DLM_NORMAL and then down before sending ast,
322 * it may have already been moved to granted queue, reset to
323 * DLM_RECOVERING and retry convert */
320 if (status != DLM_NORMAL) { 324 if (status != DLM_NORMAL) {
321 if (status != DLM_NOTQUEUED) 325 if (status != DLM_NOTQUEUED)
322 dlm_error(status); 326 dlm_error(status);
323 dlm_revert_pending_convert(res, lock); 327 dlm_revert_pending_convert(res, lock);
328 } else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
329 (old_owner != res->owner)) {
330 mlog(0, "res %.*s is in recovering or has been recovered.\n",
331 res->lockname.len, res->lockname.name);
332 status = DLM_RECOVERING;
324 } 333 }
325bail: 334bail:
326 spin_unlock(&res->spinlock); 335 spin_unlock(&res->spinlock);