author		Changwei Ge <gechangwei@live.cn>	2019-09-23 18:33:37 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-09-24 18:54:07 -0400
commit		0a3775e4f883912944481cf2ef36eb6383a9cc74 (patch)
tree		c32030265f51ad8fd58f25dae5850b75cb0e826f
parent		a89bd89fae638965ca5a79a3467d79f926260882 (diff)
ocfs2: wait for recovering done after direct unlock request
There is a scenario causing ocfs2 umount to hang when multiple hosts are
rebooting at the same time.

NODE1                                   NODE2               NODE3
send unlock request to NODE2
                                        dies
                                                            become recovery master
                                                            recover NODE2
find NODE2 dead
mark resource RECOVERING
directly remove lock from grant list
calculate usage but RECOVERING marked
**miss the window of purging

clear RECOVERING

To reproduce this issue, crash a host and then umount ocfs2 from another
node.

To solve this, just let the unlock process wait until recovery is done.

Link: http://lkml.kernel.org/r/1550124866-20367-1-git-send-email-gechangwei@live.cn
Signed-off-by: Changwei Ge <gechangwei@live.cn>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 fs/ocfs2/dlm/dlmunlock.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)
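For context on the "miss the window of purging" step in the scenario above: the dlm thread only queues a lock resource for purging when it looks completely unused, and a resource still flagged RECOVERING fails that test. The sketch below models that check; it is loosely based on __dlm_lockres_unused() in fs/ocfs2/dlm/dlmthread.c, and the helper name lockres_ready_to_purge and its reduced set of conditions are illustrative, not the exact upstream code.

/* Illustrative sketch, not the upstream implementation: a resource is
 * only a purge candidate when it has no locks left and no transient
 * state such as RECOVERING. The bug: the direct unlock empties the
 * grant list while RECOVERING is still set, so this check says "not
 * yet", and nothing re-evaluates the resource once RECOVERING clears.
 */
static int lockres_ready_to_purge(struct dlm_lock_resource *res)
{
	assert_spin_locked(&res->spinlock);

	if (!list_empty(&res->granted) ||
	    !list_empty(&res->converting) ||
	    !list_empty(&res->blocked))
		return 0;	/* still holds locks */

	if (res->state & DLM_LOCK_RES_RECOVERING)
		return 0;	/* recovery in flight, skip for now */

	return 1;		/* eligible for the purge list */
}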
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index e78657742bd8..3883633e82eb 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -90,7 +90,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
 	enum dlm_status status;
 	int actions = 0;
 	int in_use;
 	u8 owner;
+	int recovery_wait = 0;
 
 	mlog(0, "master_node = %d, valblk = %d\n", master_node,
 	     flags & LKM_VALBLK);
@@ -193,9 +194,12 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
 		}
 		if (flags & LKM_CANCEL)
 			lock->cancel_pending = 0;
-		else
-			lock->unlock_pending = 0;
-
+		else {
+			if (!lock->unlock_pending)
+				recovery_wait = 1;
+			else
+				lock->unlock_pending = 0;
+		}
 	}
 
 	/* get an extra ref on lock. if we are just switching
@@ -229,6 +233,17 @@ leave:
 	spin_unlock(&res->spinlock);
 	wake_up(&res->wq);
 
+	if (recovery_wait) {
+		spin_lock(&res->spinlock);
+		/* Unlock request will directly succeed after owner dies,
+		 * and the lock is already removed from grant list. We have to
+		 * wait for RECOVERING done or we miss the chance to purge it
+		 * since the removement is much faster than RECOVERING proc.
+		 */
+		__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_RECOVERING);
+		spin_unlock(&res->spinlock);
+	}
+
 	/* let the caller's final dlm_lock_put handle the actual kfree */
 	if (actions & DLM_UNLOCK_FREE_LOCK) {
 		/* this should always be coupled with list removal */
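The new recovery_wait path keeps the unlocking thread parked until recovery clears DLM_LOCK_RES_RECOVERING, so the resource can still be queued for purging afterwards. It relies on the existing __dlm_wait_on_lockres_flags() helper; roughly, that helper is a wait-queue loop on res->wq of the following shape (a sketch from memory of the dlm code, details may vary between kernel versions):

/* Sketch of the wait helper used above: called with res->spinlock held,
 * it sleeps on res->wq until none of the requested state flags remain
 * set, dropping the spinlock around each schedule().
 */
void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags)
{
	DECLARE_WAITQUEUE(wait, current);

	assert_spin_locked(&res->spinlock);

	add_wait_queue(&res->wq, &wait);
repeat:
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (res->state & flags) {
		spin_unlock(&res->spinlock);
		schedule();
		spin_lock(&res->spinlock);
		goto repeat;
	}
	remove_wait_queue(&res->wq, &wait);
	__set_current_state(TASK_RUNNING);
}

Re-taking res->spinlock only inside the recovery_wait block, after the normal unlock path has already dropped it and called wake_up(), keeps the cost out of the common case: only unlocks that raced with the owner's death end up waiting for recovery to complete.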