aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- fs/ocfs2/dlm/dlmcommon.h 4
-rw-r--r-- fs/ocfs2/dlm/dlmconvert.c 5
-rw-r--r-- fs/ocfs2/dlm/dlmlock.c 14
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c 18
4 files changed, 40 insertions, 1 deletions
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 42eb53b5293b..23ceaa7127b4 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -208,6 +208,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
208#define DLM_LOCK_RES_IN_PROGRESS 0x00000010 208#define DLM_LOCK_RES_IN_PROGRESS 0x00000010
209#define DLM_LOCK_RES_MIGRATING 0x00000020 209#define DLM_LOCK_RES_MIGRATING 0x00000020
210 210
211/* max milliseconds to wait to sync up a network failure with a node death */
212#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
213
211#define DLM_PURGE_INTERVAL_MS (8 * 1000) 214#define DLM_PURGE_INTERVAL_MS (8 * 1000)
212 215
213struct dlm_lock_resource 216struct dlm_lock_resource
@@ -658,6 +661,7 @@ int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
658void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); 661void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
659void dlm_wait_for_recovery(struct dlm_ctxt *dlm); 662void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
660int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); 663int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
664int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
661 665
662void dlm_put(struct dlm_ctxt *dlm); 666void dlm_put(struct dlm_ctxt *dlm);
663struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); 667struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index f5c2f1979ad3..f66e2d818ccd 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -392,6 +392,11 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
392 } else { 392 } else {
393 mlog_errno(tmpret); 393 mlog_errno(tmpret);
394 if (dlm_is_host_down(tmpret)) { 394 if (dlm_is_host_down(tmpret)) {
395 /* instead of logging the same network error over
396 * and over, sleep here and wait for the heartbeat
397 * to notice the node is dead. times out after 5s. */
398 dlm_wait_for_node_death(dlm, res->owner,
399 DLM_NODE_DEATH_WAIT_MAX);
395 ret = DLM_RECOVERING; 400 ret = DLM_RECOVERING;
396 mlog(0, "node %u died so returning DLM_RECOVERING " 401 mlog(0, "node %u died so returning DLM_RECOVERING "
397 "from convert message!\n", res->owner); 402 "from convert message!\n", res->owner);
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index d1a0038557a3..e709412e6e32 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -646,7 +646,19 @@ retry_lock:
646 mlog(0, "retrying lock with migration/" 646 mlog(0, "retrying lock with migration/"
647 "recovery/in progress\n"); 647 "recovery/in progress\n");
648 msleep(100); 648 msleep(100);
649 dlm_wait_for_recovery(dlm); 649 /* no waiting for dlm_reco_thread */
650 if (recovery) {
651 if (status == DLM_RECOVERING) {
652 mlog(0, "%s: got RECOVERING "
653 "for $REOCVERY lock, master "
654 "was %u\n", dlm->name,
655 res->owner);
656 dlm_wait_for_node_death(dlm, res->owner,
657 DLM_NODE_DEATH_WAIT_MAX);
658 }
659 } else {
660 dlm_wait_for_recovery(dlm);
661 }
650 goto retry_lock; 662 goto retry_lock;
651 } 663 }
652 664
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f9ce864966ec..ed76bda1a534 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -278,6 +278,24 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
278 return dead; 278 return dead;
279} 279}
280 280
/*
 * dlm_wait_for_node_death() - sleep on dlm->dlm_reco_thread_wq until
 * dlm_is_node_dead(dlm, node) evaluates true.
 *
 * @dlm:     DLM context; its dlm_reco_thread_wq is the wait queue used and
 *           its name is printed in the notice message.
 * @node:    number of the node whose death is being waited for.
 * @timeout: upper bound on the wait, in milliseconds (converted with
 *           msecs_to_jiffies()).  A value of 0 selects the unbounded
 *           wait_event() branch instead.
 *
 * Returns 0 unconditionally.  The result of wait_event_timeout() is
 * discarded, so the return value does not tell the caller whether the node
 * was seen dead or the timeout simply expired.
 */
281int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
282{
283 if (timeout) {
284 mlog(ML_NOTICE, "%s: waiting %dms for notification of "
285 "death of node %u\n", dlm->name, timeout, node);
/* bounded wait: gives up after `timeout` ms even if the node is not dead */
286 wait_event_timeout(dlm->dlm_reco_thread_wq,
287 dlm_is_node_dead(dlm, node),
288 msecs_to_jiffies(timeout));
289 } else {
290 mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
291 "of death of node %u\n", dlm->name, node);
/* unbounded wait: returns only once dlm_is_node_dead() is true */
292 wait_event(dlm->dlm_reco_thread_wq,
293 dlm_is_node_dead(dlm, node));
294 }
295 /* for now, return 0 */
296 return 0;
297}
298
281/* callers of the top-level api calls (dlmlock/dlmunlock) should 299/* callers of the top-level api calls (dlmlock/dlmunlock) should
282 * block on the dlm->reco.event when recovery is in progress. 300 * block on the dlm->reco.event when recovery is in progress.
283 * the dlm recovery thread will set this state when it begins 301 * the dlm recovery thread will set this state when it begins