diff options
-rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 4 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmconvert.c | 5 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmlock.c | 14 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 18 |
4 files changed, 40 insertions, 1 deletions
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 42eb53b5293b..23ceaa7127b4 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -208,6 +208,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm, | |||
208 | #define DLM_LOCK_RES_IN_PROGRESS 0x00000010 | 208 | #define DLM_LOCK_RES_IN_PROGRESS 0x00000010 |
209 | #define DLM_LOCK_RES_MIGRATING 0x00000020 | 209 | #define DLM_LOCK_RES_MIGRATING 0x00000020 |
210 | 210 | ||
211 | /* max milliseconds to wait to sync up a network failure with a node death */ | ||
212 | #define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) | ||
213 | |||
211 | #define DLM_PURGE_INTERVAL_MS (8 * 1000) | 214 | #define DLM_PURGE_INTERVAL_MS (8 * 1000) |
212 | 215 | ||
213 | struct dlm_lock_resource | 216 | struct dlm_lock_resource |
@@ -658,6 +661,7 @@ int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); | |||
658 | void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); | 661 | void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); |
659 | void dlm_wait_for_recovery(struct dlm_ctxt *dlm); | 662 | void dlm_wait_for_recovery(struct dlm_ctxt *dlm); |
660 | int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); | 663 | int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); |
664 | int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); | ||
661 | 665 | ||
662 | void dlm_put(struct dlm_ctxt *dlm); | 666 | void dlm_put(struct dlm_ctxt *dlm); |
663 | struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); | 667 | struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); |
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index f5c2f1979ad3..f66e2d818ccd 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c | |||
@@ -392,6 +392,11 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, | |||
392 | } else { | 392 | } else { |
393 | mlog_errno(tmpret); | 393 | mlog_errno(tmpret); |
394 | if (dlm_is_host_down(tmpret)) { | 394 | if (dlm_is_host_down(tmpret)) { |
395 | /* instead of logging the same network error over | ||
396 | * and over, sleep here and wait for the heartbeat | ||
397 | * to notice the node is dead. times out after 5s. */ | ||
398 | dlm_wait_for_node_death(dlm, res->owner, | ||
399 | DLM_NODE_DEATH_WAIT_MAX); | ||
395 | ret = DLM_RECOVERING; | 400 | ret = DLM_RECOVERING; |
396 | mlog(0, "node %u died so returning DLM_RECOVERING " | 401 | mlog(0, "node %u died so returning DLM_RECOVERING " |
397 | "from convert message!\n", res->owner); | 402 | "from convert message!\n", res->owner); |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index d1a0038557a3..e709412e6e32 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -646,7 +646,19 @@ retry_lock: | |||
646 | mlog(0, "retrying lock with migration/" | 646 | mlog(0, "retrying lock with migration/" |
647 | "recovery/in progress\n"); | 647 | "recovery/in progress\n"); |
648 | msleep(100); | 648 | msleep(100); |
649 | dlm_wait_for_recovery(dlm); | 649 | /* no waiting for dlm_reco_thread */ |
650 | if (recovery) { | ||
651 | if (status == DLM_RECOVERING) { | ||
652 | mlog(0, "%s: got RECOVERING " | ||
653 | "for $REOCVERY lock, master " | ||
654 | "was %u\n", dlm->name, | ||
655 | res->owner); | ||
656 | dlm_wait_for_node_death(dlm, res->owner, | ||
657 | DLM_NODE_DEATH_WAIT_MAX); | ||
658 | } | ||
659 | } else { | ||
660 | dlm_wait_for_recovery(dlm); | ||
661 | } | ||
650 | goto retry_lock; | 662 | goto retry_lock; |
651 | } | 663 | } |
652 | 664 | ||
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f9ce864966ec..ed76bda1a534 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -278,6 +278,24 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node) | |||
278 | return dead; | 278 | return dead; |
279 | } | 279 | } |
280 | 280 | ||
281 | int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) | ||
282 | { | ||
283 | if (timeout) { | ||
284 | mlog(ML_NOTICE, "%s: waiting %dms for notification of " | ||
285 | "death of node %u\n", dlm->name, timeout, node); | ||
286 | wait_event_timeout(dlm->dlm_reco_thread_wq, | ||
287 | dlm_is_node_dead(dlm, node), | ||
288 | msecs_to_jiffies(timeout)); | ||
289 | } else { | ||
290 | mlog(ML_NOTICE, "%s: waiting indefinitely for notification " | ||
291 | "of death of node %u\n", dlm->name, node); | ||
292 | wait_event(dlm->dlm_reco_thread_wq, | ||
293 | dlm_is_node_dead(dlm, node)); | ||
294 | } | ||
295 | /* for now, return 0 */ | ||
296 | return 0; | ||
297 | } | ||
298 | |||
281 | /* callers of the top-level api calls (dlmlock/dlmunlock) should | 299 | /* callers of the top-level api calls (dlmlock/dlmunlock) should |
282 | * block on the dlm->reco.event when recovery is in progress. | 300 | * block on the dlm->reco.event when recovery is in progress. |
283 | * the dlm recovery thread will set this state when it begins | 301 | * the dlm recovery thread will set this state when it begins |