diff options
author | Kurt Hackel <kurt.hackel@oracle.com> | 2006-01-12 17:24:55 -0500 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2006-02-03 16:47:20 -0500 |
commit | e2faea4ce340f199c1957986c4c3dc2de76f5746 (patch) | |
tree | 2336b06cf270b3cff2ff39ba75fc67639dc63df9 /fs/ocfs2/dlm/dlmmaster.c | |
parent | 0d419a6a95ee158675aa184c6c3e476b22d02145 (diff) |
[PATCH] ocfs2/dlm: fixes
* fix a hang which can occur during shutdown migration
* do not allow nodes to join during recovery
* when restarting lock mastery, do not ignore nodes which come up
* more than one node could become recovery master, fix this
* sleep to allow some time for heartbeat state to catch up to network
* extra debug info for bad recovery state problems
* make DLM_RECO_NODE_DATA_DONE a valid state for non-master recovery nodes
* prune all locks from dead nodes on $RECOVERY lock resources
* do NOT automatically add new nodes to mle nodemaps until they have properly
joined the domain
* make sure dlm_pick_recovery_master only exits when all nodes have synced
* properly handle dlmunlock errors in dlm_pick_recovery_master
* do not propagate network errors in dlm_send_begin_reco_message
* dead nodes were not being put in the recovery map sometimes, fix this
* dlmunlock was failing to clear the unlock actions on DLM_DENIED
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlm/dlmmaster.c')
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 27e984f7e4cd..a3194fe173d9 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -1050,17 +1050,10 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, | |||
1050 | node = dlm_bitmap_diff_iter_next(&bdi, &sc); | 1050 | node = dlm_bitmap_diff_iter_next(&bdi, &sc); |
1051 | while (node >= 0) { | 1051 | while (node >= 0) { |
1052 | if (sc == NODE_UP) { | 1052 | if (sc == NODE_UP) { |
1053 | /* a node came up. easy. might not even need | 1053 | /* a node came up. clear any old vote from |
1054 | * to talk to it if its node number is higher | 1054 | * the response map and set it in the vote map |
1055 | * or if we are already blocked. */ | 1055 | * then restart the mastery. */ |
1056 | mlog(0, "node up! %d\n", node); | 1056 | mlog(ML_NOTICE, "node %d up while restarting\n", node); |
1057 | if (blocked) | ||
1058 | goto next; | ||
1059 | |||
1060 | if (node > dlm->node_num) { | ||
1061 | mlog(0, "node > this node. skipping.\n"); | ||
1062 | goto next; | ||
1063 | } | ||
1064 | 1057 | ||
1065 | /* redo the master request, but only for the new node */ | 1058 | /* redo the master request, but only for the new node */ |
1066 | mlog(0, "sending request to new node\n"); | 1059 | mlog(0, "sending request to new node\n"); |
@@ -2005,6 +1998,15 @@ fail: | |||
2005 | break; | 1998 | break; |
2006 | 1999 | ||
2007 | mlog(0, "timed out during migration\n"); | 2000 | mlog(0, "timed out during migration\n"); |
2001 | /* avoid hang during shutdown when migrating lockres | ||
2002 | * to a node which also goes down */ | ||
2003 | if (dlm_is_node_dead(dlm, target)) { | ||
2004 | mlog(0, "%s:%.*s: expected migration target %u " | ||
2005 | "is no longer up. restarting.\n", | ||
2006 | dlm->name, res->lockname.len, | ||
2007 | res->lockname.name, target); | ||
2008 | ret = -ERESTARTSYS; | ||
2009 | } | ||
2008 | } | 2010 | } |
2009 | if (ret == -ERESTARTSYS) { | 2011 | if (ret == -ERESTARTSYS) { |
2010 | /* migration failed, detach and clean up mle */ | 2012 | /* migration failed, detach and clean up mle */ |