diff options
author | Kurt Hackel <kurt.hackel@oracle.com> | 2006-01-12 17:24:55 -0500 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2006-02-03 16:47:20 -0500 |
commit | e2faea4ce340f199c1957986c4c3dc2de76f5746 (patch) | |
tree | 2336b06cf270b3cff2ff39ba75fc67639dc63df9 /fs/ocfs2/dlm/dlmdomain.c | |
parent | 0d419a6a95ee158675aa184c6c3e476b22d02145 (diff) |
[PATCH] ocfs2/dlm: fixes
* fix a hang which can occur during shutdown migration
* do not allow nodes to join during recovery
* when restarting lock mastery, do not ignore nodes which come up
* more than one node could become recovery master, fix this
* sleep to allow some time for heartbeat state to catch up to network
* extra debug info for bad recovery state problems
* make DLM_RECO_NODE_DATA_DONE a valid state for non-master recovery nodes
* prune all locks from dead nodes on $RECOVERY lock resources
* do NOT automatically add new nodes to mle nodemaps until they have properly joined the domain
* make sure dlm_pick_recovery_master only exits when all nodes have synced
* properly handle dlmunlock errors in dlm_pick_recovery_master
* do not propagate network errors in dlm_send_begin_reco_message
* dead nodes were not being put in the recovery map sometimes, fix this
* dlmunlock was failing to clear the unlock actions on DLM_DENIED
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlm/dlmdomain.c')
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 18 |
1 files changed, 17 insertions, 1 deletions
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index da3c22045f89..6ee30837389c 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -573,8 +573,11 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | |||
573 | spin_lock(&dlm_domain_lock); | 573 | spin_lock(&dlm_domain_lock); |
574 | dlm = __dlm_lookup_domain_full(query->domain, query->name_len); | 574 | dlm = __dlm_lookup_domain_full(query->domain, query->name_len); |
575 | /* Once the dlm ctxt is marked as leaving then we don't want | 575 | /* Once the dlm ctxt is marked as leaving then we don't want |
576 | * to be put in someone's domain map. */ | 576 | * to be put in someone's domain map. |
577 | * Also, explicitly disallow joining at certain troublesome | ||
578 | * times (ie. during recovery). */ | ||
577 | if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { | 579 | if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { |
580 | int bit = query->node_idx; | ||
578 | spin_lock(&dlm->spinlock); | 581 | spin_lock(&dlm->spinlock); |
579 | 582 | ||
580 | if (dlm->dlm_state == DLM_CTXT_NEW && | 583 | if (dlm->dlm_state == DLM_CTXT_NEW && |
@@ -586,6 +589,19 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | |||
586 | } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { | 589 | } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { |
587 | /* Disallow parallel joins. */ | 590 | /* Disallow parallel joins. */ |
588 | response = JOIN_DISALLOW; | 591 | response = JOIN_DISALLOW; |
592 | } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { | ||
593 | mlog(ML_NOTICE, "node %u trying to join, but recovery " | ||
594 | "is ongoing.\n", bit); | ||
595 | response = JOIN_DISALLOW; | ||
596 | } else if (test_bit(bit, dlm->recovery_map)) { | ||
597 | mlog(ML_NOTICE, "node %u trying to join, but it " | ||
598 | "still needs recovery.\n", bit); | ||
599 | response = JOIN_DISALLOW; | ||
600 | } else if (test_bit(bit, dlm->domain_map)) { | ||
601 | mlog(ML_NOTICE, "node %u trying to join, but it " | ||
602 | "is still in the domain! needs recovery?\n", | ||
603 | bit); | ||
604 | response = JOIN_DISALLOW; | ||
589 | } else { | 605 | } else { |
590 | /* Alright we're fully a part of this domain | 606 | /* Alright we're fully a part of this domain |
591 | * so we keep some state as to who's joining | 607 | * so we keep some state as to who's joining |