aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlm
diff options
context:
space:
mode:
author Kurt Hackel <kurt.hackel@oracle.com> 2006-05-01 16:49:20 -0400
committer Mark Fasheh <mark.fasheh@oracle.com> 2006-06-26 17:43:09 -0400
commit 6a41321121ee2af33b8ac55c87657603df480b25 (patch)
tree 648abdd1bf2ede54a3e9759bd4b989587381dcc4 /fs/ocfs2/dlm
parent c8df412e1c746dd21094966d04b3a79aad0f4d08 (diff)
ocfs2: dlm_remaster_locks() should never exit without completing
We cannot restart recovery. Once we begin to recover a node, keep the state of the recovery intact and follow through, regardless of any other node deaths that may occur.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c 116
1 files changed, 62 insertions, 54 deletions
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 00209f4a2916..22a0b055cfcd 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -480,6 +480,7 @@ master_here:
480 480
481 status = dlm_remaster_locks(dlm, dlm->reco.dead_node); 481 status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
482 if (status < 0) { 482 if (status < 0) {
483 /* we should never hit this anymore */
483 mlog(ML_ERROR, "error %d remastering locks for node %u, " 484 mlog(ML_ERROR, "error %d remastering locks for node %u, "
484 "retrying.\n", status, dlm->reco.dead_node); 485 "retrying.\n", status, dlm->reco.dead_node);
485 /* yield a bit to allow any final network messages 486 /* yield a bit to allow any final network messages
@@ -506,9 +507,16 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
506 int destroy = 0; 507 int destroy = 0;
507 int pass = 0; 508 int pass = 0;
508 509
509 status = dlm_init_recovery_area(dlm, dead_node); 510 do {
510 if (status < 0) 511 /* we have become recovery master. there is no escaping
511 goto leave; 512 * this, so just keep trying until we get it. */
513 status = dlm_init_recovery_area(dlm, dead_node);
514 if (status < 0) {
515 mlog(ML_ERROR, "%s: failed to alloc recovery area, "
516 "retrying\n", dlm->name);
517 msleep(1000);
518 }
519 } while (status != 0);
512 520
513 /* safe to access the node data list without a lock, since this 521 /* safe to access the node data list without a lock, since this
514 * process is the only one to change the list */ 522 * process is the only one to change the list */
@@ -525,16 +533,36 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
525 continue; 533 continue;
526 } 534 }
527 535
528 status = dlm_request_all_locks(dlm, ndata->node_num, dead_node); 536 do {
529 if (status < 0) { 537 status = dlm_request_all_locks(dlm, ndata->node_num,
530 mlog_errno(status); 538 dead_node);
531 if (dlm_is_host_down(status)) 539 if (status < 0) {
532 ndata->state = DLM_RECO_NODE_DATA_DEAD; 540 mlog_errno(status);
533 else { 541 if (dlm_is_host_down(status)) {
534 destroy = 1; 542 /* node died, ignore it for recovery */
535 goto leave; 543 status = 0;
544 ndata->state = DLM_RECO_NODE_DATA_DEAD;
545 /* wait for the domain map to catch up
546 * with the network state. */
547 wait_event_timeout(dlm->dlm_reco_thread_wq,
548 dlm_is_node_dead(dlm,
549 ndata->node_num),
550 msecs_to_jiffies(1000));
551 mlog(0, "waited 1 sec for %u, "
552 "dead? %s\n", ndata->node_num,
553 dlm_is_node_dead(dlm, ndata->node_num) ?
554 "yes" : "no");
555 } else {
556 /* -ENOMEM on the other node */
557 mlog(0, "%s: node %u returned "
558 "%d during recovery, retrying "
559 "after a short wait\n",
560 dlm->name, ndata->node_num,
561 status);
562 msleep(100);
563 }
536 } 564 }
537 } 565 } while (status != 0);
538 566
539 switch (ndata->state) { 567 switch (ndata->state) {
540 case DLM_RECO_NODE_DATA_INIT: 568 case DLM_RECO_NODE_DATA_INIT:
@@ -546,10 +574,9 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
546 mlog(0, "node %u died after requesting " 574 mlog(0, "node %u died after requesting "
547 "recovery info for node %u\n", 575 "recovery info for node %u\n",
548 ndata->node_num, dead_node); 576 ndata->node_num, dead_node);
549 // start all over 577 /* fine. don't need this node's info.
550 destroy = 1; 578 * continue without it. */
551 status = -EAGAIN; 579 break;
552 goto leave;
553 case DLM_RECO_NODE_DATA_REQUESTING: 580 case DLM_RECO_NODE_DATA_REQUESTING:
554 ndata->state = DLM_RECO_NODE_DATA_REQUESTED; 581 ndata->state = DLM_RECO_NODE_DATA_REQUESTED;
555 mlog(0, "now receiving recovery data from " 582 mlog(0, "now receiving recovery data from "
@@ -593,28 +620,12 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
593 BUG(); 620 BUG();
594 break; 621 break;
595 case DLM_RECO_NODE_DATA_DEAD: 622 case DLM_RECO_NODE_DATA_DEAD:
596 mlog(ML_NOTICE, "node %u died after " 623 mlog(0, "node %u died after "
597 "requesting recovery info for " 624 "requesting recovery info for "
598 "node %u\n", ndata->node_num, 625 "node %u\n", ndata->node_num,
599 dead_node); 626 dead_node);
600 spin_unlock(&dlm_reco_state_lock); 627 spin_unlock(&dlm_reco_state_lock);
601 // start all over 628 break;
602 destroy = 1;
603 status = -EAGAIN;
604 /* instead of spinning like crazy here,
605 * wait for the domain map to catch up
606 * with the network state. otherwise this
607 * can be hit hundreds of times before
608 * the node is really seen as dead. */
609 wait_event_timeout(dlm->dlm_reco_thread_wq,
610 dlm_is_node_dead(dlm,
611 ndata->node_num),
612 msecs_to_jiffies(1000));
613 mlog(0, "waited 1 sec for %u, "
614 "dead? %s\n", ndata->node_num,
615 dlm_is_node_dead(dlm, ndata->node_num) ?
616 "yes" : "no");
617 goto leave;
618 case DLM_RECO_NODE_DATA_RECEIVING: 629 case DLM_RECO_NODE_DATA_RECEIVING:
619 case DLM_RECO_NODE_DATA_REQUESTED: 630 case DLM_RECO_NODE_DATA_REQUESTED:
620 mlog(0, "%s: node %u still in state %s\n", 631 mlog(0, "%s: node %u still in state %s\n",
@@ -659,7 +670,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
659 jiffies, dlm->reco.dead_node, 670 jiffies, dlm->reco.dead_node,
660 dlm->node_num, dlm->reco.new_master); 671 dlm->node_num, dlm->reco.new_master);
661 destroy = 1; 672 destroy = 1;
662 status = ret; 673 status = 0;
663 /* rescan everything marked dirty along the way */ 674 /* rescan everything marked dirty along the way */
664 dlm_kick_thread(dlm, NULL); 675 dlm_kick_thread(dlm, NULL);
665 break; 676 break;
@@ -672,7 +683,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
672 683
673 } 684 }
674 685
675leave:
676 if (destroy) 686 if (destroy)
677 dlm_destroy_recovery_area(dlm, dead_node); 687 dlm_destroy_recovery_area(dlm, dead_node);
678 688
@@ -832,24 +842,22 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
832 842
833 if (dead_node != dlm->reco.dead_node || 843 if (dead_node != dlm->reco.dead_node ||
834 reco_master != dlm->reco.new_master) { 844 reco_master != dlm->reco.new_master) {
835 /* show extra debug info if the recovery state is messed */ 845 /* worker could have been created before the recovery master
836 mlog(ML_ERROR, "%s: bad reco state: reco(dead=%u, master=%u), " 846 * died. if so, do not continue, but do not error. */
837 "request(dead=%u, master=%u)\n", 847 if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) {
838 dlm->name, dlm->reco.dead_node, dlm->reco.new_master, 848 mlog(ML_NOTICE, "%s: will not send recovery state, "
839 dead_node, reco_master); 849 "recovery master %u died, thread=(dead=%u,mas=%u)"
840 mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u " 850 " current=(dead=%u,mas=%u)\n", dlm->name,
841 "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", 851 reco_master, dead_node, reco_master,
842 dlm->name, mres->lockname_len, mres->lockname, mres->master, 852 dlm->reco.dead_node, dlm->reco.new_master);
843 mres->num_locks, mres->total_locks, mres->flags, 853 } else {
844 dlm_get_lock_cookie_node(mres->ml[0].cookie), 854 mlog(ML_NOTICE, "%s: reco state invalid: reco(dead=%u, "
845 dlm_get_lock_cookie_seq(mres->ml[0].cookie), 855 "master=%u), request(dead=%u, master=%u)\n",
846 mres->ml[0].list, mres->ml[0].flags, 856 dlm->name, dlm->reco.dead_node,
847 mres->ml[0].type, mres->ml[0].convert_type, 857 dlm->reco.new_master, dead_node, reco_master);
848 mres->ml[0].highest_blocked, mres->ml[0].node); 858 }
849 BUG(); 859 goto leave;
850 } 860 }
851 BUG_ON(dead_node != dlm->reco.dead_node);
852 BUG_ON(reco_master != dlm->reco.new_master);
853 861
854 /* lock resources should have already been moved to the 862 /* lock resources should have already been moved to the
855 * dlm->reco.resources list. now move items from that list 863 * dlm->reco.resources list. now move items from that list
@@ -889,7 +897,7 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
889 dlm->name, reco_master, dead_node, ret); 897 dlm->name, reco_master, dead_node, ret);
890 } 898 }
891 } 899 }
892 900leave:
893 free_page((unsigned long)data); 901 free_page((unsigned long)data);
894} 902}
895 903