diff options
Diffstat (limited to 'fs/ocfs2/dlm/dlmrecovery.c')
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 53 |
1 files changed, 31 insertions, 22 deletions
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 7efab6d28a21..a3c312c43b90 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -430,6 +430,8 @@ static void dlm_begin_recovery(struct dlm_ctxt *dlm) | |||
430 | { | 430 | { |
431 | spin_lock(&dlm->spinlock); | 431 | spin_lock(&dlm->spinlock); |
432 | BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); | 432 | BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); |
433 | printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", | ||
434 | dlm->name, dlm->reco.dead_node); | ||
433 | dlm->reco.state |= DLM_RECO_STATE_ACTIVE; | 435 | dlm->reco.state |= DLM_RECO_STATE_ACTIVE; |
434 | spin_unlock(&dlm->spinlock); | 436 | spin_unlock(&dlm->spinlock); |
435 | } | 437 | } |
@@ -440,9 +442,18 @@ static void dlm_end_recovery(struct dlm_ctxt *dlm) | |||
440 | BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE)); | 442 | BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE)); |
441 | dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE; | 443 | dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE; |
442 | spin_unlock(&dlm->spinlock); | 444 | spin_unlock(&dlm->spinlock); |
445 | printk(KERN_NOTICE "o2dlm: End recovery on domain %s\n", dlm->name); | ||
443 | wake_up(&dlm->reco.event); | 446 | wake_up(&dlm->reco.event); |
444 | } | 447 | } |
445 | 448 | ||
449 | static void dlm_print_recovery_master(struct dlm_ctxt *dlm) | ||
450 | { | ||
451 | printk(KERN_NOTICE "o2dlm: Node %u (%s) is the Recovery Master for the " | ||
452 | "dead node %u in domain %s\n", dlm->reco.new_master, | ||
453 | (dlm->node_num == dlm->reco.new_master ? "me" : "he"), | ||
454 | dlm->reco.dead_node, dlm->name); | ||
455 | } | ||
456 | |||
446 | static int dlm_do_recovery(struct dlm_ctxt *dlm) | 457 | static int dlm_do_recovery(struct dlm_ctxt *dlm) |
447 | { | 458 | { |
448 | int status = 0; | 459 | int status = 0; |
@@ -505,9 +516,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) | |||
505 | } | 516 | } |
506 | mlog(0, "another node will master this recovery session.\n"); | 517 | mlog(0, "another node will master this recovery session.\n"); |
507 | } | 518 | } |
508 | mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n", | 519 | |
509 | dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master, | 520 | dlm_print_recovery_master(dlm); |
510 | dlm->node_num, dlm->reco.dead_node); | ||
511 | 521 | ||
512 | /* it is safe to start everything back up here | 522 | /* it is safe to start everything back up here |
513 | * because all of the dead node's lock resources | 523 | * because all of the dead node's lock resources |
@@ -518,15 +528,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) | |||
518 | return 0; | 528 | return 0; |
519 | 529 | ||
520 | master_here: | 530 | master_here: |
521 | mlog(ML_NOTICE, "(%d) Node %u is the Recovery Master for the Dead Node " | 531 | dlm_print_recovery_master(dlm); |
522 | "%u for Domain %s\n", task_pid_nr(dlm->dlm_reco_thread_task), | ||
523 | dlm->node_num, dlm->reco.dead_node, dlm->name); | ||
524 | 532 | ||
525 | status = dlm_remaster_locks(dlm, dlm->reco.dead_node); | 533 | status = dlm_remaster_locks(dlm, dlm->reco.dead_node); |
526 | if (status < 0) { | 534 | if (status < 0) { |
527 | /* we should never hit this anymore */ | 535 | /* we should never hit this anymore */ |
528 | mlog(ML_ERROR, "error %d remastering locks for node %u, " | 536 | mlog(ML_ERROR, "%s: Error %d remastering locks for node %u, " |
529 | "retrying.\n", status, dlm->reco.dead_node); | 537 | "retrying.\n", dlm->name, status, dlm->reco.dead_node); |
530 | /* yield a bit to allow any final network messages | 538 | /* yield a bit to allow any final network messages |
531 | * to get handled on remaining nodes */ | 539 | * to get handled on remaining nodes */ |
532 | msleep(100); | 540 | msleep(100); |
@@ -567,7 +575,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
567 | BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); | 575 | BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); |
568 | ndata->state = DLM_RECO_NODE_DATA_REQUESTING; | 576 | ndata->state = DLM_RECO_NODE_DATA_REQUESTING; |
569 | 577 | ||
570 | mlog(0, "requesting lock info from node %u\n", | 578 | mlog(0, "%s: Requesting lock info from node %u\n", dlm->name, |
571 | ndata->node_num); | 579 | ndata->node_num); |
572 | 580 | ||
573 | if (ndata->node_num == dlm->node_num) { | 581 | if (ndata->node_num == dlm->node_num) { |
@@ -640,7 +648,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
640 | spin_unlock(&dlm_reco_state_lock); | 648 | spin_unlock(&dlm_reco_state_lock); |
641 | } | 649 | } |
642 | 650 | ||
643 | mlog(0, "done requesting all lock info\n"); | 651 | mlog(0, "%s: Done requesting all lock info\n", dlm->name); |
644 | 652 | ||
645 | /* nodes should be sending reco data now | 653 | /* nodes should be sending reco data now |
646 | * just need to wait */ | 654 | * just need to wait */ |
@@ -802,10 +810,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
802 | 810 | ||
803 | /* negative status is handled by caller */ | 811 | /* negative status is handled by caller */ |
804 | if (ret < 0) | 812 | if (ret < 0) |
805 | mlog(ML_ERROR, "Error %d when sending message %u (key " | 813 | mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " |
806 | "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG, | 814 | "to recover dead node %u\n", dlm->name, ret, |
807 | dlm->key, request_from); | 815 | request_from, dead_node); |
808 | |||
809 | // return from here, then | 816 | // return from here, then |
810 | // sleep until all received or error | 817 | // sleep until all received or error |
811 | return ret; | 818 | return ret; |
@@ -956,9 +963,9 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to) | |||
956 | ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, | 963 | ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, |
957 | sizeof(done_msg), send_to, &tmpret); | 964 | sizeof(done_msg), send_to, &tmpret); |
958 | if (ret < 0) { | 965 | if (ret < 0) { |
959 | mlog(ML_ERROR, "Error %d when sending message %u (key " | 966 | mlog(ML_ERROR, "%s: Error %d send RECO_DATA_DONE to node %u " |
960 | "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG, | 967 | "to recover dead node %u\n", dlm->name, ret, send_to, |
961 | dlm->key, send_to); | 968 | dead_node); |
962 | if (!dlm_is_host_down(ret)) { | 969 | if (!dlm_is_host_down(ret)) { |
963 | BUG(); | 970 | BUG(); |
964 | } | 971 | } |
@@ -1127,9 +1134,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, | |||
1127 | if (ret < 0) { | 1134 | if (ret < 0) { |
1128 | /* XXX: negative status is not handled. | 1135 | /* XXX: negative status is not handled. |
1129 | * this will end up killing this node. */ | 1136 | * this will end up killing this node. */ |
1130 | mlog(ML_ERROR, "Error %d when sending message %u (key " | 1137 | mlog(ML_ERROR, "%s: res %.*s, Error %d send MIG_LOCKRES to " |
1131 | "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG, | 1138 | "node %u (%s)\n", dlm->name, mres->lockname_len, |
1132 | dlm->key, send_to); | 1139 | mres->lockname, ret, send_to, |
1140 | (orig_flags & DLM_MRES_MIGRATION ? | ||
1141 | "migration" : "recovery")); | ||
1133 | } else { | 1142 | } else { |
1134 | /* might get an -ENOMEM back here */ | 1143 | /* might get an -ENOMEM back here */ |
1135 | ret = status; | 1144 | ret = status; |
@@ -2324,9 +2333,9 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2324 | dlm_revalidate_lvb(dlm, res, dead_node); | 2333 | dlm_revalidate_lvb(dlm, res, dead_node); |
2325 | if (res->owner == dead_node) { | 2334 | if (res->owner == dead_node) { |
2326 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { | 2335 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
2327 | mlog(ML_NOTICE, "Ignore %.*s for " | 2336 | mlog(ML_NOTICE, "%s: res %.*s, Skip " |
2328 | "recovery as it is being freed\n", | 2337 | "recovery as it is being freed\n", |
2329 | res->lockname.len, | 2338 | dlm->name, res->lockname.len, |
2330 | res->lockname.name); | 2339 | res->lockname.name); |
2331 | } else | 2340 | } else |
2332 | dlm_move_lockres_to_recovery_list(dlm, | 2341 | dlm_move_lockres_to_recovery_list(dlm, |