diff options
| author | Kurt Hackel <kurt.hackel@oracle.com> | 2006-05-01 14:11:13 -0400 |
|---|---|---|
| committer | Mark Fasheh <mark.fasheh@oracle.com> | 2006-06-26 17:42:58 -0400 |
| commit | 466d1a4591c4e1bc3affd5c0cf3df5ad20338fb9 (patch) | |
| tree | ef7eef15780bfdaf339967be320b6a74146dbec5 /fs | |
| parent | 69d72b066cc5971318d9e29e34289b74cf8a9d22 (diff) | |
ocfs2: make dlm recovery finalization 2 stage
Makes it easier for the recovery process to deal with node death.
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 6 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 112 |
2 files changed, 99 insertions, 19 deletions
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 9e052445b0b5..78eccd0951e4 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
| @@ -71,7 +71,8 @@ static inline int dlm_is_recovery_lock(const char *lock_name, int name_len) | |||
| 71 | return 0; | 71 | return 0; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | #define DLM_RECO_STATE_ACTIVE 0x0001 | 74 | #define DLM_RECO_STATE_ACTIVE 0x0001 |
| 75 | #define DLM_RECO_STATE_FINALIZE 0x0002 | ||
| 75 | 76 | ||
| 76 | struct dlm_recovery_ctxt | 77 | struct dlm_recovery_ctxt |
| 77 | { | 78 | { |
| @@ -633,7 +634,8 @@ struct dlm_finalize_reco | |||
| 633 | { | 634 | { |
| 634 | u8 node_idx; | 635 | u8 node_idx; |
| 635 | u8 dead_node; | 636 | u8 dead_node; |
| 636 | __be16 pad1; | 637 | u8 flags; |
| 638 | u8 pad1; | ||
| 637 | __be32 pad2; | 639 | __be32 pad2; |
| 638 | }; | 640 | }; |
| 639 | 641 | ||
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 6ee8b3247129..19123ce8b306 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
| @@ -134,12 +134,18 @@ static inline void dlm_set_reco_master(struct dlm_ctxt *dlm, | |||
| 134 | dlm->reco.new_master = master; | 134 | dlm->reco.new_master = master; |
| 135 | } | 135 | } |
| 136 | 136 | ||
| 137 | static inline void dlm_reset_recovery(struct dlm_ctxt *dlm) | 137 | static inline void __dlm_reset_recovery(struct dlm_ctxt *dlm) |
| 138 | { | 138 | { |
| 139 | spin_lock(&dlm->spinlock); | 139 | assert_spin_locked(&dlm->spinlock); |
| 140 | clear_bit(dlm->reco.dead_node, dlm->recovery_map); | 140 | clear_bit(dlm->reco.dead_node, dlm->recovery_map); |
| 141 | dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); | 141 | dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); |
| 142 | dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM); | 142 | dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM); |
| 143 | } | ||
| 144 | |||
| 145 | static inline void dlm_reset_recovery(struct dlm_ctxt *dlm) | ||
| 146 | { | ||
| 147 | spin_lock(&dlm->spinlock); | ||
| 148 | __dlm_reset_recovery(dlm); | ||
| 143 | spin_unlock(&dlm->spinlock); | 149 | spin_unlock(&dlm->spinlock); |
| 144 | } | 150 | } |
| 145 | 151 | ||
| @@ -2074,6 +2080,20 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
| 2074 | { | 2080 | { |
| 2075 | assert_spin_locked(&dlm->spinlock); | 2081 | assert_spin_locked(&dlm->spinlock); |
| 2076 | 2082 | ||
| 2083 | if (dlm->reco.new_master == idx) { | ||
| 2084 | mlog(0, "%s: recovery master %d just died\n", | ||
| 2085 | dlm->name, idx); | ||
| 2086 | if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) { | ||
| 2087 | /* finalize1 was reached, so it is safe to clear | ||
| 2088 | * the new_master and dead_node. that recovery | ||
| 2089 | * is complete. */ | ||
| 2090 | mlog(0, "%s: dead master %d had reached " | ||
| 2091 | "finalize1 state, clearing\n", dlm->name, idx); | ||
| 2092 | dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; | ||
| 2093 | __dlm_reset_recovery(dlm); | ||
| 2094 | } | ||
| 2095 | } | ||
| 2096 | |||
| 2077 | /* check to see if the node is already considered dead */ | 2097 | /* check to see if the node is already considered dead */ |
| 2078 | if (!test_bit(idx, dlm->live_nodes_map)) { | 2098 | if (!test_bit(idx, dlm->live_nodes_map)) { |
| 2079 | mlog(0, "for domain %s, node %d is already dead. " | 2099 | mlog(0, "for domain %s, node %d is already dead. " |
| @@ -2364,6 +2384,14 @@ retry: | |||
| 2364 | * another ENOMEM */ | 2384 | * another ENOMEM */ |
| 2365 | msleep(100); | 2385 | msleep(100); |
| 2366 | goto retry; | 2386 | goto retry; |
| 2387 | } else if (ret == EAGAIN) { | ||
| 2388 | mlog(0, "%s: trying to start recovery of node " | ||
| 2389 | "%u, but node %u is waiting for last recovery " | ||
| 2390 | "to complete, backoff for a bit\n", dlm->name, | ||
| 2391 | dead_node, nodenum); | ||
| 2392 | /* TODO Look into replacing msleep with cond_resched() */ | ||
| 2393 | msleep(100); | ||
| 2394 | goto retry; | ||
| 2367 | } | 2395 | } |
| 2368 | } | 2396 | } |
| 2369 | 2397 | ||
| @@ -2379,6 +2407,17 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data) | |||
| 2379 | if (!dlm_grab(dlm)) | 2407 | if (!dlm_grab(dlm)) |
| 2380 | return 0; | 2408 | return 0; |
| 2381 | 2409 | ||
| 2410 | spin_lock(&dlm->spinlock); | ||
| 2411 | if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) { | ||
| 2412 | mlog(0, "%s: node %u wants to recover node %u (%u:%u) " | ||
| 2413 | "but this node is in finalize state, waiting on finalize2\n", | ||
| 2414 | dlm->name, br->node_idx, br->dead_node, | ||
| 2415 | dlm->reco.dead_node, dlm->reco.new_master); | ||
| 2416 | spin_unlock(&dlm->spinlock); | ||
| 2417 | return EAGAIN; | ||
| 2418 | } | ||
| 2419 | spin_unlock(&dlm->spinlock); | ||
| 2420 | |||
| 2382 | mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n", | 2421 | mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n", |
| 2383 | dlm->name, br->node_idx, br->dead_node, | 2422 | dlm->name, br->node_idx, br->dead_node, |
| 2384 | dlm->reco.dead_node, dlm->reco.new_master); | 2423 | dlm->reco.dead_node, dlm->reco.new_master); |
| @@ -2432,6 +2471,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data) | |||
| 2432 | return 0; | 2471 | return 0; |
| 2433 | } | 2472 | } |
| 2434 | 2473 | ||
| 2474 | #define DLM_FINALIZE_STAGE2 0x01 | ||
| 2435 | static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm) | 2475 | static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm) |
| 2436 | { | 2476 | { |
| 2437 | int ret = 0; | 2477 | int ret = 0; |
| @@ -2439,25 +2479,31 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm) | |||
| 2439 | struct dlm_node_iter iter; | 2479 | struct dlm_node_iter iter; |
| 2440 | int nodenum; | 2480 | int nodenum; |
| 2441 | int status; | 2481 | int status; |
| 2482 | int stage = 1; | ||
| 2442 | 2483 | ||
| 2443 | mlog(0, "finishing recovery for node %s:%u\n", | 2484 | mlog(0, "finishing recovery for node %s:%u, " |
| 2444 | dlm->name, dlm->reco.dead_node); | 2485 | "stage %d\n", dlm->name, dlm->reco.dead_node, stage); |
| 2445 | 2486 | ||
| 2446 | spin_lock(&dlm->spinlock); | 2487 | spin_lock(&dlm->spinlock); |
| 2447 | dlm_node_iter_init(dlm->domain_map, &iter); | 2488 | dlm_node_iter_init(dlm->domain_map, &iter); |
| 2448 | spin_unlock(&dlm->spinlock); | 2489 | spin_unlock(&dlm->spinlock); |
| 2449 | 2490 | ||
| 2491 | stage2: | ||
| 2450 | memset(&fr, 0, sizeof(fr)); | 2492 | memset(&fr, 0, sizeof(fr)); |
| 2451 | fr.node_idx = dlm->node_num; | 2493 | fr.node_idx = dlm->node_num; |
| 2452 | fr.dead_node = dlm->reco.dead_node; | 2494 | fr.dead_node = dlm->reco.dead_node; |
| 2495 | if (stage == 2) | ||
| 2496 | fr.flags |= DLM_FINALIZE_STAGE2; | ||
| 2453 | 2497 | ||
| 2454 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { | 2498 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { |
| 2455 | if (nodenum == dlm->node_num) | 2499 | if (nodenum == dlm->node_num) |
| 2456 | continue; | 2500 | continue; |
| 2457 | ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key, | 2501 | ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key, |
| 2458 | &fr, sizeof(fr), nodenum, &status); | 2502 | &fr, sizeof(fr), nodenum, &status); |
| 2459 | if (ret >= 0) { | 2503 | if (ret >= 0) |
| 2460 | ret = status; | 2504 | ret = status; |
| 2505 | if (ret < 0) { | ||
| 2506 | mlog_errno(ret); | ||
| 2461 | if (dlm_is_host_down(ret)) { | 2507 | if (dlm_is_host_down(ret)) { |
| 2462 | /* this has no effect on this recovery | 2508 | /* this has no effect on this recovery |
| 2463 | * session, so set the status to zero to | 2509 | * session, so set the status to zero to |
| @@ -2466,12 +2512,15 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm) | |||
| 2466 | "node finished recovery.\n", nodenum); | 2512 | "node finished recovery.\n", nodenum); |
| 2467 | ret = 0; | 2513 | ret = 0; |
| 2468 | } | 2514 | } |
| 2469 | } | ||
| 2470 | if (ret < 0) { | ||
| 2471 | mlog_errno(ret); | ||
| 2472 | break; | 2515 | break; |
| 2473 | } | 2516 | } |
| 2474 | } | 2517 | } |
| 2518 | if (stage == 1) { | ||
| 2519 | /* reset the node_iter back to the top and send finalize2 */ | ||
| 2520 | iter.curnode = -1; | ||
| 2521 | stage = 2; | ||
| 2522 | goto stage2; | ||
| 2523 | } | ||
| 2475 | 2524 | ||
| 2476 | return ret; | 2525 | return ret; |
| 2477 | } | 2526 | } |
| @@ -2480,15 +2529,19 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data) | |||
| 2480 | { | 2529 | { |
| 2481 | struct dlm_ctxt *dlm = data; | 2530 | struct dlm_ctxt *dlm = data; |
| 2482 | struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; | 2531 | struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; |
| 2532 | int stage = 1; | ||
| 2483 | 2533 | ||
| 2484 | /* ok to return 0, domain has gone away */ | 2534 | /* ok to return 0, domain has gone away */ |
| 2485 | if (!dlm_grab(dlm)) | 2535 | if (!dlm_grab(dlm)) |
| 2486 | return 0; | 2536 | return 0; |
| 2487 | 2537 | ||
| 2488 | mlog(0, "%s: node %u finalizing recovery of node %u (%u:%u)\n", | 2538 | if (fr->flags & DLM_FINALIZE_STAGE2) |
| 2489 | dlm->name, fr->node_idx, fr->dead_node, | 2539 | stage = 2; |
| 2490 | dlm->reco.dead_node, dlm->reco.new_master); | ||
| 2491 | 2540 | ||
| 2541 | mlog(0, "%s: node %u finalizing recovery stage%d of " | ||
| 2542 | "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage, | ||
| 2543 | fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master); | ||
| 2544 | |||
| 2492 | spin_lock(&dlm->spinlock); | 2545 | spin_lock(&dlm->spinlock); |
| 2493 | 2546 | ||
| 2494 | if (dlm->reco.new_master != fr->node_idx) { | 2547 | if (dlm->reco.new_master != fr->node_idx) { |
| @@ -2504,13 +2557,38 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data) | |||
| 2504 | BUG(); | 2557 | BUG(); |
| 2505 | } | 2558 | } |
| 2506 | 2559 | ||
| 2507 | dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx); | 2560 | switch (stage) { |
| 2508 | 2561 | case 1: | |
| 2509 | spin_unlock(&dlm->spinlock); | 2562 | dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx); |
| 2510 | 2563 | if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) { | |
| 2511 | dlm_reset_recovery(dlm); | 2564 | mlog(ML_ERROR, "%s: received finalize1 from " |
| 2565 | "new master %u for dead node %u, but " | ||
| 2566 | "this node has already received it!\n", | ||
| 2567 | dlm->name, fr->node_idx, fr->dead_node); | ||
| 2568 | dlm_print_reco_node_status(dlm); | ||
| 2569 | BUG(); | ||
| 2570 | } | ||
| 2571 | dlm->reco.state |= DLM_RECO_STATE_FINALIZE; | ||
| 2572 | spin_unlock(&dlm->spinlock); | ||
| 2573 | break; | ||
| 2574 | case 2: | ||
| 2575 | if (!(dlm->reco.state & DLM_RECO_STATE_FINALIZE)) { | ||
| 2576 | mlog(ML_ERROR, "%s: received finalize2 from " | ||
| 2577 | "new master %u for dead node %u, but " | ||
| 2578 | "this node did not have finalize1!\n", | ||
| 2579 | dlm->name, fr->node_idx, fr->dead_node); | ||
| 2580 | dlm_print_reco_node_status(dlm); | ||
| 2581 | BUG(); | ||
| 2582 | } | ||
| 2583 | dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; | ||
| 2584 | spin_unlock(&dlm->spinlock); | ||
| 2585 | dlm_reset_recovery(dlm); | ||
| 2586 | dlm_kick_recovery_thread(dlm); | ||
| 2587 | break; | ||
| 2588 | default: | ||
| 2589 | BUG(); | ||
| 2590 | } | ||
| 2512 | 2591 | ||
| 2513 | dlm_kick_recovery_thread(dlm); | ||
| 2514 | mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n", | 2592 | mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n", |
| 2515 | dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master); | 2593 | dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master); |
| 2516 | 2594 | ||
