about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ocfs2
diff options
context:
space:
mode:
author Kurt Hackel <kurt.hackel@oracle.com> 2006-05-01 14:11:13 -0400
committer Mark Fasheh <mark.fasheh@oracle.com> 2006-06-26 17:42:58 -0400
commit 466d1a4591c4e1bc3affd5c0cf3df5ad20338fb9 (patch)
tree ef7eef15780bfdaf339967be320b6a74146dbec5 /fs/ocfs2
parent 69d72b066cc5971318d9e29e34289b74cf8a9d22 (diff)
ocfs2: make dlm recovery finalization 2 stage
Makes it easier for the recovery process to deal with node death.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- fs/ocfs2/dlm/dlmcommon.h | 6
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c | 112
2 files changed, 99 insertions, 19 deletions
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 9e052445b0b5..78eccd0951e4 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -71,7 +71,8 @@ static inline int dlm_is_recovery_lock(const char *lock_name, int name_len)
71 return 0; 71 return 0;
72} 72}
73 73
74#define DLM_RECO_STATE_ACTIVE 0x0001 74#define DLM_RECO_STATE_ACTIVE 0x0001
75#define DLM_RECO_STATE_FINALIZE 0x0002
75 76
76struct dlm_recovery_ctxt 77struct dlm_recovery_ctxt
77{ 78{
@@ -633,7 +634,8 @@ struct dlm_finalize_reco
633{ 634{
634 u8 node_idx; 635 u8 node_idx;
635 u8 dead_node; 636 u8 dead_node;
636 __be16 pad1; 637 u8 flags;
638 u8 pad1;
637 __be32 pad2; 639 __be32 pad2;
638}; 640};
639 641
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 6ee8b3247129..19123ce8b306 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -134,12 +134,18 @@ static inline void dlm_set_reco_master(struct dlm_ctxt *dlm,
134 dlm->reco.new_master = master; 134 dlm->reco.new_master = master;
135} 135}
136 136
137static inline void dlm_reset_recovery(struct dlm_ctxt *dlm) 137static inline void __dlm_reset_recovery(struct dlm_ctxt *dlm)
138{ 138{
139 spin_lock(&dlm->spinlock); 139 assert_spin_locked(&dlm->spinlock);
140 clear_bit(dlm->reco.dead_node, dlm->recovery_map); 140 clear_bit(dlm->reco.dead_node, dlm->recovery_map);
141 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); 141 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
142 dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM); 142 dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM);
143}
144
145static inline void dlm_reset_recovery(struct dlm_ctxt *dlm)
146{
147 spin_lock(&dlm->spinlock);
148 __dlm_reset_recovery(dlm);
143 spin_unlock(&dlm->spinlock); 149 spin_unlock(&dlm->spinlock);
144} 150}
145 151
@@ -2074,6 +2080,20 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
2074{ 2080{
2075 assert_spin_locked(&dlm->spinlock); 2081 assert_spin_locked(&dlm->spinlock);
2076 2082
2083 if (dlm->reco.new_master == idx) {
2084 mlog(0, "%s: recovery master %d just died\n",
2085 dlm->name, idx);
2086 if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
2087 /* finalize1 was reached, so it is safe to clear
2088 * the new_master and dead_node. that recovery
2089 * is complete. */
2090 mlog(0, "%s: dead master %d had reached "
2091 "finalize1 state, clearing\n", dlm->name, idx);
2092 dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
2093 __dlm_reset_recovery(dlm);
2094 }
2095 }
2096
2077 /* check to see if the node is already considered dead */ 2097 /* check to see if the node is already considered dead */
2078 if (!test_bit(idx, dlm->live_nodes_map)) { 2098 if (!test_bit(idx, dlm->live_nodes_map)) {
2079 mlog(0, "for domain %s, node %d is already dead. " 2099 mlog(0, "for domain %s, node %d is already dead. "
@@ -2364,6 +2384,14 @@ retry:
2364 * another ENOMEM */ 2384 * another ENOMEM */
2365 msleep(100); 2385 msleep(100);
2366 goto retry; 2386 goto retry;
2387 } else if (ret == EAGAIN) {
2388 mlog(0, "%s: trying to start recovery of node "
2389 "%u, but node %u is waiting for last recovery "
2390 "to complete, backoff for a bit\n", dlm->name,
2391 dead_node, nodenum);
2392 /* TODO Look into replacing msleep with cond_resched() */
2393 msleep(100);
2394 goto retry;
2367 } 2395 }
2368 } 2396 }
2369 2397
@@ -2379,6 +2407,17 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2379 if (!dlm_grab(dlm)) 2407 if (!dlm_grab(dlm))
2380 return 0; 2408 return 0;
2381 2409
2410 spin_lock(&dlm->spinlock);
2411 if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
2412 mlog(0, "%s: node %u wants to recover node %u (%u:%u) "
2413 "but this node is in finalize state, waiting on finalize2\n",
2414 dlm->name, br->node_idx, br->dead_node,
2415 dlm->reco.dead_node, dlm->reco.new_master);
2416 spin_unlock(&dlm->spinlock);
2417 return EAGAIN;
2418 }
2419 spin_unlock(&dlm->spinlock);
2420
2382 mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n", 2421 mlog(0, "%s: node %u wants to recover node %u (%u:%u)\n",
2383 dlm->name, br->node_idx, br->dead_node, 2422 dlm->name, br->node_idx, br->dead_node,
2384 dlm->reco.dead_node, dlm->reco.new_master); 2423 dlm->reco.dead_node, dlm->reco.new_master);
@@ -2432,6 +2471,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2432 return 0; 2471 return 0;
2433} 2472}
2434 2473
2474#define DLM_FINALIZE_STAGE2 0x01
2435static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm) 2475static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
2436{ 2476{
2437 int ret = 0; 2477 int ret = 0;
@@ -2439,25 +2479,31 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
2439 struct dlm_node_iter iter; 2479 struct dlm_node_iter iter;
2440 int nodenum; 2480 int nodenum;
2441 int status; 2481 int status;
2482 int stage = 1;
2442 2483
2443 mlog(0, "finishing recovery for node %s:%u\n", 2484 mlog(0, "finishing recovery for node %s:%u, "
2444 dlm->name, dlm->reco.dead_node); 2485 "stage %d\n", dlm->name, dlm->reco.dead_node, stage);
2445 2486
2446 spin_lock(&dlm->spinlock); 2487 spin_lock(&dlm->spinlock);
2447 dlm_node_iter_init(dlm->domain_map, &iter); 2488 dlm_node_iter_init(dlm->domain_map, &iter);
2448 spin_unlock(&dlm->spinlock); 2489 spin_unlock(&dlm->spinlock);
2449 2490
2491stage2:
2450 memset(&fr, 0, sizeof(fr)); 2492 memset(&fr, 0, sizeof(fr));
2451 fr.node_idx = dlm->node_num; 2493 fr.node_idx = dlm->node_num;
2452 fr.dead_node = dlm->reco.dead_node; 2494 fr.dead_node = dlm->reco.dead_node;
2495 if (stage == 2)
2496 fr.flags |= DLM_FINALIZE_STAGE2;
2453 2497
2454 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { 2498 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
2455 if (nodenum == dlm->node_num) 2499 if (nodenum == dlm->node_num)
2456 continue; 2500 continue;
2457 ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key, 2501 ret = o2net_send_message(DLM_FINALIZE_RECO_MSG, dlm->key,
2458 &fr, sizeof(fr), nodenum, &status); 2502 &fr, sizeof(fr), nodenum, &status);
2459 if (ret >= 0) { 2503 if (ret >= 0)
2460 ret = status; 2504 ret = status;
2505 if (ret < 0) {
2506 mlog_errno(ret);
2461 if (dlm_is_host_down(ret)) { 2507 if (dlm_is_host_down(ret)) {
2462 /* this has no effect on this recovery 2508 /* this has no effect on this recovery
2463 * session, so set the status to zero to 2509 * session, so set the status to zero to
@@ -2466,12 +2512,15 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
2466 "node finished recovery.\n", nodenum); 2512 "node finished recovery.\n", nodenum);
2467 ret = 0; 2513 ret = 0;
2468 } 2514 }
2469 }
2470 if (ret < 0) {
2471 mlog_errno(ret);
2472 break; 2515 break;
2473 } 2516 }
2474 } 2517 }
2518 if (stage == 1) {
2519 /* reset the node_iter back to the top and send finalize2 */
2520 iter.curnode = -1;
2521 stage = 2;
2522 goto stage2;
2523 }
2475 2524
2476 return ret; 2525 return ret;
2477} 2526}
@@ -2480,15 +2529,19 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2480{ 2529{
2481 struct dlm_ctxt *dlm = data; 2530 struct dlm_ctxt *dlm = data;
2482 struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; 2531 struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf;
2532 int stage = 1;
2483 2533
2484 /* ok to return 0, domain has gone away */ 2534 /* ok to return 0, domain has gone away */
2485 if (!dlm_grab(dlm)) 2535 if (!dlm_grab(dlm))
2486 return 0; 2536 return 0;
2487 2537
2488 mlog(0, "%s: node %u finalizing recovery of node %u (%u:%u)\n", 2538 if (fr->flags & DLM_FINALIZE_STAGE2)
2489 dlm->name, fr->node_idx, fr->dead_node, 2539 stage = 2;
2490 dlm->reco.dead_node, dlm->reco.new_master);
2491 2540
2541 mlog(0, "%s: node %u finalizing recovery stage%d of "
2542 "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
2543 fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
2544
2492 spin_lock(&dlm->spinlock); 2545 spin_lock(&dlm->spinlock);
2493 2546
2494 if (dlm->reco.new_master != fr->node_idx) { 2547 if (dlm->reco.new_master != fr->node_idx) {
@@ -2504,13 +2557,38 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2504 BUG(); 2557 BUG();
2505 } 2558 }
2506 2559
2507 dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx); 2560 switch (stage) {
2508 2561 case 1:
2509 spin_unlock(&dlm->spinlock); 2562 dlm_finish_local_lockres_recovery(dlm, fr->dead_node, fr->node_idx);
2510 2563 if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) {
2511 dlm_reset_recovery(dlm); 2564 mlog(ML_ERROR, "%s: received finalize1 from "
2565 "new master %u for dead node %u, but "
2566 "this node has already received it!\n",
2567 dlm->name, fr->node_idx, fr->dead_node);
2568 dlm_print_reco_node_status(dlm);
2569 BUG();
2570 }
2571 dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
2572 spin_unlock(&dlm->spinlock);
2573 break;
2574 case 2:
2575 if (!(dlm->reco.state & DLM_RECO_STATE_FINALIZE)) {
2576 mlog(ML_ERROR, "%s: received finalize2 from "
2577 "new master %u for dead node %u, but "
2578 "this node did not have finalize1!\n",
2579 dlm->name, fr->node_idx, fr->dead_node);
2580 dlm_print_reco_node_status(dlm);
2581 BUG();
2582 }
2583 dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
2584 spin_unlock(&dlm->spinlock);
2585 dlm_reset_recovery(dlm);
2586 dlm_kick_recovery_thread(dlm);
2587 break;
2588 default:
2589 BUG();
2590 }
2512 2591
2513 dlm_kick_recovery_thread(dlm);
2514 mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n", 2592 mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n",
2515 dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master); 2593 dlm->name, fr->node_idx, dlm->reco.dead_node, dlm->reco.new_master);
2516 2594