aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlm/dlmrecovery.c
diff options
context:
space:
mode:
author Kurt Hackel <kurt.hackel@oracle.com> 2006-03-06 17:08:49 -0500
committer Mark Fasheh <mark.fasheh@oracle.com> 2006-03-24 17:58:25 -0500
commit c03872f5f50bc10f2a1a485f08879a8d01bcfe49 (patch)
tree 9ac370cf1a7c015522af75af3f60e9d6c4425bbc /fs/ocfs2/dlm/dlmrecovery.c
parent 9c6510a5bfe2f1c5f5b93386c06954be02e974e4 (diff)
[PATCH] ocfs2: dlm recovery fixes
When starting lock mastery (excepting the recovery lock), wait on any nodes needing recovery. Fix one instance where lock resources were left attached to the recovery list after recovery completed. Ensure that the node_down code is run uniformly regardless of which node found the dead node first.

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlm/dlmrecovery.c')
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c 38
1 files changed, 21 insertions, 17 deletions
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 1e232000f3f..36610bdf123 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -58,7 +58,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node);
58static int dlm_recovery_thread(void *data); 58static int dlm_recovery_thread(void *data);
59void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); 59void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
60int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); 60int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
61static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); 61void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
62static int dlm_do_recovery(struct dlm_ctxt *dlm); 62static int dlm_do_recovery(struct dlm_ctxt *dlm);
63 63
64static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); 64static int dlm_pick_recovery_master(struct dlm_ctxt *dlm);
@@ -78,15 +78,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
78 u8 send_to, 78 u8 send_to,
79 struct dlm_lock_resource *res, 79 struct dlm_lock_resource *res,
80 int total_locks); 80 int total_locks);
81static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
82 struct dlm_lock_resource *res,
83 u8 *real_master);
84static int dlm_process_recovery_data(struct dlm_ctxt *dlm, 81static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
85 struct dlm_lock_resource *res, 82 struct dlm_lock_resource *res,
86 struct dlm_migratable_lockres *mres); 83 struct dlm_migratable_lockres *mres);
87static int dlm_do_master_requery(struct dlm_ctxt *dlm,
88 struct dlm_lock_resource *res,
89 u8 nodenum, u8 *real_master);
90static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm); 84static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm);
91static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, 85static int dlm_send_all_done_msg(struct dlm_ctxt *dlm,
92 u8 dead_node, u8 send_to); 86 u8 dead_node, u8 send_to);
@@ -165,7 +159,7 @@ void dlm_dispatch_work(void *data)
165 * RECOVERY THREAD 159 * RECOVERY THREAD
166 */ 160 */
167 161
168static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm) 162void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
169{ 163{
170 /* wake the recovery thread 164 /* wake the recovery thread
171 * this will wake the reco thread in one of three places 165 * this will wake the reco thread in one of three places
@@ -1316,9 +1310,8 @@ leave:
1316 1310
1317 1311
1318 1312
1319static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, 1313int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
1320 struct dlm_lock_resource *res, 1314 struct dlm_lock_resource *res, u8 *real_master)
1321 u8 *real_master)
1322{ 1315{
1323 struct dlm_node_iter iter; 1316 struct dlm_node_iter iter;
1324 int nodenum; 1317 int nodenum;
@@ -1360,8 +1353,10 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
1360 ret = dlm_do_master_requery(dlm, res, nodenum, real_master); 1353 ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
1361 if (ret < 0) { 1354 if (ret < 0) {
1362 mlog_errno(ret); 1355 mlog_errno(ret);
1363 BUG(); 1356 if (!dlm_is_host_down(ret))
1364 /* TODO: need to figure a way to restart this */ 1357 BUG();
1358 /* host is down, so answer for that node would be
1359 * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
1365 } 1360 }
1366 if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) { 1361 if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) {
1367 mlog(0, "lock master is %u\n", *real_master); 1362 mlog(0, "lock master is %u\n", *real_master);
@@ -1372,9 +1367,8 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
1372} 1367}
1373 1368
1374 1369
1375static int dlm_do_master_requery(struct dlm_ctxt *dlm, 1370int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1376 struct dlm_lock_resource *res, 1371 u8 nodenum, u8 *real_master)
1377 u8 nodenum, u8 *real_master)
1378{ 1372{
1379 int ret = -EINVAL; 1373 int ret = -EINVAL;
1380 struct dlm_master_requery req; 1374 struct dlm_master_requery req;
@@ -1739,6 +1733,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1739 } else 1733 } else
1740 continue; 1734 continue;
1741 1735
1736 if (!list_empty(&res->recovering)) {
1737 mlog(0, "%s:%.*s: lockres was "
1738 "marked RECOVERING, owner=%u\n",
1739 dlm->name, res->lockname.len,
1740 res->lockname.name, res->owner);
1741 list_del_init(&res->recovering);
1742 }
1742 spin_lock(&res->spinlock); 1743 spin_lock(&res->spinlock);
1743 dlm_change_lockres_owner(dlm, res, new_master); 1744 dlm_change_lockres_owner(dlm, res, new_master);
1744 res->state &= ~DLM_LOCK_RES_RECOVERING; 1745 res->state &= ~DLM_LOCK_RES_RECOVERING;
@@ -2258,7 +2259,10 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2258 mlog(0, "%u not in domain/live_nodes map " 2259 mlog(0, "%u not in domain/live_nodes map "
2259 "so setting it in reco map manually\n", 2260 "so setting it in reco map manually\n",
2260 br->dead_node); 2261 br->dead_node);
2261 set_bit(br->dead_node, dlm->recovery_map); 2262 /* force the recovery cleanup in __dlm_hb_node_down
2263 * both of these will be cleared in a moment */
2264 set_bit(br->dead_node, dlm->domain_map);
2265 set_bit(br->dead_node, dlm->live_nodes_map);
2262 __dlm_hb_node_down(dlm, br->dead_node); 2266 __dlm_hb_node_down(dlm, br->dead_node);
2263 } 2267 }
2264 spin_unlock(&dlm->spinlock); 2268 spin_unlock(&dlm->spinlock);