about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ocfs2/dlm/dlmrecovery.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/dlm/dlmrecovery.c')
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c 50
1 file changed, 29 insertions, 21 deletions
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 1e232000f3f7..805cbabac051 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -58,7 +58,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node);
58static int dlm_recovery_thread(void *data); 58static int dlm_recovery_thread(void *data);
59void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); 59void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
60int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); 60int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
61static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); 61void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
62static int dlm_do_recovery(struct dlm_ctxt *dlm); 62static int dlm_do_recovery(struct dlm_ctxt *dlm);
63 63
64static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); 64static int dlm_pick_recovery_master(struct dlm_ctxt *dlm);
@@ -78,15 +78,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
78 u8 send_to, 78 u8 send_to,
79 struct dlm_lock_resource *res, 79 struct dlm_lock_resource *res,
80 int total_locks); 80 int total_locks);
81static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
82 struct dlm_lock_resource *res,
83 u8 *real_master);
84static int dlm_process_recovery_data(struct dlm_ctxt *dlm, 81static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
85 struct dlm_lock_resource *res, 82 struct dlm_lock_resource *res,
86 struct dlm_migratable_lockres *mres); 83 struct dlm_migratable_lockres *mres);
87static int dlm_do_master_requery(struct dlm_ctxt *dlm,
88 struct dlm_lock_resource *res,
89 u8 nodenum, u8 *real_master);
90static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm); 84static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm);
91static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, 85static int dlm_send_all_done_msg(struct dlm_ctxt *dlm,
92 u8 dead_node, u8 send_to); 86 u8 dead_node, u8 send_to);
@@ -165,7 +159,7 @@ void dlm_dispatch_work(void *data)
165 * RECOVERY THREAD 159 * RECOVERY THREAD
166 */ 160 */
167 161
168static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm) 162void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
169{ 163{
170 /* wake the recovery thread 164 /* wake the recovery thread
171 * this will wake the reco thread in one of three places 165 * this will wake the reco thread in one of three places
@@ -750,10 +744,12 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
750 dlm->name, dlm->reco.dead_node, dlm->reco.new_master, 744 dlm->name, dlm->reco.dead_node, dlm->reco.new_master,
751 dead_node, reco_master); 745 dead_node, reco_master);
752 mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u " 746 mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u "
753 "entry[0]={c=%"MLFu64",l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", 747 "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n",
754 dlm->name, mres->lockname_len, mres->lockname, mres->master, 748 dlm->name, mres->lockname_len, mres->lockname, mres->master,
755 mres->num_locks, mres->total_locks, mres->flags, 749 mres->num_locks, mres->total_locks, mres->flags,
756 mres->ml[0].cookie, mres->ml[0].list, mres->ml[0].flags, 750 dlm_get_lock_cookie_node(mres->ml[0].cookie),
751 dlm_get_lock_cookie_seq(mres->ml[0].cookie),
752 mres->ml[0].list, mres->ml[0].flags,
757 mres->ml[0].type, mres->ml[0].convert_type, 753 mres->ml[0].type, mres->ml[0].convert_type,
758 mres->ml[0].highest_blocked, mres->ml[0].node); 754 mres->ml[0].highest_blocked, mres->ml[0].node);
759 BUG(); 755 BUG();
@@ -1316,9 +1312,8 @@ leave:
1316 1312
1317 1313
1318 1314
1319static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, 1315int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
1320 struct dlm_lock_resource *res, 1316 struct dlm_lock_resource *res, u8 *real_master)
1321 u8 *real_master)
1322{ 1317{
1323 struct dlm_node_iter iter; 1318 struct dlm_node_iter iter;
1324 int nodenum; 1319 int nodenum;
@@ -1360,8 +1355,10 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
1360 ret = dlm_do_master_requery(dlm, res, nodenum, real_master); 1355 ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
1361 if (ret < 0) { 1356 if (ret < 0) {
1362 mlog_errno(ret); 1357 mlog_errno(ret);
1363 BUG(); 1358 if (!dlm_is_host_down(ret))
1364 /* TODO: need to figure a way to restart this */ 1359 BUG();
1360 /* host is down, so answer for that node would be
1361 * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
1365 } 1362 }
1366 if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) { 1363 if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) {
1367 mlog(0, "lock master is %u\n", *real_master); 1364 mlog(0, "lock master is %u\n", *real_master);
@@ -1372,9 +1369,8 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
1372} 1369}
1373 1370
1374 1371
1375static int dlm_do_master_requery(struct dlm_ctxt *dlm, 1372int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1376 struct dlm_lock_resource *res, 1373 u8 nodenum, u8 *real_master)
1377 u8 nodenum, u8 *real_master)
1378{ 1374{
1379 int ret = -EINVAL; 1375 int ret = -EINVAL;
1380 struct dlm_master_requery req; 1376 struct dlm_master_requery req;
@@ -1519,9 +1515,11 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1519 /* lock is always created locally first, and 1515 /* lock is always created locally first, and
1520 * destroyed locally last. it must be on the list */ 1516 * destroyed locally last. it must be on the list */
1521 if (!lock) { 1517 if (!lock) {
1518 u64 c = ml->cookie;
1522 mlog(ML_ERROR, "could not find local lock " 1519 mlog(ML_ERROR, "could not find local lock "
1523 "with cookie %"MLFu64"!\n", 1520 "with cookie %u:%llu!\n",
1524 ml->cookie); 1521 dlm_get_lock_cookie_node(c),
1522 dlm_get_lock_cookie_seq(c));
1525 BUG(); 1523 BUG();
1526 } 1524 }
1527 BUG_ON(lock->ml.node != ml->node); 1525 BUG_ON(lock->ml.node != ml->node);
@@ -1739,6 +1737,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
1739 } else 1737 } else
1740 continue; 1738 continue;
1741 1739
1740 if (!list_empty(&res->recovering)) {
1741 mlog(0, "%s:%.*s: lockres was "
1742 "marked RECOVERING, owner=%u\n",
1743 dlm->name, res->lockname.len,
1744 res->lockname.name, res->owner);
1745 list_del_init(&res->recovering);
1746 }
1742 spin_lock(&res->spinlock); 1747 spin_lock(&res->spinlock);
1743 dlm_change_lockres_owner(dlm, res, new_master); 1748 dlm_change_lockres_owner(dlm, res, new_master);
1744 res->state &= ~DLM_LOCK_RES_RECOVERING; 1749 res->state &= ~DLM_LOCK_RES_RECOVERING;
@@ -2258,7 +2263,10 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2258 mlog(0, "%u not in domain/live_nodes map " 2263 mlog(0, "%u not in domain/live_nodes map "
2259 "so setting it in reco map manually\n", 2264 "so setting it in reco map manually\n",
2260 br->dead_node); 2265 br->dead_node);
2261 set_bit(br->dead_node, dlm->recovery_map); 2266 /* force the recovery cleanup in __dlm_hb_node_down
2267 * both of these will be cleared in a moment */
2268 set_bit(br->dead_node, dlm->domain_map);
2269 set_bit(br->dead_node, dlm->live_nodes_map);
2262 __dlm_hb_node_down(dlm, br->dead_node); 2270 __dlm_hb_node_down(dlm, br->dead_node);
2263 } 2271 }
2264 spin_unlock(&dlm->spinlock); 2272 spin_unlock(&dlm->spinlock);