diff options
Diffstat (limited to 'fs/ocfs2/dlm/dlmrecovery.c')
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 50 |
1 files changed, 29 insertions, 21 deletions
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 1e232000f3f7..805cbabac051 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -58,7 +58,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); | |||
58 | static int dlm_recovery_thread(void *data); | 58 | static int dlm_recovery_thread(void *data); |
59 | void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); | 59 | void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); |
60 | int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); | 60 | int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); |
61 | static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); | 61 | void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); |
62 | static int dlm_do_recovery(struct dlm_ctxt *dlm); | 62 | static int dlm_do_recovery(struct dlm_ctxt *dlm); |
63 | 63 | ||
64 | static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); | 64 | static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); |
@@ -78,15 +78,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, | |||
78 | u8 send_to, | 78 | u8 send_to, |
79 | struct dlm_lock_resource *res, | 79 | struct dlm_lock_resource *res, |
80 | int total_locks); | 80 | int total_locks); |
81 | static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, | ||
82 | struct dlm_lock_resource *res, | ||
83 | u8 *real_master); | ||
84 | static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | 81 | static int dlm_process_recovery_data(struct dlm_ctxt *dlm, |
85 | struct dlm_lock_resource *res, | 82 | struct dlm_lock_resource *res, |
86 | struct dlm_migratable_lockres *mres); | 83 | struct dlm_migratable_lockres *mres); |
87 | static int dlm_do_master_requery(struct dlm_ctxt *dlm, | ||
88 | struct dlm_lock_resource *res, | ||
89 | u8 nodenum, u8 *real_master); | ||
90 | static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm); | 84 | static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm); |
91 | static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, | 85 | static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, |
92 | u8 dead_node, u8 send_to); | 86 | u8 dead_node, u8 send_to); |
@@ -165,7 +159,7 @@ void dlm_dispatch_work(void *data) | |||
165 | * RECOVERY THREAD | 159 | * RECOVERY THREAD |
166 | */ | 160 | */ |
167 | 161 | ||
168 | static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm) | 162 | void dlm_kick_recovery_thread(struct dlm_ctxt *dlm) |
169 | { | 163 | { |
170 | /* wake the recovery thread | 164 | /* wake the recovery thread |
171 | * this will wake the reco thread in one of three places | 165 | * this will wake the reco thread in one of three places |
@@ -750,10 +744,12 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data) | |||
750 | dlm->name, dlm->reco.dead_node, dlm->reco.new_master, | 744 | dlm->name, dlm->reco.dead_node, dlm->reco.new_master, |
751 | dead_node, reco_master); | 745 | dead_node, reco_master); |
752 | mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u " | 746 | mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u " |
753 | "entry[0]={c=%"MLFu64",l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", | 747 | "entry[0]={c=%u:%llu,l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n", |
754 | dlm->name, mres->lockname_len, mres->lockname, mres->master, | 748 | dlm->name, mres->lockname_len, mres->lockname, mres->master, |
755 | mres->num_locks, mres->total_locks, mres->flags, | 749 | mres->num_locks, mres->total_locks, mres->flags, |
756 | mres->ml[0].cookie, mres->ml[0].list, mres->ml[0].flags, | 750 | dlm_get_lock_cookie_node(mres->ml[0].cookie), |
751 | dlm_get_lock_cookie_seq(mres->ml[0].cookie), | ||
752 | mres->ml[0].list, mres->ml[0].flags, | ||
757 | mres->ml[0].type, mres->ml[0].convert_type, | 753 | mres->ml[0].type, mres->ml[0].convert_type, |
758 | mres->ml[0].highest_blocked, mres->ml[0].node); | 754 | mres->ml[0].highest_blocked, mres->ml[0].node); |
759 | BUG(); | 755 | BUG(); |
@@ -1316,9 +1312,8 @@ leave: | |||
1316 | 1312 | ||
1317 | 1313 | ||
1318 | 1314 | ||
1319 | static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, | 1315 | int dlm_lockres_master_requery(struct dlm_ctxt *dlm, |
1320 | struct dlm_lock_resource *res, | 1316 | struct dlm_lock_resource *res, u8 *real_master) |
1321 | u8 *real_master) | ||
1322 | { | 1317 | { |
1323 | struct dlm_node_iter iter; | 1318 | struct dlm_node_iter iter; |
1324 | int nodenum; | 1319 | int nodenum; |
@@ -1360,8 +1355,10 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, | |||
1360 | ret = dlm_do_master_requery(dlm, res, nodenum, real_master); | 1355 | ret = dlm_do_master_requery(dlm, res, nodenum, real_master); |
1361 | if (ret < 0) { | 1356 | if (ret < 0) { |
1362 | mlog_errno(ret); | 1357 | mlog_errno(ret); |
1363 | BUG(); | 1358 | if (!dlm_is_host_down(ret)) |
1364 | /* TODO: need to figure a way to restart this */ | 1359 | BUG(); |
1360 | /* host is down, so answer for that node would be | ||
1361 | * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */ | ||
1365 | } | 1362 | } |
1366 | if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) { | 1363 | if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) { |
1367 | mlog(0, "lock master is %u\n", *real_master); | 1364 | mlog(0, "lock master is %u\n", *real_master); |
@@ -1372,9 +1369,8 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, | |||
1372 | } | 1369 | } |
1373 | 1370 | ||
1374 | 1371 | ||
1375 | static int dlm_do_master_requery(struct dlm_ctxt *dlm, | 1372 | int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, |
1376 | struct dlm_lock_resource *res, | 1373 | u8 nodenum, u8 *real_master) |
1377 | u8 nodenum, u8 *real_master) | ||
1378 | { | 1374 | { |
1379 | int ret = -EINVAL; | 1375 | int ret = -EINVAL; |
1380 | struct dlm_master_requery req; | 1376 | struct dlm_master_requery req; |
@@ -1519,9 +1515,11 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1519 | /* lock is always created locally first, and | 1515 | /* lock is always created locally first, and |
1520 | * destroyed locally last. it must be on the list */ | 1516 | * destroyed locally last. it must be on the list */ |
1521 | if (!lock) { | 1517 | if (!lock) { |
1518 | u64 c = ml->cookie; | ||
1522 | mlog(ML_ERROR, "could not find local lock " | 1519 | mlog(ML_ERROR, "could not find local lock " |
1523 | "with cookie %"MLFu64"!\n", | 1520 | "with cookie %u:%llu!\n", |
1524 | ml->cookie); | 1521 | dlm_get_lock_cookie_node(c), |
1522 | dlm_get_lock_cookie_seq(c)); | ||
1525 | BUG(); | 1523 | BUG(); |
1526 | } | 1524 | } |
1527 | BUG_ON(lock->ml.node != ml->node); | 1525 | BUG_ON(lock->ml.node != ml->node); |
@@ -1739,6 +1737,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
1739 | } else | 1737 | } else |
1740 | continue; | 1738 | continue; |
1741 | 1739 | ||
1740 | if (!list_empty(&res->recovering)) { | ||
1741 | mlog(0, "%s:%.*s: lockres was " | ||
1742 | "marked RECOVERING, owner=%u\n", | ||
1743 | dlm->name, res->lockname.len, | ||
1744 | res->lockname.name, res->owner); | ||
1745 | list_del_init(&res->recovering); | ||
1746 | } | ||
1742 | spin_lock(&res->spinlock); | 1747 | spin_lock(&res->spinlock); |
1743 | dlm_change_lockres_owner(dlm, res, new_master); | 1748 | dlm_change_lockres_owner(dlm, res, new_master); |
1744 | res->state &= ~DLM_LOCK_RES_RECOVERING; | 1749 | res->state &= ~DLM_LOCK_RES_RECOVERING; |
@@ -2258,7 +2263,10 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data) | |||
2258 | mlog(0, "%u not in domain/live_nodes map " | 2263 | mlog(0, "%u not in domain/live_nodes map " |
2259 | "so setting it in reco map manually\n", | 2264 | "so setting it in reco map manually\n", |
2260 | br->dead_node); | 2265 | br->dead_node); |
2261 | set_bit(br->dead_node, dlm->recovery_map); | 2266 | /* force the recovery cleanup in __dlm_hb_node_down |
2267 | * both of these will be cleared in a moment */ | ||
2268 | set_bit(br->dead_node, dlm->domain_map); | ||
2269 | set_bit(br->dead_node, dlm->live_nodes_map); | ||
2262 | __dlm_hb_node_down(dlm, br->dead_node); | 2270 | __dlm_hb_node_down(dlm, br->dead_node); |
2263 | } | 2271 | } |
2264 | spin_unlock(&dlm->spinlock); | 2272 | spin_unlock(&dlm->spinlock); |