-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h   |   6
-rw-r--r--  fs/ocfs2/dlm/dlmlock.c     |  14
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c   | 103
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c |  38
4 files changed, 142 insertions, 19 deletions
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 9c772583744a..a8aec9341347 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -658,6 +658,7 @@ void dlm_complete_thread(struct dlm_ctxt *dlm);
 int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
+void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
 int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
 
@@ -762,6 +763,11 @@ int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data);
 int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data);
 int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data);
 int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data);
+int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+                          u8 nodenum, u8 *real_master);
+int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
+                               struct dlm_lock_resource *res, u8 *real_master);
+
 
 int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
                                struct dlm_lock_resource *res,
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 671d4ff222cc..6fea28318d6d 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -141,13 +141,23 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
                                           res->lockname.len)) {
                         kick_thread = 1;
                         call_ast = 1;
+                } else {
+                        mlog(0, "%s: returning DLM_NORMAL to "
+                             "node %u for reco lock\n", dlm->name,
+                             lock->ml.node);
                 }
         } else {
                 /* for NOQUEUE request, unless we get the
                  * lock right away, return DLM_NOTQUEUED */
-                if (flags & LKM_NOQUEUE)
+                if (flags & LKM_NOQUEUE) {
                         status = DLM_NOTQUEUED;
-                else {
+                        if (dlm_is_recovery_lock(res->lockname.name,
+                                                 res->lockname.len)) {
+                                mlog(0, "%s: returning NOTQUEUED to "
+                                     "node %u for reco lock\n", dlm->name,
+                                     lock->ml.node);
+                        }
+                } else {
                         dlm_lock_get(lock);
                         list_add_tail(&lock->list, &res->blocked);
                         kick_thread = 1;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 78ac3a00eb54..940be4c13b1f 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -239,6 +239,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
 static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
                                       struct dlm_lock_resource *res,
                                       u8 target);
+static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
+                                       struct dlm_lock_resource *res);
 
 
 int dlm_is_host_down(int errno)
@@ -677,6 +679,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
         struct dlm_node_iter iter;
         unsigned int namelen;
         int tries = 0;
+        int bit, wait_on_recovery = 0;
 
         BUG_ON(!lockid);
 
@@ -762,6 +765,18 @@ lookup:
                 dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
                 set_bit(dlm->node_num, mle->maybe_map);
                 list_add(&mle->list, &dlm->master_list);
+
+                /* still holding the dlm spinlock, check the recovery map
+                 * to see if there are any nodes that still need to be
+                 * considered. these will not appear in the mle nodemap
+                 * but they might own this lockres. wait on them. */
+                bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+                if (bit < O2NM_MAX_NODES) {
+                        mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
+                             "recover before lock mastery can begin\n",
+                             dlm->name, namelen, (char *)lockid, bit);
+                        wait_on_recovery = 1;
+                }
         }
 
         /* at this point there is either a DLM_MLE_BLOCK or a
@@ -779,6 +794,39 @@ lookup:
         spin_unlock(&dlm->master_lock);
         spin_unlock(&dlm->spinlock);
 
+        while (wait_on_recovery) {
+                /* any cluster changes that occurred after dropping the
+                 * dlm spinlock would be detectable be a change on the mle,
+                 * so we only need to clear out the recovery map once. */
+                if (dlm_is_recovery_lock(lockid, namelen)) {
+                        mlog(ML_NOTICE, "%s: recovery map is not empty, but "
+                             "must master $RECOVERY lock now\n", dlm->name);
+                        if (!dlm_pre_master_reco_lockres(dlm, res))
+                                wait_on_recovery = 0;
+                        else {
+                                mlog(0, "%s: waiting 500ms for heartbeat state "
+                                    "change\n", dlm->name);
+                                msleep(500);
+                        }
+                        continue;
+                }
+
+                dlm_kick_recovery_thread(dlm);
+                msleep(100);
+                dlm_wait_for_recovery(dlm);
+
+                spin_lock(&dlm->spinlock);
+                bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+                if (bit < O2NM_MAX_NODES) {
+                        mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
+                             "recover before lock mastery can begin\n",
+                             dlm->name, namelen, (char *)lockid, bit);
+                        wait_on_recovery = 1;
+                } else
+                        wait_on_recovery = 0;
+                spin_unlock(&dlm->spinlock);
+        }
+
         /* must wait for lock to be mastered elsewhere */
         if (blocked)
                 goto wait;
@@ -1835,6 +1883,61 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
         mlog(0, "finished with dlm_assert_master_worker\n");
 }
 
+/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread.
+ * We cannot wait for node recovery to complete to begin mastering this
+ * lockres because this lockres is used to kick off recovery! ;-)
+ * So, do a pre-check on all living nodes to see if any of those nodes
+ * think that $RECOVERY is currently mastered by a dead node. If so,
+ * we wait a short time to allow that node to get notified by its own
+ * heartbeat stack, then check again. All $RECOVERY lock resources
+ * mastered by dead nodes are purged when the hearbeat callback is
+ * fired, so we can know for sure that it is safe to continue once
+ * the node returns a live node or no node. */
+static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
+                                       struct dlm_lock_resource *res)
+{
+        struct dlm_node_iter iter;
+        int nodenum;
+        int ret = 0;
+        u8 master = DLM_LOCK_RES_OWNER_UNKNOWN;
+
+        spin_lock(&dlm->spinlock);
+        dlm_node_iter_init(dlm->domain_map, &iter);
+        spin_unlock(&dlm->spinlock);
+
+        while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
+                /* do not send to self */
+                if (nodenum == dlm->node_num)
+                        continue;
+                ret = dlm_do_master_requery(dlm, res, nodenum, &master);
+                if (ret < 0) {
+                        mlog_errno(ret);
+                        if (!dlm_is_host_down(ret))
+                                BUG();
+                        /* host is down, so answer for that node would be
+                         * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
+                }
+
+                if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
+                        /* check to see if this master is in the recovery map */
+                        spin_lock(&dlm->spinlock);
+                        if (test_bit(master, dlm->recovery_map)) {
+                                mlog(ML_NOTICE, "%s: node %u has not seen "
+                                     "node %u go down yet, and thinks the "
+                                     "dead node is mastering the recovery "
+                                     "lock. must wait.\n", dlm->name,
+                                     nodenum, master);
+                                ret = -EAGAIN;
+                        }
+                        spin_unlock(&dlm->spinlock);
+                        mlog(0, "%s: reco lock master is %u\n", dlm->name,
+                             master);
+                        break;
+                }
+        }
+        return ret;
+}
+
 
 /*
  * DLM_MIGRATE_LOCKRES
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 1e232000f3f7..36610bdf1231 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -58,7 +58,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node);
 static int dlm_recovery_thread(void *data);
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
-static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
+void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
 static int dlm_do_recovery(struct dlm_ctxt *dlm);
 
 static int dlm_pick_recovery_master(struct dlm_ctxt *dlm);
@@ -78,15 +78,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
                                  u8 send_to,
                                  struct dlm_lock_resource *res,
                                  int total_locks);
-static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
-                                      struct dlm_lock_resource *res,
-                                      u8 *real_master);
 static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
                                      struct dlm_lock_resource *res,
                                      struct dlm_migratable_lockres *mres);
-static int dlm_do_master_requery(struct dlm_ctxt *dlm,
-                                 struct dlm_lock_resource *res,
-                                 u8 nodenum, u8 *real_master);
 static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm);
 static int dlm_send_all_done_msg(struct dlm_ctxt *dlm,
                                  u8 dead_node, u8 send_to);
@@ -165,7 +159,7 @@ void dlm_dispatch_work(void *data)
  * RECOVERY THREAD
  */
 
-static void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
+void dlm_kick_recovery_thread(struct dlm_ctxt *dlm)
 {
         /* wake the recovery thread
          * this will wake the reco thread in one of three places
@@ -1316,9 +1310,8 @@ leave:
 
 
 
-static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
-                                      struct dlm_lock_resource *res,
-                                      u8 *real_master)
+int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
+                               struct dlm_lock_resource *res, u8 *real_master)
 {
         struct dlm_node_iter iter;
         int nodenum;
@@ -1360,8 +1353,10 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
                 ret = dlm_do_master_requery(dlm, res, nodenum, real_master);
                 if (ret < 0) {
                         mlog_errno(ret);
-                        BUG();
-                        /* TODO: need to figure a way to restart this */
+                        if (!dlm_is_host_down(ret))
+                                BUG();
+                        /* host is down, so answer for that node would be
+                         * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
                 }
                 if (*real_master != DLM_LOCK_RES_OWNER_UNKNOWN) {
                         mlog(0, "lock master is %u\n", *real_master);
@@ -1372,9 +1367,8 @@ static int dlm_lockres_master_requery(struct dlm_ctxt *dlm,
 }
 
 
-static int dlm_do_master_requery(struct dlm_ctxt *dlm,
-                                 struct dlm_lock_resource *res,
-                                 u8 nodenum, u8 *real_master)
+int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+                          u8 nodenum, u8 *real_master)
 {
         int ret = -EINVAL;
         struct dlm_master_requery req;
@@ -1739,6 +1733,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
                         } else
                                 continue;
 
+                        if (!list_empty(&res->recovering)) {
+                                mlog(0, "%s:%.*s: lockres was "
+                                     "marked RECOVERING, owner=%u\n",
+                                     dlm->name, res->lockname.len,
+                                     res->lockname.name, res->owner);
+                                list_del_init(&res->recovering);
+                        }
                         spin_lock(&res->spinlock);
                         dlm_change_lockres_owner(dlm, res, new_master);
                         res->state &= ~DLM_LOCK_RES_RECOVERING;
@@ -2258,7 +2259,10 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
                 mlog(0, "%u not in domain/live_nodes map "
                      "so setting it in reco map manually\n",
                      br->dead_node);
-                set_bit(br->dead_node, dlm->recovery_map);
+                /* force the recovery cleanup in __dlm_hb_node_down
+                 * both of these will be cleared in a moment */
+                set_bit(br->dead_node, dlm->domain_map);
+                set_bit(br->dead_node, dlm->live_nodes_map);
                 __dlm_hb_node_down(dlm, br->dead_node);
         }
         spin_unlock(&dlm->spinlock);
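
For readers skimming the patch, the following is a minimal, self-contained sketch in plain C (not the ocfs2/dlm API) of the pre-check-and-retry pattern the dlmmaster.c hunks introduce for the $RECOVERY lock: before trying to master it while the recovery map is non-empty, ask every live peer who it thinks owns the lock; if any peer still points at a dead node, back off briefly and re-check. All names here (NUM_NODES, peer_is_alive, node_needs_recovery, query_peer_for_owner, sleep_ms) are hypothetical stand-ins for illustration only.

/* Illustrative sketch only -- hypothetical helpers, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define NUM_NODES     8
#define OWNER_UNKNOWN 0xff

static bool peer_is_alive(int node)       { return node != 3; }   /* node 3 is "dead" */
static bool node_needs_recovery(int node) { return node == 3; }
static void sleep_ms(int ms)              { (void)ms; }           /* stand-in for msleep() */

/* Models a peer's answer to "who masters $RECOVERY?"; after a couple of
 * queries the peer's heartbeat "catches up" and it stops naming the dead node. */
static int query_peer_for_owner(int node)
{
        static int heartbeat_ticks;
        if (node == 5 && heartbeat_ticks++ < 2)
                return 3;
        return OWNER_UNKNOWN;
}

/* Returns 0 when no live peer still believes a dead node masters the lock. */
static int pre_master_reco_check(int self)
{
        for (int node = 0; node < NUM_NODES; node++) {
                if (node == self || !peer_is_alive(node))
                        continue;                 /* never query self or dead nodes */
                int owner = query_peer_for_owner(node);
                if (owner != OWNER_UNKNOWN && node_needs_recovery(owner)) {
                        printf("node %d still thinks dead node %d masters $RECOVERY\n",
                               node, owner);
                        return -1;                /* caller must wait and retry */
                }
        }
        return 0;
}

int main(void)
{
        int tries = 0;

        /* Mirrors the wait_on_recovery loop: back off until the pre-check passes. */
        while (pre_master_reco_check(0) != 0) {
                tries++;
                sleep_ms(500);
        }
        printf("safe to master $RECOVERY after %d retries\n", tries);
        return 0;
}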