aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlm/dlmmaster.c
diff options
context:
space:
mode:
authorKurt Hackel <kurt.hackel@oracle.com>2006-03-06 17:08:49 -0500
committerMark Fasheh <mark.fasheh@oracle.com>2006-03-24 17:58:25 -0500
commitc03872f5f50bc10f2a1a485f08879a8d01bcfe49 (patch)
tree9ac370cf1a7c015522af75af3f60e9d6c4425bbc /fs/ocfs2/dlm/dlmmaster.c
parent9c6510a5bfe2f1c5f5b93386c06954be02e974e4 (diff)
[PATCH] ocfs2: dlm recovery fixes
when starting lock mastery (excepting the recovery lock) wait on any nodes needing recovery. fix one instance where lock resources were left attached to the recovery list after recovery completed. ensure that the node_down code is run uniformly regardless of which node found the dead node first. Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlm/dlmmaster.c')
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c103
1 files changed, 103 insertions, 0 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 78ac3a00eb54..940be4c13b1f 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -239,6 +239,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
239static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm, 239static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
240 struct dlm_lock_resource *res, 240 struct dlm_lock_resource *res,
241 u8 target); 241 u8 target);
242static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
243 struct dlm_lock_resource *res);
242 244
243 245
244int dlm_is_host_down(int errno) 246int dlm_is_host_down(int errno)
@@ -677,6 +679,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
677 struct dlm_node_iter iter; 679 struct dlm_node_iter iter;
678 unsigned int namelen; 680 unsigned int namelen;
679 int tries = 0; 681 int tries = 0;
682 int bit, wait_on_recovery = 0;
680 683
681 BUG_ON(!lockid); 684 BUG_ON(!lockid);
682 685
@@ -762,6 +765,18 @@ lookup:
762 dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0); 765 dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
763 set_bit(dlm->node_num, mle->maybe_map); 766 set_bit(dlm->node_num, mle->maybe_map);
764 list_add(&mle->list, &dlm->master_list); 767 list_add(&mle->list, &dlm->master_list);
768
769 /* still holding the dlm spinlock, check the recovery map
770 * to see if there are any nodes that still need to be
771 * considered. these will not appear in the mle nodemap
772 * but they might own this lockres. wait on them. */
773 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
774 if (bit < O2NM_MAX_NODES) {
775 mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
776 "recover before lock mastery can begin\n",
777 dlm->name, namelen, (char *)lockid, bit);
778 wait_on_recovery = 1;
779 }
765 } 780 }
766 781
767 /* at this point there is either a DLM_MLE_BLOCK or a 782 /* at this point there is either a DLM_MLE_BLOCK or a
@@ -779,6 +794,39 @@ lookup:
779 spin_unlock(&dlm->master_lock); 794 spin_unlock(&dlm->master_lock);
780 spin_unlock(&dlm->spinlock); 795 spin_unlock(&dlm->spinlock);
781 796
797 while (wait_on_recovery) {
798 /* any cluster changes that occurred after dropping the
799 * dlm spinlock would be detectable be a change on the mle,
800 * so we only need to clear out the recovery map once. */
801 if (dlm_is_recovery_lock(lockid, namelen)) {
802 mlog(ML_NOTICE, "%s: recovery map is not empty, but "
803 "must master $RECOVERY lock now\n", dlm->name);
804 if (!dlm_pre_master_reco_lockres(dlm, res))
805 wait_on_recovery = 0;
806 else {
807 mlog(0, "%s: waiting 500ms for heartbeat state "
808 "change\n", dlm->name);
809 msleep(500);
810 }
811 continue;
812 }
813
814 dlm_kick_recovery_thread(dlm);
815 msleep(100);
816 dlm_wait_for_recovery(dlm);
817
818 spin_lock(&dlm->spinlock);
819 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
820 if (bit < O2NM_MAX_NODES) {
821 mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
822 "recover before lock mastery can begin\n",
823 dlm->name, namelen, (char *)lockid, bit);
824 wait_on_recovery = 1;
825 } else
826 wait_on_recovery = 0;
827 spin_unlock(&dlm->spinlock);
828 }
829
782 /* must wait for lock to be mastered elsewhere */ 830 /* must wait for lock to be mastered elsewhere */
783 if (blocked) 831 if (blocked)
784 goto wait; 832 goto wait;
@@ -1835,6 +1883,61 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
1835 mlog(0, "finished with dlm_assert_master_worker\n"); 1883 mlog(0, "finished with dlm_assert_master_worker\n");
1836} 1884}
1837 1885
1886/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread.
1887 * We cannot wait for node recovery to complete to begin mastering this
1888 * lockres because this lockres is used to kick off recovery! ;-)
1889 * So, do a pre-check on all living nodes to see if any of those nodes
1890 * think that $RECOVERY is currently mastered by a dead node. If so,
1891 * we wait a short time to allow that node to get notified by its own
1892 * heartbeat stack, then check again. All $RECOVERY lock resources
1893 * mastered by dead nodes are purged when the hearbeat callback is
1894 * fired, so we can know for sure that it is safe to continue once
1895 * the node returns a live node or no node. */
1896static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
1897 struct dlm_lock_resource *res)
1898{
1899 struct dlm_node_iter iter;
1900 int nodenum;
1901 int ret = 0;
1902 u8 master = DLM_LOCK_RES_OWNER_UNKNOWN;
1903
1904 spin_lock(&dlm->spinlock);
1905 dlm_node_iter_init(dlm->domain_map, &iter);
1906 spin_unlock(&dlm->spinlock);
1907
1908 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
1909 /* do not send to self */
1910 if (nodenum == dlm->node_num)
1911 continue;
1912 ret = dlm_do_master_requery(dlm, res, nodenum, &master);
1913 if (ret < 0) {
1914 mlog_errno(ret);
1915 if (!dlm_is_host_down(ret))
1916 BUG();
1917 /* host is down, so answer for that node would be
1918 * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
1919 }
1920
1921 if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
1922 /* check to see if this master is in the recovery map */
1923 spin_lock(&dlm->spinlock);
1924 if (test_bit(master, dlm->recovery_map)) {
1925 mlog(ML_NOTICE, "%s: node %u has not seen "
1926 "node %u go down yet, and thinks the "
1927 "dead node is mastering the recovery "
1928 "lock. must wait.\n", dlm->name,
1929 nodenum, master);
1930 ret = -EAGAIN;
1931 }
1932 spin_unlock(&dlm->spinlock);
1933 mlog(0, "%s: reco lock master is %u\n", dlm->name,
1934 master);
1935 break;
1936 }
1937 }
1938 return ret;
1939}
1940
1838 1941
1839/* 1942/*
1840 * DLM_MIGRATE_LOCKRES 1943 * DLM_MIGRATE_LOCKRES