aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlm/dlmmaster.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/dlm/dlmmaster.c')
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c103
1 files changed, 103 insertions, 0 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 78ac3a00eb54..940be4c13b1f 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -239,6 +239,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
239static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm, 239static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
240 struct dlm_lock_resource *res, 240 struct dlm_lock_resource *res,
241 u8 target); 241 u8 target);
242static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
243 struct dlm_lock_resource *res);
242 244
243 245
244int dlm_is_host_down(int errno) 246int dlm_is_host_down(int errno)
@@ -677,6 +679,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
677 struct dlm_node_iter iter; 679 struct dlm_node_iter iter;
678 unsigned int namelen; 680 unsigned int namelen;
679 int tries = 0; 681 int tries = 0;
682 int bit, wait_on_recovery = 0;
680 683
681 BUG_ON(!lockid); 684 BUG_ON(!lockid);
682 685
@@ -762,6 +765,18 @@ lookup:
762 dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0); 765 dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
763 set_bit(dlm->node_num, mle->maybe_map); 766 set_bit(dlm->node_num, mle->maybe_map);
764 list_add(&mle->list, &dlm->master_list); 767 list_add(&mle->list, &dlm->master_list);
768
769 /* still holding the dlm spinlock, check the recovery map
770 * to see if there are any nodes that still need to be
771 * considered. these will not appear in the mle nodemap
772 * but they might own this lockres. wait on them. */
773 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
774 if (bit < O2NM_MAX_NODES) {
775 mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
776 "recover before lock mastery can begin\n",
777 dlm->name, namelen, (char *)lockid, bit);
778 wait_on_recovery = 1;
779 }
765 } 780 }
766 781
767 /* at this point there is either a DLM_MLE_BLOCK or a 782 /* at this point there is either a DLM_MLE_BLOCK or a
@@ -779,6 +794,39 @@ lookup:
779 spin_unlock(&dlm->master_lock); 794 spin_unlock(&dlm->master_lock);
780 spin_unlock(&dlm->spinlock); 795 spin_unlock(&dlm->spinlock);
781 796
797 while (wait_on_recovery) {
798 /* any cluster changes that occurred after dropping the
799 * dlm spinlock would be detectable by a change on the mle,
800 * so we only need to clear out the recovery map once. */
801 if (dlm_is_recovery_lock(lockid, namelen)) {
802 mlog(ML_NOTICE, "%s: recovery map is not empty, but "
803 "must master $RECOVERY lock now\n", dlm->name);
804 if (!dlm_pre_master_reco_lockres(dlm, res))
805 wait_on_recovery = 0;
806 else {
807 mlog(0, "%s: waiting 500ms for heartbeat state "
808 "change\n", dlm->name);
809 msleep(500);
810 }
811 continue;
812 }
813
814 dlm_kick_recovery_thread(dlm);
815 msleep(100);
816 dlm_wait_for_recovery(dlm);
817
818 spin_lock(&dlm->spinlock);
819 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
820 if (bit < O2NM_MAX_NODES) {
821 mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to"
822 "recover before lock mastery can begin\n",
823 dlm->name, namelen, (char *)lockid, bit);
824 wait_on_recovery = 1;
825 } else
826 wait_on_recovery = 0;
827 spin_unlock(&dlm->spinlock);
828 }
829
782 /* must wait for lock to be mastered elsewhere */ 830 /* must wait for lock to be mastered elsewhere */
783 if (blocked) 831 if (blocked)
784 goto wait; 832 goto wait;
@@ -1835,6 +1883,61 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
1835 mlog(0, "finished with dlm_assert_master_worker\n"); 1883 mlog(0, "finished with dlm_assert_master_worker\n");
1836} 1884}
1837 1885
1886/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread.
1887 * We cannot wait for node recovery to complete to begin mastering this
1888 * lockres because this lockres is used to kick off recovery! ;-)
1889 * So, do a pre-check on all living nodes to see if any of those nodes
1890 * think that $RECOVERY is currently mastered by a dead node. If so,
1891 * we wait a short time to allow that node to get notified by its own
1892 * heartbeat stack, then check again. All $RECOVERY lock resources
1893 * mastered by dead nodes are purged when the hearbeat callback is
1894 * fired, so we can know for sure that it is safe to continue once
1895 * the node returns a live node or no node. */
1896static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
1897 struct dlm_lock_resource *res)
1898{
1899 struct dlm_node_iter iter;
1900 int nodenum;
1901 int ret = 0;
1902 u8 master = DLM_LOCK_RES_OWNER_UNKNOWN;
1903
1904 spin_lock(&dlm->spinlock);
1905 dlm_node_iter_init(dlm->domain_map, &iter);
1906 spin_unlock(&dlm->spinlock);
1907
1908 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
1909 /* do not send to self */
1910 if (nodenum == dlm->node_num)
1911 continue;
1912 ret = dlm_do_master_requery(dlm, res, nodenum, &master);
1913 if (ret < 0) {
1914 mlog_errno(ret);
1915 if (!dlm_is_host_down(ret))
1916 BUG();
1917 /* host is down, so answer for that node would be
1918 * DLM_LOCK_RES_OWNER_UNKNOWN. continue. */
1919 }
1920
1921 if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
1922 /* check to see if this master is in the recovery map */
1923 spin_lock(&dlm->spinlock);
1924 if (test_bit(master, dlm->recovery_map)) {
1925 mlog(ML_NOTICE, "%s: node %u has not seen "
1926 "node %u go down yet, and thinks the "
1927 "dead node is mastering the recovery "
1928 "lock. must wait.\n", dlm->name,
1929 nodenum, master);
1930 ret = -EAGAIN;
1931 }
1932 spin_unlock(&dlm->spinlock);
1933 mlog(0, "%s: reco lock master is %u\n", dlm->name,
1934 master);
1935 break;
1936 }
1937 }
1938 return ret;
1939}
1940
1838 1941
1839/* 1942/*
1840 * DLM_MIGRATE_LOCKRES 1943 * DLM_MIGRATE_LOCKRES