author	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-08 13:37:22 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-08 13:37:22 -0500
commit	5986a2ec35836a878350c54af4bd91b1de6abc59 (patch)
tree	2efe068e124071ca30a5f1886402b890d7ba429e /fs/ocfs2/dlm/dlmrecovery.c
parent	43187902cbfafe73ede0144166b741fb0f7d04e1 (diff)
parent	ff05d1c4643dd4260eb699396043d7e8009c0de4 (diff)
Merge branch 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/mfasheh/ocfs2: (22 commits)
  configfs: Zero terminate data in configfs attribute writes.
  [PATCH] ocfs2 heartbeat: clean up bio submission code
  ocfs2: introduce sc->sc_send_lock to protect outbound messages
  [PATCH] ocfs2: drop INET from Kconfig, not needed
  ocfs2_dlm: Add timeout to dlm join domain
  ocfs2_dlm: Silence some messages during join domain
  ocfs2_dlm: disallow a domain join if node maps mismatch
  ocfs2_dlm: Ensure correct ordering of set/clear refmap bit on lockres
  ocfs2: Binds listener to the configured ip address
  ocfs2_dlm: Calling post handler function in assert master handler
  ocfs2: Added post handler callable function in o2net message handler
  ocfs2_dlm: Cookies in locks not being printed correctly in error messages
  ocfs2_dlm: Silence a failed convert
  ocfs2_dlm: wake up sleepers on the lockres waitqueue
  ocfs2_dlm: Dlm dispatch was stopping too early
  ocfs2_dlm: Drop inflight refmap even if no locks found on the lockres
  ocfs2_dlm: Flush dlm workqueue before starting to migrate
  ocfs2_dlm: Fix migrate lockres handler queue scanning
  ocfs2_dlm: Make dlmunlock() wait for migration to complete
  ocfs2_dlm: Fixes race between migrate and dirty
  ...
Diffstat (limited to 'fs/ocfs2/dlm/dlmrecovery.c')
-rw-r--r--	fs/ocfs2/dlm/dlmrecovery.c	182
1 files changed, 154 insertions(+), 28 deletions(-)
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 367a11e9e2ed..6d4a83d50152 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -163,9 +163,6 @@ void dlm_dispatch_work(struct work_struct *work)
 	dlm_workfunc_t *workfunc;
 	int tot=0;
 
-	if (!dlm_joined(dlm))
-		return;
-
 	spin_lock(&dlm->work_lock);
 	list_splice_init(&dlm->work_list, &tmp_list);
 	spin_unlock(&dlm->work_lock);
@@ -821,7 +818,8 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
 
 }
 
-int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data,
+				  void **ret_data)
 {
 	struct dlm_ctxt *dlm = data;
 	struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf;
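
Every o2net message handler in this merge grows a fourth parameter, void **ret_data. Together with the "Added post handler callable function in o2net message handler" commit listed above, this lets a handler hand state to a post handler that runs after the reply has been sent. A simplified sketch of that pattern follows; the typedefs and names are illustrative stand-ins, not the real o2net API:

	/* Sketch of a handler/post-handler pair sharing state via ret_data;
	 * the types here are simplified stand-ins, not the real o2net API. */
	typedef int handler_fn(void *msg, unsigned int len, void *data,
			       void **ret_data);
	typedef void post_handler_fn(int status, void *data, void *ret_data);

	static int example_handler(void *msg, unsigned int len, void *data,
				   void **ret_data)
	{
		*ret_data = data;	/* hand state to the post handler */
		return 0;
	}

	static void example_post_handler(int status, void *data, void *ret_data)
	{
		/* runs after the reply is sent, e.g. to drop a reference
		 * taken in example_handler() */
		(void)status; (void)data; (void)ret_data;
	}
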
@@ -978,7 +976,8 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
 }
 
 
-int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
+			       void **ret_data)
 {
 	struct dlm_ctxt *dlm = data;
 	struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
@@ -1129,6 +1128,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 	if (total_locks == mres_total_locks)
 		mres->flags |= DLM_MRES_ALL_DONE;
 
+	mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
+	     dlm->name, res->lockname.len, res->lockname.name,
+	     orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery",
+	     send_to);
+
 	/* send it */
 	ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
 				 sz, send_to, &status);
@@ -1213,6 +1217,34 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
 	return 0;
 }
 
+static void dlm_add_dummy_lock(struct dlm_ctxt *dlm,
+			       struct dlm_migratable_lockres *mres)
+{
+	struct dlm_lock dummy;
+	memset(&dummy, 0, sizeof(dummy));
+	dummy.ml.cookie = 0;
+	dummy.ml.type = LKM_IVMODE;
+	dummy.ml.convert_type = LKM_IVMODE;
+	dummy.ml.highest_blocked = LKM_IVMODE;
+	dummy.lksb = NULL;
+	dummy.ml.node = dlm->node_num;
+	dlm_add_lock_to_array(&dummy, mres, DLM_BLOCKED_LIST);
+}
+
+static inline int dlm_is_dummy_lock(struct dlm_ctxt *dlm,
+				    struct dlm_migratable_lock *ml,
+				    u8 *nodenum)
+{
+	if (unlikely(ml->cookie == 0 &&
+		     ml->type == LKM_IVMODE &&
+		     ml->convert_type == LKM_IVMODE &&
+		     ml->highest_blocked == LKM_IVMODE &&
+		     ml->list == DLM_BLOCKED_LIST)) {
+		*nodenum = ml->node;
+		return 1;
+	}
+	return 0;
+}
 
 int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 			 struct dlm_migratable_lockres *mres,
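
The two helpers above implement an in-band sentinel: no real lock can have a zero cookie with type, convert_type, and highest_blocked all LKM_IVMODE, so such a record can ride the existing migration message format purely to carry a node number for a mastery reference. A minimal compilable sketch of the same pattern, with illustrative names rather than the ocfs2 wire structures:

	/* Minimal sketch of the sentinel idea; the struct and IVMODE are
	 * illustrative stand-ins, not the ocfs2 wire format. */
	#include <stdint.h>

	#define IVMODE (-1)	/* "invalid mode", like LKM_IVMODE */

	struct wire_lock {
		uint64_t cookie;	/* 0 is never a valid cookie */
		int8_t type, convert_type, highest_blocked;
		uint8_t node;		/* meaningful even in a placeholder */
	};

	/* A field combination no real lock can carry marks the placeholder. */
	static int is_placeholder(const struct wire_lock *l, uint8_t *node)
	{
		if (l->cookie == 0 && l->type == IVMODE &&
		    l->convert_type == IVMODE && l->highest_blocked == IVMODE) {
			*node = l->node;
			return 1;
		}
		return 0;
	}
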
@@ -1260,6 +1292,14 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 			goto error;
 		}
 	}
+	if (total_locks == 0) {
+		/* send a dummy lock to indicate a mastery reference only */
+		mlog(0, "%s:%.*s: sending dummy lock to %u, %s\n",
+		     dlm->name, res->lockname.len, res->lockname.name,
+		     send_to, flags & DLM_MRES_RECOVERY ? "recovery" :
+		     "migration");
+		dlm_add_dummy_lock(dlm, mres);
+	}
 	/* flush any remaining locks */
 	ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
 	if (ret < 0)
@@ -1293,7 +1333,8 @@ error:
  * do we spin?  returning an error only delays the problem really
  */
 
-int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
+			    void **ret_data)
 {
 	struct dlm_ctxt *dlm = data;
 	struct dlm_migratable_lockres *mres =
@@ -1382,17 +1423,21 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
 		spin_lock(&res->spinlock);
 		res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
 		spin_unlock(&res->spinlock);
+		wake_up(&res->wq);
 
 		/* add an extra ref for just-allocated lockres
 		 * otherwise the lockres will be purged immediately */
 		dlm_lockres_get(res);
-
 	}
 
 	/* at this point we have allocated everything we need,
 	 * and we have a hashed lockres with an extra ref and
 	 * the proper res->state flags. */
 	ret = 0;
+	spin_lock(&res->spinlock);
+	/* drop this either when master requery finds a different master
+	 * or when a lock is added by the recovery worker */
+	dlm_lockres_grab_inflight_ref(dlm, res);
 	if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) {
 		/* migration cannot have an unknown master */
 		BUG_ON(!(mres->flags & DLM_MRES_RECOVERY));
@@ -1400,10 +1445,11 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
1400 "unknown owner.. will need to requery: " 1445 "unknown owner.. will need to requery: "
1401 "%.*s\n", mres->lockname_len, mres->lockname); 1446 "%.*s\n", mres->lockname_len, mres->lockname);
1402 } else { 1447 } else {
1403 spin_lock(&res->spinlock); 1448 /* take a reference now to pin the lockres, drop it
1449 * when locks are added in the worker */
1404 dlm_change_lockres_owner(dlm, res, dlm->node_num); 1450 dlm_change_lockres_owner(dlm, res, dlm->node_num);
1405 spin_unlock(&res->spinlock);
1406 } 1451 }
1452 spin_unlock(&res->spinlock);
1407 1453
1408 /* queue up work for dlm_mig_lockres_worker */ 1454 /* queue up work for dlm_mig_lockres_worker */
1409 dlm_grab(dlm); /* get an extra ref for the work item */ 1455 dlm_grab(dlm); /* get an extra ref for the work item */
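
The handler now takes an inflight reference under res->spinlock before queueing the worker, so the lockres cannot be purged in the window between the network handler returning and dlm_mig_lockres_worker running; the master-requery path or the worker drops it. A userspace sketch of this pin-then-hand-off idea, using a pthread mutex as a stand-in for the kernel spinlock and hypothetical names:

	/* Userspace sketch of the pin-then-hand-off idea; not the kernel API. */
	#include <pthread.h>

	struct resource {
		pthread_mutex_t lock;
		int inflight;		/* > 0 means "do not purge yet" */
	};

	static void handler_pins(struct resource *r)
	{
		pthread_mutex_lock(&r->lock);
		r->inflight++;		/* like dlm_lockres_grab_inflight_ref() */
		pthread_mutex_unlock(&r->lock);
		/* ... queue the worker; it must unpin exactly once ... */
	}

	static void worker_unpins(struct resource *r)
	{
		pthread_mutex_lock(&r->lock);
		r->inflight--;		/* like dlm_lockres_drop_inflight_ref() */
		pthread_mutex_unlock(&r->lock);
	}
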
@@ -1459,6 +1505,9 @@ again:
1459 "this node will take it.\n", 1505 "this node will take it.\n",
1460 res->lockname.len, res->lockname.name); 1506 res->lockname.len, res->lockname.name);
1461 } else { 1507 } else {
1508 spin_lock(&res->spinlock);
1509 dlm_lockres_drop_inflight_ref(dlm, res);
1510 spin_unlock(&res->spinlock);
1462 mlog(0, "master needs to respond to sender " 1511 mlog(0, "master needs to respond to sender "
1463 "that node %u still owns %.*s\n", 1512 "that node %u still owns %.*s\n",
1464 real_master, res->lockname.len, 1513 real_master, res->lockname.len,
@@ -1578,7 +1627,8 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 /* this function cannot error, so unless the sending
  * or receiving of the message failed, the owner can
  * be trusted */
-int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
+			       void **ret_data)
 {
 	struct dlm_ctxt *dlm = data;
 	struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf;
@@ -1660,21 +1710,38 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 {
 	struct dlm_migratable_lock *ml;
 	struct list_head *queue;
+	struct list_head *tmpq = NULL;
 	struct dlm_lock *newlock = NULL;
 	struct dlm_lockstatus *lksb = NULL;
 	int ret = 0;
-	int i, bad;
+	int i, j, bad;
 	struct list_head *iter;
 	struct dlm_lock *lock = NULL;
+	u8 from = O2NM_MAX_NODES;
+	unsigned int added = 0;
 
 	mlog(0, "running %d locks for this lockres\n", mres->num_locks);
 	for (i=0; i<mres->num_locks; i++) {
 		ml = &(mres->ml[i]);
+
+		if (dlm_is_dummy_lock(dlm, ml, &from)) {
+			/* placeholder, just need to set the refmap bit */
+			BUG_ON(mres->num_locks != 1);
+			mlog(0, "%s:%.*s: dummy lock for %u\n",
+			     dlm->name, mres->lockname_len, mres->lockname,
+			     from);
+			spin_lock(&res->spinlock);
+			dlm_lockres_set_refmap_bit(from, res);
+			spin_unlock(&res->spinlock);
+			added++;
+			break;
+		}
 		BUG_ON(ml->highest_blocked != LKM_IVMODE);
 		newlock = NULL;
 		lksb = NULL;
 
 		queue = dlm_list_num_to_pointer(res, ml->list);
+		tmpq = NULL;
 
 		/* if the lock is for the local node it needs to
 		 * be moved to the proper location within the queue.
@@ -1684,11 +1751,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 			BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
 
 			spin_lock(&res->spinlock);
-			list_for_each(iter, queue) {
-				lock = list_entry (iter, struct dlm_lock, list);
-				if (lock->ml.cookie != ml->cookie)
-					lock = NULL;
-				else
+			for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
+				tmpq = dlm_list_idx_to_ptr(res, j);
+				list_for_each(iter, tmpq) {
+					lock = list_entry (iter, struct dlm_lock, list);
+					if (lock->ml.cookie != ml->cookie)
+						lock = NULL;
+					else
+						break;
+				}
+				if (lock)
 					break;
 			}
 
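
This is the "Fix migrate lockres handler queue scanning" change: during migration a local lock can race onto a different queue than the one the master recorded, so the lookup now walks all three queues by cookie and remembers which list the lock was actually found on; the tmpq != queue test in the next hunk skips the reorder when they disagree. An illustrative userspace sketch of the multi-queue search, with hypothetical types:

	/* Sketch of "search every queue, remember where it was". */
	#include <stdint.h>
	#include <stddef.h>

	enum { GRANTED, CONVERTING, BLOCKED, NR_QUEUES };

	struct lk { uint64_t cookie; struct lk *next; };

	static struct lk *find_on_any_queue(struct lk *queues[NR_QUEUES],
					    uint64_t cookie, int *found_on)
	{
		for (int q = GRANTED; q < NR_QUEUES; q++)
			for (struct lk *l = queues[q]; l; l = l->next)
				if (l->cookie == cookie) {
					*found_on = q;	/* may differ from the
							 * queue the sender named */
					return l;
				}
		return NULL;
	}
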
@@ -1698,12 +1770,20 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 			u64 c = ml->cookie;
 			mlog(ML_ERROR, "could not find local lock "
 				       "with cookie %u:%llu!\n",
-				       dlm_get_lock_cookie_node(c),
-				       dlm_get_lock_cookie_seq(c));
+				       dlm_get_lock_cookie_node(be64_to_cpu(c)),
+				       dlm_get_lock_cookie_seq(be64_to_cpu(c)));
+			__dlm_print_one_lock_resource(res);
 			BUG();
 		}
 		BUG_ON(lock->ml.node != ml->node);
 
+		if (tmpq != queue) {
+			mlog(0, "lock was on %u instead of %u for %.*s\n",
+			     j, ml->list, res->lockname.len, res->lockname.name);
+			spin_unlock(&res->spinlock);
+			continue;
+		}
+
 		/* see NOTE above about why we do not update
 		 * to match the master here */
 
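
This is the "Cookies in locks not being printed correctly in error messages" fix: the cookie is carried in big-endian byte order, so it must pass through be64_to_cpu() before the node and sequence fields are extracted, otherwise the printed values come out scrambled on little-endian hosts. A portable sketch of decode-then-split; the node-in-the-top-byte split shown here is illustrative, matching the %u:%llu print format rather than quoting the ocfs2 definition:

	/* Portable sketch: decode the big-endian cookie, then split it. */
	#include <stdint.h>

	static uint64_t be64_decode(const unsigned char b[8])
	{
		uint64_t v = 0;
		for (int i = 0; i < 8; i++)
			v = (v << 8) | b[i];	/* spirit of be64_to_cpu() */
		return v;
	}

	static uint8_t cookie_node(uint64_t c)
	{
		return (uint8_t)(c >> 56);	/* illustrative layout */
	}

	static uint64_t cookie_seq(uint64_t c)
	{
		return c & ((UINT64_C(1) << 56) - 1);
	}
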
@@ -1711,6 +1791,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 			/* do not alter lock refcount.  switching lists. */
 			list_move_tail(&lock->list, queue);
 			spin_unlock(&res->spinlock);
+			added++;
 
 			mlog(0, "just reordered a local lock!\n");
 			continue;
@@ -1799,14 +1880,14 @@ skip_lvb:
 			mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
 			     "exists on this lockres!\n", dlm->name,
 			     res->lockname.len, res->lockname.name,
-			     dlm_get_lock_cookie_node(c),
-			     dlm_get_lock_cookie_seq(c));
+			     dlm_get_lock_cookie_node(be64_to_cpu(c)),
+			     dlm_get_lock_cookie_seq(be64_to_cpu(c)));
 
 			mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, "
 			     "node=%u, cookie=%u:%llu, queue=%d\n",
 			     ml->type, ml->convert_type, ml->node,
-			     dlm_get_lock_cookie_node(ml->cookie),
-			     dlm_get_lock_cookie_seq(ml->cookie),
+			     dlm_get_lock_cookie_node(be64_to_cpu(ml->cookie)),
+			     dlm_get_lock_cookie_seq(be64_to_cpu(ml->cookie)),
 			     ml->list);
 
 			__dlm_print_one_lock_resource(res);
@@ -1817,12 +1898,22 @@ skip_lvb:
 		if (!bad) {
 			dlm_lock_get(newlock);
 			list_add_tail(&newlock->list, queue);
+			mlog(0, "%s:%.*s: added lock for node %u, "
+			     "setting refmap bit\n", dlm->name,
+			     res->lockname.len, res->lockname.name, ml->node);
+			dlm_lockres_set_refmap_bit(ml->node, res);
+			added++;
 		}
 		spin_unlock(&res->spinlock);
 	}
 	mlog(0, "done running all the locks\n");
 
 leave:
+	/* balance the ref taken when the work was queued */
+	spin_lock(&res->spinlock);
+	dlm_lockres_drop_inflight_ref(dlm, res);
+	spin_unlock(&res->spinlock);
+
 	if (ret < 0) {
 		mlog_errno(ret);
 		if (newlock)
@@ -1935,9 +2026,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 		if (res->owner == dead_node) {
 			list_del_init(&res->recovering);
 			spin_lock(&res->spinlock);
+			/* new_master has our reference from
+			 * the lock state sent during recovery */
 			dlm_change_lockres_owner(dlm, res, new_master);
 			res->state &= ~DLM_LOCK_RES_RECOVERING;
-			if (!__dlm_lockres_unused(res))
+			if (__dlm_lockres_has_locks(res))
 				__dlm_dirty_lockres(dlm, res);
 			spin_unlock(&res->spinlock);
 			wake_up(&res->wq);
@@ -1977,9 +2070,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 			dlm_lockres_put(res);
 		}
 		spin_lock(&res->spinlock);
+		/* new_master has our reference from
+		 * the lock state sent during recovery */
 		dlm_change_lockres_owner(dlm, res, new_master);
 		res->state &= ~DLM_LOCK_RES_RECOVERING;
-		if (!__dlm_lockres_unused(res))
+		if (__dlm_lockres_has_locks(res))
 			__dlm_dirty_lockres(dlm, res);
 		spin_unlock(&res->spinlock);
 		wake_up(&res->wq);
@@ -2048,6 +2143,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 {
 	struct list_head *iter, *tmpiter;
 	struct dlm_lock *lock;
+	unsigned int freed = 0;
 
 	/* this node is the lockres master:
 	 * 1) remove any stale locks for the dead node
@@ -2062,6 +2158,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			freed++;
 		}
 	}
 	list_for_each_safe(iter, tmpiter, &res->converting) {
@@ -2069,6 +2166,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			freed++;
 		}
 	}
 	list_for_each_safe(iter, tmpiter, &res->blocked) {
@@ -2076,9 +2174,23 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			freed++;
 		}
 	}
 
+	if (freed) {
+		mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
+		     "dropping ref from lockres\n", dlm->name,
+		     res->lockname.len, res->lockname.name, freed, dead_node);
+		BUG_ON(!test_bit(dead_node, res->refmap));
+		dlm_lockres_clear_refmap_bit(dead_node, res);
+	} else if (test_bit(dead_node, res->refmap)) {
+		mlog(0, "%s:%.*s: dead node %u had a ref, but had "
+		     "no locks and had not purged before dying\n", dlm->name,
+		     res->lockname.len, res->lockname.name, dead_node);
+		dlm_lockres_clear_refmap_bit(dead_node, res);
+	}
+
 	/* do not kick thread yet */
 	__dlm_dirty_lockres(dlm, res);
 }
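
The refmap is the master's per-node bitmap of outstanding references to a lockres; once the dead node's locks are freed, its bit must be cleared (including a bit left over from a purge the node never finished), or the lockres would stay pinned forever. A compact sketch of such a bitmap, with illustrative sizes and names:

	/* Sketch of a per-node reference bitmap; sizes are illustrative. */
	#include <stdint.h>

	#define MAX_NODES 256

	struct refmap { uint64_t bits[MAX_NODES / 64]; };

	static void ref_set(struct refmap *m, unsigned int n)
	{
		m->bits[n / 64] |= UINT64_C(1) << (n % 64);
	}

	static void ref_clear(struct refmap *m, unsigned int n)
	{
		m->bits[n / 64] &= ~(UINT64_C(1) << (n % 64));
	}

	static int ref_test(const struct refmap *m, unsigned int n)
	{
		return (m->bits[n / 64] >> (n % 64)) & 1;
	}
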
@@ -2141,9 +2253,21 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
 			spin_lock(&res->spinlock);
 			/* zero the lvb if necessary */
 			dlm_revalidate_lvb(dlm, res, dead_node);
-			if (res->owner == dead_node)
+			if (res->owner == dead_node) {
+				if (res->state & DLM_LOCK_RES_DROPPING_REF)
+					mlog(0, "%s:%.*s: owned by "
+					     "dead node %u, this node was "
+					     "dropping its ref when it died. "
+					     "continue, dropping the flag.\n",
+					     dlm->name, res->lockname.len,
+					     res->lockname.name, dead_node);
+
+				/* the wake_up for this will happen when the
+				 * RECOVERING flag is dropped later */
+				res->state &= ~DLM_LOCK_RES_DROPPING_REF;
+
 				dlm_move_lockres_to_recovery_list(dlm, res);
-			else if (res->owner == dlm->node_num) {
+			} else if (res->owner == dlm->node_num) {
 				dlm_free_dead_locks(dlm, res, dead_node);
 				__dlm_lockres_calc_usage(dlm, res);
 			}
@@ -2480,7 +2604,8 @@ retry:
 	return ret;
 }
 
-int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
+			   void **ret_data)
 {
 	struct dlm_ctxt *dlm = data;
 	struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf;
@@ -2608,7 +2733,8 @@ stage2:
 	return ret;
 }
 
-int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
+			      void **ret_data)
 {
 	struct dlm_ctxt *dlm = data;
 	struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf;