aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlm
diff options
context:
space:
mode:
author: Xue jiufei <xuejiufei@huawei.com> 2016-01-14 18:17:18 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-01-14 19:00:49 -0500
commit: 30bee898f86506893883ffb8db20d8101a29b5f5 (patch)
tree: d1998a65a4373824d785773e6a9c2e7b504b01b5 /fs/ocfs2/dlm
parent: a84ac334dcb44c76f0b051513a6c27a2d747f883 (diff)
ocfs2/dlm: fix a race between purge and migration
We found a race between purge and migration when doing code review.

Node A puts a lockres on the purge list before receiving the migrate message from node B, which is the master. Node A then calls dlm_mig_lockres_handler to handle this message:

dlm_mig_lockres_handler
  dlm_lookup_lockres
  >>>>>> race window: dlm_run_purge_list may run and send a deref
         message to the master, then wait for the response
  spin_lock(&res->spinlock);
  res->state |= DLM_LOCK_RES_MIGRATING;
  spin_unlock(&res->spinlock);
dlm_mig_lockres_handler returns

>>>>>> dlm_thread receives the response from the master for the deref message and triggers the BUG, because the lockres has the state DLM_LOCK_RES_MIGRATING, with the following message:

dlm_purge_lockres:209 ERROR: 6633EB681FA7474A9C280A4E1A836F0F: res M0000000000000000030c0300000000 in use after deref

The patch below does the lookup with __dlm_lookup_lockres while holding dlm->spinlock, and keeps that lock held until res->state has been updated.

Signed-off-by: Jiufei Xue <xuejiufei@huawei.com>
Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c | 9
1 files changed, 8 insertions, 1 deletions
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 9e4f862d20fe..86fb53614bf4 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1373,6 +1373,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
1373 char *buf = NULL; 1373 char *buf = NULL;
1374 struct dlm_work_item *item = NULL; 1374 struct dlm_work_item *item = NULL;
1375 struct dlm_lock_resource *res = NULL; 1375 struct dlm_lock_resource *res = NULL;
1376 unsigned int hash;
1376 1377
1377 if (!dlm_grab(dlm)) 1378 if (!dlm_grab(dlm))
1378 return -EINVAL; 1379 return -EINVAL;
@@ -1400,7 +1401,10 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
1400 /* lookup the lock to see if we have a secondary queue for this 1401 /* lookup the lock to see if we have a secondary queue for this
1401 * already... just add the locks in and this will have its owner 1402 * already... just add the locks in and this will have its owner
1402 * and RECOVERY flag changed when it completes. */ 1403 * and RECOVERY flag changed when it completes. */
1403 res = dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len); 1404 hash = dlm_lockid_hash(mres->lockname, mres->lockname_len);
1405 spin_lock(&dlm->spinlock);
1406 res = __dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len,
1407 hash);
1404 if (res) { 1408 if (res) {
1405 /* this will get a ref on res */ 1409 /* this will get a ref on res */
1406 /* mark it as recovering/migrating and hash it */ 1410 /* mark it as recovering/migrating and hash it */
@@ -1421,13 +1425,16 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
1421 mres->lockname_len, mres->lockname); 1425 mres->lockname_len, mres->lockname);
1422 ret = -EFAULT; 1426 ret = -EFAULT;
1423 spin_unlock(&res->spinlock); 1427 spin_unlock(&res->spinlock);
1428 spin_unlock(&dlm->spinlock);
1424 dlm_lockres_put(res); 1429 dlm_lockres_put(res);
1425 goto leave; 1430 goto leave;
1426 } 1431 }
1427 res->state |= DLM_LOCK_RES_MIGRATING; 1432 res->state |= DLM_LOCK_RES_MIGRATING;
1428 } 1433 }
1429 spin_unlock(&res->spinlock); 1434 spin_unlock(&res->spinlock);
1435 spin_unlock(&dlm->spinlock);
1430 } else { 1436 } else {
1437 spin_unlock(&dlm->spinlock);
1431 /* need to allocate, just like if it was 1438 /* need to allocate, just like if it was
1432 * mastered here normally */ 1439 * mastered here normally */
1433 res = dlm_new_lockres(dlm, mres->lockname, mres->lockname_len); 1440 res = dlm_new_lockres(dlm, mres->lockname, mres->lockname_len);