aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Xue jiufei <xuejiufei@huawei.com> 2014-06-23 16:22:09 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-06-23 19:47:45 -0400
commit: ac4fef4d23ed879a7fd11ab24ccd2e1464277e9a (patch)
tree: 6f5139c8f615683b688d27f3d5f628cc140465bc
parent: b9aaac5a6b7d228435fcb80963d41c274406011b (diff)
ocfs2/dlm: do not purge lockres that is queued for assert master

When workqueue is delayed, it may occur that a lockres is purged while it
is still queued for master assert.  it may trigger BUG() as follows.

N1                                  N2
dlm_get_lockres()
->dlm_do_master_requery
                                    is the master of lockres,
                                    so queue assert_master work

                                    dlm_thread() start running
                                    and purge the lockres

                                    dlm_assert_master_worker()
                                    send assert master message
                                    to other nodes
receiving the assert_master
message, set master to N2

dlmlock_remote() send create_lock message to N2, but receive DLM_IVLOCKID,
if it is RECOVERY lockres, it triggers the BUG().

Another BUG() is triggered when N3 become the new master and send
assert_master to N1, N1 will trigger the BUG() because owner doesn't
match.  So we should not purge lockres when it is queued for assert
master.

Signed-off-by: joyce.xue <xuejiufei@huawei.com>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h    |  4
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c    | 43
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c  |  3
-rw-r--r--  fs/ocfs2/dlm/dlmthread.c    | 11
4 files changed, 55 insertions, 6 deletions
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index a106b3f2b22a..fae17c640df3 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -331,6 +331,7 @@ struct dlm_lock_resource
331 u16 state; 331 u16 state;
332 char lvb[DLM_LVB_LEN]; 332 char lvb[DLM_LVB_LEN];
333 unsigned int inflight_locks; 333 unsigned int inflight_locks;
334 unsigned int inflight_assert_workers;
334 unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 335 unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
335}; 336};
336 337
@@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
910void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, 911void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
911 struct dlm_lock_resource *res); 912 struct dlm_lock_resource *res);
912 913
914void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
915 struct dlm_lock_resource *res);
916
913void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 917void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
914void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 918void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
915void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 919void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4f4b00e173f2..82abf0cc9a12 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -581,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
581 atomic_set(&res->asts_reserved, 0); 581 atomic_set(&res->asts_reserved, 0);
582 res->migration_pending = 0; 582 res->migration_pending = 0;
583 res->inflight_locks = 0; 583 res->inflight_locks = 0;
584 res->inflight_assert_workers = 0;
584 585
585 res->dlm = dlm; 586 res->dlm = dlm;
586 587
@@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
683 wake_up(&res->wq); 684 wake_up(&res->wq);
684} 685}
685 686
687void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
688 struct dlm_lock_resource *res)
689{
690 assert_spin_locked(&res->spinlock);
691 res->inflight_assert_workers++;
692 mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
693 dlm->name, res->lockname.len, res->lockname.name,
694 res->inflight_assert_workers);
695}
696
697static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
698 struct dlm_lock_resource *res)
699{
700 spin_lock(&res->spinlock);
701 __dlm_lockres_grab_inflight_worker(dlm, res);
702 spin_unlock(&res->spinlock);
703}
704
705static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
706 struct dlm_lock_resource *res)
707{
708 assert_spin_locked(&res->spinlock);
709 BUG_ON(res->inflight_assert_workers == 0);
710 res->inflight_assert_workers--;
711 mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
712 dlm->name, res->lockname.len, res->lockname.name,
713 res->inflight_assert_workers);
714}
715
716static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
717 struct dlm_lock_resource *res)
718{
719 spin_lock(&res->spinlock);
720 __dlm_lockres_drop_inflight_worker(dlm, res);
721 spin_unlock(&res->spinlock);
722}
723
686/* 724/*
687 * lookup a lock resource by name. 725 * lookup a lock resource by name.
688 * may already exist in the hashtable. 726 * may already exist in the hashtable.
@@ -1603,7 +1641,8 @@ send_response:
1603 mlog(ML_ERROR, "failed to dispatch assert master work\n"); 1641 mlog(ML_ERROR, "failed to dispatch assert master work\n");
1604 response = DLM_MASTER_RESP_ERROR; 1642 response = DLM_MASTER_RESP_ERROR;
1605 dlm_lockres_put(res); 1643 dlm_lockres_put(res);
1606 } 1644 } else
1645 dlm_lockres_grab_inflight_worker(dlm, res);
1607 } else { 1646 } else {
1608 if (res) 1647 if (res)
1609 dlm_lockres_put(res); 1648 dlm_lockres_put(res);
@@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
2118 dlm_lockres_release_ast(dlm, res); 2157 dlm_lockres_release_ast(dlm, res);
2119 2158
2120put: 2159put:
2160 dlm_lockres_drop_inflight_worker(dlm, res);
2161
2121 dlm_lockres_put(res); 2162 dlm_lockres_put(res);
2122 2163
2123 mlog(0, "finished with dlm_assert_master_worker\n"); 2164 mlog(0, "finished with dlm_assert_master_worker\n");
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 5de019437ea5..45067faf5695 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
1708 mlog_errno(-ENOMEM); 1708 mlog_errno(-ENOMEM);
1709 /* retry!? */ 1709 /* retry!? */
1710 BUG(); 1710 BUG();
1711 } 1711 } else
1712 __dlm_lockres_grab_inflight_worker(dlm, res);
1712 } else /* put.. incase we are not the master */ 1713 } else /* put.. incase we are not the master */
1713 dlm_lockres_put(res); 1714 dlm_lockres_put(res);
1714 spin_unlock(&res->spinlock); 1715 spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index cf53a822f07f..69aac6f088ad 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -259,11 +259,14 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
259 * refs on it. */ 259 * refs on it. */
260 unused = __dlm_lockres_unused(lockres); 260 unused = __dlm_lockres_unused(lockres);
261 if (!unused || 261 if (!unused ||
262 (lockres->state & DLM_LOCK_RES_MIGRATING)) { 262 (lockres->state & DLM_LOCK_RES_MIGRATING) ||
263 (lockres->inflight_assert_workers != 0)) {
263 mlog(0, "%s: res %.*s is in use or being remastered, " 264 mlog(0, "%s: res %.*s is in use or being remastered, "
264 "used %d, state %d\n", dlm->name, 265 "used %d, state %d, assert master workers %u\n",
265 lockres->lockname.len, lockres->lockname.name, 266 dlm->name, lockres->lockname.len,
266 !unused, lockres->state); 267 lockres->lockname.name,
268 !unused, lockres->state,
269 lockres->inflight_assert_workers);
267 list_move_tail(&lockres->purge, &dlm->purge_list); 270 list_move_tail(&lockres->purge, &dlm->purge_list);
268 spin_unlock(&lockres->spinlock); 271 spin_unlock(&lockres->spinlock);
269 continue; 272 continue;