diff options
author | Xue jiufei <xuejiufei@huawei.com> | 2014-06-23 16:22:09 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-23 19:47:45 -0400 |
commit | ac4fef4d23ed879a7fd11ab24ccd2e1464277e9a (patch) | |
tree | 6f5139c8f615683b688d27f3d5f628cc140465bc /fs | |
parent | b9aaac5a6b7d228435fcb80963d41c274406011b (diff) |
ocfs2/dlm: do not purge lockres that is queued for assert master
When the workqueue is delayed, a lockres may be purged while it is still
queued for master assert. This may trigger a BUG() as follows.
N1                                N2
dlm_get_lockres()
->dlm_do_master_requery
                                  is the master of lockres,
                                  so queue assert_master work

                                  dlm_thread() start running
                                  and purge the lockres

                                  dlm_assert_master_worker()
                                  send assert master message
                                  to other nodes
receiving the assert_master
message, set master to N2
dlmlock_remote() sends a create_lock message to N2 but receives DLM_IVLOCKID;
if it is a RECOVERY lockres, this triggers the BUG().
Another BUG() is triggered when N3 becomes the new master and sends
assert_master to N1: N1 will trigger the BUG() because the owner doesn't
match. So we should not purge a lockres while it is queued for assert
master.
Signed-off-by: joyce.xue <xuejiufei@huawei.com>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 4 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 43 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmthread.c | 11 |
4 files changed, 55 insertions, 6 deletions
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index a106b3f2b22a..fae17c640df3 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -331,6 +331,7 @@ struct dlm_lock_resource | |||
331 | u16 state; | 331 | u16 state; |
332 | char lvb[DLM_LVB_LEN]; | 332 | char lvb[DLM_LVB_LEN]; |
333 | unsigned int inflight_locks; | 333 | unsigned int inflight_locks; |
334 | unsigned int inflight_assert_workers; | ||
334 | unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 335 | unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
335 | }; | 336 | }; |
336 | 337 | ||
@@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | |||
910 | void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | 911 | void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, |
911 | struct dlm_lock_resource *res); | 912 | struct dlm_lock_resource *res); |
912 | 913 | ||
914 | void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, | ||
915 | struct dlm_lock_resource *res); | ||
916 | |||
913 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 917 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
914 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 918 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
915 | void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 919 | void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 4f4b00e173f2..82abf0cc9a12 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -581,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
581 | atomic_set(&res->asts_reserved, 0); | 581 | atomic_set(&res->asts_reserved, 0); |
582 | res->migration_pending = 0; | 582 | res->migration_pending = 0; |
583 | res->inflight_locks = 0; | 583 | res->inflight_locks = 0; |
584 | res->inflight_assert_workers = 0; | ||
584 | 585 | ||
585 | res->dlm = dlm; | 586 | res->dlm = dlm; |
586 | 587 | ||
@@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | |||
683 | wake_up(&res->wq); | 684 | wake_up(&res->wq); |
684 | } | 685 | } |
685 | 686 | ||
687 | void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, | ||
688 | struct dlm_lock_resource *res) | ||
689 | { | ||
690 | assert_spin_locked(&res->spinlock); | ||
691 | res->inflight_assert_workers++; | ||
692 | mlog(0, "%s:%.*s: inflight assert worker++: now %u\n", | ||
693 | dlm->name, res->lockname.len, res->lockname.name, | ||
694 | res->inflight_assert_workers); | ||
695 | } | ||
696 | |||
697 | static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, | ||
698 | struct dlm_lock_resource *res) | ||
699 | { | ||
700 | spin_lock(&res->spinlock); | ||
701 | __dlm_lockres_grab_inflight_worker(dlm, res); | ||
702 | spin_unlock(&res->spinlock); | ||
703 | } | ||
704 | |||
705 | static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, | ||
706 | struct dlm_lock_resource *res) | ||
707 | { | ||
708 | assert_spin_locked(&res->spinlock); | ||
709 | BUG_ON(res->inflight_assert_workers == 0); | ||
710 | res->inflight_assert_workers--; | ||
711 | mlog(0, "%s:%.*s: inflight assert worker--: now %u\n", | ||
712 | dlm->name, res->lockname.len, res->lockname.name, | ||
713 | res->inflight_assert_workers); | ||
714 | } | ||
715 | |||
716 | static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, | ||
717 | struct dlm_lock_resource *res) | ||
718 | { | ||
719 | spin_lock(&res->spinlock); | ||
720 | __dlm_lockres_drop_inflight_worker(dlm, res); | ||
721 | spin_unlock(&res->spinlock); | ||
722 | } | ||
723 | |||
686 | /* | 724 | /* |
687 | * lookup a lock resource by name. | 725 | * lookup a lock resource by name. |
688 | * may already exist in the hashtable. | 726 | * may already exist in the hashtable. |
@@ -1603,7 +1641,8 @@ send_response: | |||
1603 | mlog(ML_ERROR, "failed to dispatch assert master work\n"); | 1641 | mlog(ML_ERROR, "failed to dispatch assert master work\n"); |
1604 | response = DLM_MASTER_RESP_ERROR; | 1642 | response = DLM_MASTER_RESP_ERROR; |
1605 | dlm_lockres_put(res); | 1643 | dlm_lockres_put(res); |
1606 | } | 1644 | } else |
1645 | dlm_lockres_grab_inflight_worker(dlm, res); | ||
1607 | } else { | 1646 | } else { |
1608 | if (res) | 1647 | if (res) |
1609 | dlm_lockres_put(res); | 1648 | dlm_lockres_put(res); |
@@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data) | |||
2118 | dlm_lockres_release_ast(dlm, res); | 2157 | dlm_lockres_release_ast(dlm, res); |
2119 | 2158 | ||
2120 | put: | 2159 | put: |
2160 | dlm_lockres_drop_inflight_worker(dlm, res); | ||
2161 | |||
2121 | dlm_lockres_put(res); | 2162 | dlm_lockres_put(res); |
2122 | 2163 | ||
2123 | mlog(0, "finished with dlm_assert_master_worker\n"); | 2164 | mlog(0, "finished with dlm_assert_master_worker\n"); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 5de019437ea5..45067faf5695 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1708 | mlog_errno(-ENOMEM); | 1708 | mlog_errno(-ENOMEM); |
1709 | /* retry!? */ | 1709 | /* retry!? */ |
1710 | BUG(); | 1710 | BUG(); |
1711 | } | 1711 | } else |
1712 | __dlm_lockres_grab_inflight_worker(dlm, res); | ||
1712 | } else /* put.. incase we are not the master */ | 1713 | } else /* put.. incase we are not the master */ |
1713 | dlm_lockres_put(res); | 1714 | dlm_lockres_put(res); |
1714 | spin_unlock(&res->spinlock); | 1715 | spin_unlock(&res->spinlock); |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index cf53a822f07f..69aac6f088ad 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -259,11 +259,14 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
259 | * refs on it. */ | 259 | * refs on it. */ |
260 | unused = __dlm_lockres_unused(lockres); | 260 | unused = __dlm_lockres_unused(lockres); |
261 | if (!unused || | 261 | if (!unused || |
262 | (lockres->state & DLM_LOCK_RES_MIGRATING)) { | 262 | (lockres->state & DLM_LOCK_RES_MIGRATING) || |
263 | (lockres->inflight_assert_workers != 0)) { | ||
263 | mlog(0, "%s: res %.*s is in use or being remastered, " | 264 | mlog(0, "%s: res %.*s is in use or being remastered, " |
264 | "used %d, state %d\n", dlm->name, | 265 | "used %d, state %d, assert master workers %u\n", |
265 | lockres->lockname.len, lockres->lockname.name, | 266 | dlm->name, lockres->lockname.len, |
266 | !unused, lockres->state); | 267 | lockres->lockname.name, |
268 | !unused, lockres->state, | ||
269 | lockres->inflight_assert_workers); | ||
267 | list_move_tail(&lockres->purge, &dlm->purge_list); | 270 | list_move_tail(&lockres->purge, &dlm->purge_list); |
268 | spin_unlock(&lockres->spinlock); | 271 | spin_unlock(&lockres->spinlock); |
269 | continue; | 272 | continue; |