aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
author Sunil Mushran <sunil.mushran@oracle.com> 2011-07-24 13:30:54 -0400
committer Sunil Mushran <sunil.mushran@oracle.com> 2011-07-24 13:30:54 -0400
commit a2c0cc1579176bd0808ef7deea456767dfa80217 (patch)
tree 4f797a5fda954ce8a4783e9149da455879ca3641 /fs/ocfs2
parent ff0a522e7db79625aa27a433467eb94c5e255718 (diff)
ocfs2/dlm: dlmlock_remote() needs to account for remastery
In dlmlock_remote(), we wait for the resource to stop being active before setting the inprogress flag. Active includes recovery, migration, etc.

The problem here is that if the resource was being recovered or migrated, the new owner could very well be that node itself (and thus not a remote node).

This problem was observed in Oracle bug#12583620. The error messages observed were as follows:

dlm_send_remote_lock_request:337 ERROR: Error -40 (ELOOP) when sending message 503 (key 0xd6d8c7) to node 2
dlmlock_remote:271 ERROR: dlm status = DLM_BADARGS
dlmlock:751 ERROR: dlm status = DLM_BADARGS

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- fs/ocfs2/dlm/dlmlock.c | 18
1 files changed, 8 insertions, 10 deletions
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 3ef2c1adfb8f..f32fcba04923 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -227,10 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
227 lock->ml.type, res->lockname.len, 227 lock->ml.type, res->lockname.len,
228 res->lockname.name, flags); 228 res->lockname.name, flags);
229 229
230 /*
231 * Wait if resource is getting recovered, remastered, etc.
232 * If the resource was remastered and new owner is self, then exit.
233 */
230 spin_lock(&res->spinlock); 234 spin_lock(&res->spinlock);
231
232 /* will exit this call with spinlock held */
233 __dlm_wait_on_lockres(res); 235 __dlm_wait_on_lockres(res);
236 if (res->owner == dlm->node_num) {
237 spin_unlock(&res->spinlock);
238 return DLM_RECOVERING;
239 }
234 res->state |= DLM_LOCK_RES_IN_PROGRESS; 240 res->state |= DLM_LOCK_RES_IN_PROGRESS;
235 241
236 /* add lock to local (secondary) queue */ 242 /* add lock to local (secondary) queue */
@@ -710,18 +716,10 @@ retry_lock:
710 716
711 if (status == DLM_RECOVERING || status == DLM_MIGRATING || 717 if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
712 status == DLM_FORWARD) { 718 status == DLM_FORWARD) {
713 mlog(0, "retrying lock with migration/"
714 "recovery/in progress\n");
715 msleep(100); 719 msleep(100);
716 /* no waiting for dlm_reco_thread */
717 if (recovery) { 720 if (recovery) {
718 if (status != DLM_RECOVERING) 721 if (status != DLM_RECOVERING)
719 goto retry_lock; 722 goto retry_lock;
720
721 mlog(0, "%s: got RECOVERING "
722 "for $RECOVERY lock, master "
723 "was %u\n", dlm->name,
724 res->owner);
725 /* wait to see the node go down, then 723 /* wait to see the node go down, then
726 * drop down and allow the lockres to 724 * drop down and allow the lockres to
727 * get cleaned up. need to remaster. */ 725 * get cleaned up. need to remaster. */