aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Teigland <teigland@redhat.com> 2006-12-13 11:36:37 -0500
committer Steven Whitehouse <swhiteho@redhat.com> 2007-02-05 13:35:47 -0500
commit dc200a8848cca8b0e99012996c66f4b379a390ed (patch)
tree 8237c1e9d972beabbc674136d33479eddd396e7d
parent c378051177dce4421428fd1691ffdf15ad57c161 (diff)
[DLM] fix resend rcom lock
There's a chance the new master of a resource hasn't learned it's the new master before another node sends it a lock during recovery. The node sending the lock needs to resend if this happens.

- A sends a master lookup for resource R to C
- B sends a master lookup for resource R to C
- C receives A's lookup, assigns A to be master of R and sends a reply back to A
- C receives B's lookup and sends a reply back to B saying that A is the master
- B receives lookup reply from C and sends its lock for R to A
- A receives lock from B, doesn't think it's the master of R and sends an error back to B
- A receives lookup reply from C and becomes master of R
- B gets error back from A and resends its lock back to A (this resending is what this patch does)
- A receives lock from B, it now sees it's the master of R and takes the lock

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- fs/dlm/lock.c 10
1 files changed, 9 insertions, 1 deletions
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 30878defaeb6..69ada5887078 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3571,6 +3571,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3571 lock_rsb(r); 3571 lock_rsb(r);
3572 3572
3573 switch (error) { 3573 switch (error) {
3574 case -EBADR:
3575 /* There's a chance the new master received our lock before
3576 dlm_recover_master_reply(), this wouldn't happen if we did
3577 a barrier between recover_masters and recover_locks. */
3578 log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
3579 (unsigned long)r, r->res_name);
3580 dlm_send_rcom_lock(r, lkb);
3581 goto out;
3574 case -EEXIST: 3582 case -EEXIST:
3575 log_debug(ls, "master copy exists %x", lkb->lkb_id); 3583 log_debug(ls, "master copy exists %x", lkb->lkb_id);
3576 /* fall through */ 3584 /* fall through */
@@ -3585,7 +3593,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
3585 /* an ack for dlm_recover_locks() which waits for replies from 3593 /* an ack for dlm_recover_locks() which waits for replies from
3586 all the locks it sends to new masters */ 3594 all the locks it sends to new masters */
3587 dlm_recovered_lock(r); 3595 dlm_recovered_lock(r);
3588 3596 out:
3589 unlock_rsb(r); 3597 unlock_rsb(r);
3590 put_rsb(r); 3598 put_rsb(r);
3591 dlm_put_lkb(lkb); 3599 dlm_put_lkb(lkb);