diff options
author | David Teigland <teigland@redhat.com> | 2006-12-13 11:36:37 -0500 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-02-05 13:35:47 -0500 |
commit | dc200a8848cca8b0e99012996c66f4b379a390ed (patch) | |
tree | 8237c1e9d972beabbc674136d33479eddd396e7d | |
parent | c378051177dce4421428fd1691ffdf15ad57c161 (diff) |
[DLM] fix resend rcom lock
There's a chance the new master of a resource hasn't learned it's the new
master before another node sends it a lock during recovery. The node
sending the lock needs to resend it if this happens.
- A sends a master lookup for resource R to C
- B sends a master lookup for resource R to C
- C receives A's lookup, assigns A to be master of R and
sends a reply back to A
- C receives B's lookup and sends a reply back to B saying
that A is the master
- B receives lookup reply from C and sends its lock for R to A
- A receives lock from B, doesn't think it's the master of R
and sends an error back to B
- A receives lookup reply from C and becomes master of R
- B gets error back from A and resends its lock back to A
(this resending is what this patch does)
- A receives lock from B, it now sees it's the master of R
and takes the lock
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- | fs/dlm/lock.c | 10 |
1 files changed, 9 insertions, 1 deletions
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 30878defaeb6..69ada5887078 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -3571,6 +3571,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
3571 | lock_rsb(r); | 3571 | lock_rsb(r); |
3572 | 3572 | ||
3573 | switch (error) { | 3573 | switch (error) { |
3574 | case -EBADR: | ||
3575 | /* There's a chance the new master received our lock before | ||
3576 | dlm_recover_master_reply(), this wouldn't happen if we did | ||
3577 | a barrier between recover_masters and recover_locks. */ | ||
3578 | log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id, | ||
3579 | (unsigned long)r, r->res_name); | ||
3580 | dlm_send_rcom_lock(r, lkb); | ||
3581 | goto out; | ||
3574 | case -EEXIST: | 3582 | case -EEXIST: |
3575 | log_debug(ls, "master copy exists %x", lkb->lkb_id); | 3583 | log_debug(ls, "master copy exists %x", lkb->lkb_id); |
3576 | /* fall through */ | 3584 | /* fall through */ |
@@ -3585,7 +3593,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
3585 | /* an ack for dlm_recover_locks() which waits for replies from | 3593 | /* an ack for dlm_recover_locks() which waits for replies from |
3586 | all the locks it sends to new masters */ | 3594 | all the locks it sends to new masters */ |
3587 | dlm_recovered_lock(r); | 3595 | dlm_recovered_lock(r); |
3588 | 3596 | out: | |
3589 | unlock_rsb(r); | 3597 | unlock_rsb(r); |
3590 | put_rsb(r); | 3598 | put_rsb(r); |
3591 | dlm_put_lkb(lkb); | 3599 | dlm_put_lkb(lkb); |