aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Teigland <teigland@redhat.com> 2008-01-07 17:15:05 -0500
committer: David Teigland <teigland@redhat.com> 2008-01-30 12:04:42 -0500
commit: 601342ce022b964f756b67f2eb99b605c1afa3ed (patch)
tree: 78b8c873dbc41bca33ed23dcf61fbea1572a007c
parent: 8a358ca8e738b6226b004efea462ac28c0a2bbb1 (diff)
dlm: recover locks waiting for overlap replies
When recovery looks at locks waiting for replies, it fails to consider locks that have already received a reply for their first remote operation, but have not received a reply for a secondary, overlapping unlock/cancel. The appropriate stub reply needs to be called for these waiters. Appears when we start doing recovery in the presence of many overlapping unlock/cancel ops. Signed-off-by: David Teigland <teigland@redhat.com>
-rw-r--r-- fs/dlm/lock.c 37
1 files changed, 32 insertions, 5 deletions
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 43ca2a30c413..a758f1b80e3b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3846,6 +3846,7 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
3846void dlm_recover_waiters_pre(struct dlm_ls *ls) 3846void dlm_recover_waiters_pre(struct dlm_ls *ls)
3847{ 3847{
3848 struct dlm_lkb *lkb, *safe; 3848 struct dlm_lkb *lkb, *safe;
3849 int wait_type, stub_unlock_result, stub_cancel_result;
3849 3850
3850 mutex_lock(&ls->ls_waiters_mutex); 3851 mutex_lock(&ls->ls_waiters_mutex);
3851 3852
@@ -3864,7 +3865,33 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3864 if (!waiter_needs_recovery(ls, lkb)) 3865 if (!waiter_needs_recovery(ls, lkb))
3865 continue; 3866 continue;
3866 3867
3867 switch (lkb->lkb_wait_type) { 3868 wait_type = lkb->lkb_wait_type;
3869 stub_unlock_result = -DLM_EUNLOCK;
3870 stub_cancel_result = -DLM_ECANCEL;
3871
3872 /* Main reply may have been received leaving a zero wait_type,
3873 but a reply for the overlapping op may not have been
3874 received. In that case we need to fake the appropriate
3875 reply for the overlap op. */
3876
3877 if (!wait_type) {
3878 if (is_overlap_cancel(lkb)) {
3879 wait_type = DLM_MSG_CANCEL;
3880 if (lkb->lkb_grmode == DLM_LOCK_IV)
3881 stub_cancel_result = 0;
3882 }
3883 if (is_overlap_unlock(lkb)) {
3884 wait_type = DLM_MSG_UNLOCK;
3885 if (lkb->lkb_grmode == DLM_LOCK_IV)
3886 stub_unlock_result = -ENOENT;
3887 }
3888
3889 log_debug(ls, "rwpre overlap %x %x %d %d %d",
3890 lkb->lkb_id, lkb->lkb_flags, wait_type,
3891 stub_cancel_result, stub_unlock_result);
3892 }
3893
3894 switch (wait_type) {
3868 3895
3869 case DLM_MSG_REQUEST: 3896 case DLM_MSG_REQUEST:
3870 lkb->lkb_flags |= DLM_IFL_RESEND; 3897 lkb->lkb_flags |= DLM_IFL_RESEND;
@@ -3877,7 +3904,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3877 case DLM_MSG_UNLOCK: 3904 case DLM_MSG_UNLOCK:
3878 hold_lkb(lkb); 3905 hold_lkb(lkb);
3879 ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; 3906 ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
3880 ls->ls_stub_ms.m_result = -DLM_EUNLOCK; 3907 ls->ls_stub_ms.m_result = stub_unlock_result;
3881 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 3908 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3882 _receive_unlock_reply(lkb, &ls->ls_stub_ms); 3909 _receive_unlock_reply(lkb, &ls->ls_stub_ms);
3883 dlm_put_lkb(lkb); 3910 dlm_put_lkb(lkb);
@@ -3886,15 +3913,15 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
3886 case DLM_MSG_CANCEL: 3913 case DLM_MSG_CANCEL:
3887 hold_lkb(lkb); 3914 hold_lkb(lkb);
3888 ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; 3915 ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
3889 ls->ls_stub_ms.m_result = -DLM_ECANCEL; 3916 ls->ls_stub_ms.m_result = stub_cancel_result;
3890 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 3917 ls->ls_stub_ms.m_flags = lkb->lkb_flags;
3891 _receive_cancel_reply(lkb, &ls->ls_stub_ms); 3918 _receive_cancel_reply(lkb, &ls->ls_stub_ms);
3892 dlm_put_lkb(lkb); 3919 dlm_put_lkb(lkb);
3893 break; 3920 break;
3894 3921
3895 default: 3922 default:
3896 log_error(ls, "invalid lkb wait_type %d", 3923 log_error(ls, "invalid lkb wait_type %d %d",
3897 lkb->lkb_wait_type); 3924 lkb->lkb_wait_type, wait_type);
3898 } 3925 }
3899 schedule(); 3926 schedule();
3900 } 3927 }