diff options
author | David Teigland <teigland@redhat.com> | 2007-03-28 10:56:46 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-05-01 04:11:00 -0400 |
commit | ef0c2bb05f40f9a0cd2deae63e199bfa62faa7fa (patch) | |
tree | df73645f93cfec29fe5b854ff5990a69b03d5c1d /fs/dlm/user.c | |
parent | 032067270295cfca11975c0f7b467244aa170c14 (diff) |
[DLM] overlapping cancel and unlock
Full cancel and force-unlock support. In the past, cancel and force-unlock
wouldn't work if there was another operation in progress on the lock. Now,
both cancel and unlock-force can overlap an operation on a lock, meaning there
may be 2 or 3 operations in progress on a lock in parallel. This support is
important not only because cancel and force-unlock are explicit operations
that an app can use, but both are used implicitly when a process exits while
holding locks.
Summary of changes:
- add-to and remove-from waiters functions were rewritten to handle situations
with more than one remote operation outstanding on a lock
- validate_unlock_args detects when an overlapping cancel/unlock-force
can be sent and when it needs to be delayed until a request/lookup
reply is received
- processing request/lookup replies detects when cancel/unlock-force
occurred during the op, and carries out the delayed cancel/unlock-force
- manipulation of the "waiters" (remote operation) state of a lock moved under
the standard rsb mutex that protects all the other lock state
- the two recovery routines related to locks on the waiters list changed
according to the way lkb's are now locked before accessing waiters state
- waiters recovery detects when lkb's being recovered have overlapping
cancel/unlock-force, and may not recover such locks
- revert_lock (cancel) returns a value to distinguish cases where it did
nothing vs cases where it actually did a cancel; the cancel completion ast
should only be done when cancel did something
- orphaned locks put on new list so they can be found later for purging
- cancel must be called on a lock when making it an orphan
- flag user locks (ENDOFLIFE) at the end of their useful life (to the
application) so we can return an error for any further cancel/unlock-force
- we weren't setting COMP/BAST ast flags if one was already set, so we'd lose
either a completion or blocking ast
- clear an unread bast on a lock that's become unlocked
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/dlm/user.c')
-rw-r--r-- | fs/dlm/user.c | 77 |
1 files changed, 44 insertions, 33 deletions
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 27a75ce571c..c978c67b1ef 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2006 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -128,35 +128,30 @@ static void compat_output(struct dlm_lock_result *res, | |||
128 | } | 128 | } |
129 | #endif | 129 | #endif |
130 | 130 | ||
131 | /* we could possibly check if the cancel of an orphan has resulted in the lkb | ||
132 | being removed and then remove that lkb from the orphans list and free it */ | ||
131 | 133 | ||
132 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | 134 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type) |
133 | { | 135 | { |
134 | struct dlm_ls *ls; | 136 | struct dlm_ls *ls; |
135 | struct dlm_user_args *ua; | 137 | struct dlm_user_args *ua; |
136 | struct dlm_user_proc *proc; | 138 | struct dlm_user_proc *proc; |
137 | int remove_ownqueue = 0; | 139 | int eol = 0, ast_type; |
138 | 140 | ||
139 | /* dlm_clear_proc_locks() sets ORPHAN/DEAD flag on each | 141 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) |
140 | lkb before dealing with it. We need to check this | ||
141 | flag before taking ls_clear_proc_locks mutex because if | ||
142 | it's set, dlm_clear_proc_locks() holds the mutex. */ | ||
143 | |||
144 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { | ||
145 | /* log_print("user_add_ast skip1 %x", lkb->lkb_flags); */ | ||
146 | return; | 142 | return; |
147 | } | ||
148 | 143 | ||
149 | ls = lkb->lkb_resource->res_ls; | 144 | ls = lkb->lkb_resource->res_ls; |
150 | mutex_lock(&ls->ls_clear_proc_locks); | 145 | mutex_lock(&ls->ls_clear_proc_locks); |
151 | 146 | ||
152 | /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast | 147 | /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast |
153 | can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed | 148 | can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed |
154 | lkb->ua so we can't try to use it. */ | 149 | lkb->ua so we can't try to use it. This second check is necessary |
150 | for cases where a completion ast is received for an operation that | ||
151 | began before clear_proc_locks did its cancel/unlock. */ | ||
155 | 152 | ||
156 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { | 153 | if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) |
157 | /* log_print("user_add_ast skip2 %x", lkb->lkb_flags); */ | ||
158 | goto out; | 154 | goto out; |
159 | } | ||
160 | 155 | ||
161 | DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb);); | 156 | DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb);); |
162 | ua = (struct dlm_user_args *)lkb->lkb_astparam; | 157 | ua = (struct dlm_user_args *)lkb->lkb_astparam; |
@@ -166,28 +161,42 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | |||
166 | goto out; | 161 | goto out; |
167 | 162 | ||
168 | spin_lock(&proc->asts_spin); | 163 | spin_lock(&proc->asts_spin); |
169 | if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { | 164 | |
165 | ast_type = lkb->lkb_ast_type; | ||
166 | lkb->lkb_ast_type |= type; | ||
167 | |||
168 | if (!ast_type) { | ||
170 | kref_get(&lkb->lkb_ref); | 169 | kref_get(&lkb->lkb_ref); |
171 | list_add_tail(&lkb->lkb_astqueue, &proc->asts); | 170 | list_add_tail(&lkb->lkb_astqueue, &proc->asts); |
172 | lkb->lkb_ast_type |= type; | ||
173 | wake_up_interruptible(&proc->wait); | 171 | wake_up_interruptible(&proc->wait); |
174 | } | 172 | } |
175 | 173 | if (type == AST_COMP && (ast_type & AST_COMP)) | |
176 | /* noqueue requests that fail may need to be removed from the | 174 | log_debug(ls, "ast overlap %x status %x %x", |
177 | proc's locks list, there should be a better way of detecting | 175 | lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags); |
178 | this situation than checking all these things... */ | 176 | |
179 | 177 | /* Figure out if this lock is at the end of its life and no longer | |
180 | if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV && | 178 | available for the application to use. The lkb still exists until |
181 | ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) | 179 | the final ast is read. A lock becomes EOL in three situations: |
182 | remove_ownqueue = 1; | 180 | 1. a noqueue request fails with EAGAIN |
183 | 181 | 2. an unlock completes with EUNLOCK | |
184 | /* unlocks or cancels of waiting requests need to be removed from the | 182 | 3. a cancel of a waiting request completes with ECANCEL |
185 | proc's unlocking list, again there must be a better way... */ | 183 | An EOL lock needs to be removed from the process's list of locks. |
186 | 184 | And we can't allow any new operation on an EOL lock. This is | |
187 | if (ua->lksb.sb_status == -DLM_EUNLOCK || | 185 | not related to the lifetime of the lkb struct which is managed |
186 | entirely by refcount. */ | ||
187 | |||
188 | if (type == AST_COMP && | ||
189 | lkb->lkb_grmode == DLM_LOCK_IV && | ||
190 | ua->lksb.sb_status == -EAGAIN) | ||
191 | eol = 1; | ||
192 | else if (ua->lksb.sb_status == -DLM_EUNLOCK || | ||
188 | (ua->lksb.sb_status == -DLM_ECANCEL && | 193 | (ua->lksb.sb_status == -DLM_ECANCEL && |
189 | lkb->lkb_grmode == DLM_LOCK_IV)) | 194 | lkb->lkb_grmode == DLM_LOCK_IV)) |
190 | remove_ownqueue = 1; | 195 | eol = 1; |
196 | if (eol) { | ||
197 | lkb->lkb_ast_type &= ~AST_BAST; | ||
198 | lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; | ||
199 | } | ||
191 | 200 | ||
192 | /* We want to copy the lvb to userspace when the completion | 201 | /* We want to copy the lvb to userspace when the completion |
193 | ast is read if the status is 0, the lock has an lvb and | 202 | ast is read if the status is 0, the lock has an lvb and |
@@ -204,11 +213,13 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | |||
204 | 213 | ||
205 | spin_unlock(&proc->asts_spin); | 214 | spin_unlock(&proc->asts_spin); |
206 | 215 | ||
207 | if (remove_ownqueue) { | 216 | if (eol) { |
208 | spin_lock(&ua->proc->locks_spin); | 217 | spin_lock(&ua->proc->locks_spin); |
209 | list_del_init(&lkb->lkb_ownqueue); | 218 | if (!list_empty(&lkb->lkb_ownqueue)) { |
219 | list_del_init(&lkb->lkb_ownqueue); | ||
220 | dlm_put_lkb(lkb); | ||
221 | } | ||
210 | spin_unlock(&ua->proc->locks_spin); | 222 | spin_unlock(&ua->proc->locks_spin); |
211 | dlm_put_lkb(lkb); | ||
212 | } | 223 | } |
213 | out: | 224 | out: |
214 | mutex_unlock(&ls->ls_clear_proc_locks); | 225 | mutex_unlock(&ls->ls_clear_proc_locks); |