about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author David Howells <dhowells@redhat.com> 2017-11-13 11:59:50 -0500
committer David Howells <dhowells@redhat.com> 2017-11-17 05:06:13 -0500
commit 0fafdc9f888b42499001b7ca9d9f371c0b2932f4 (patch)
tree 188f5a33b0a043704c0a080519581ce1cf4f4083
parent cf9b0772f2e410645fece13b749bd56505b998b8 (diff)
afs: Fix file locking
Fix the AFS file locking whereby the use of the big kernel lock (which could be slept with) was replaced by a spinlock (which couldn't). The problem is that the AFS code was doing stuff inside the critical section that might call schedule(), so this is a broken transformation.

Fix this by the following means:

 (1) Use a state machine with a proper state that can only be changed under the spinlock rather than using a collection of bit flags.

 (2) Cache the key used for the lock and the lock type in the afs_vnode struct so that the manager work function doesn't have to refer to a file_lock struct that's been dequeued. This makes signal handling safer.

 (3) Move the unlock from afs_do_unlk() to afs_fl_release_private() which means that unlock is achieved in other circumstances too.

 (4) Unlock the file on the server before taking the next conflicting lock.

Also change:

 (1) Check the permits on a file before actually trying the lock.

 (2) fsync the file before effecting an explicit unlock operation. We don't fsync if the lock is erased otherwise as we might not be in a context where we can actually do that.

Further fixes:

 (1) Fixed-fileserver address rotation is made to work. It's only used by the locking functions, so couldn't be tested before.

Fixes: 72f98e72551f ("locks: turn lock_flocks into a spinlock")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: jlayton@redhat.com
-rw-r--r-- fs/afs/flock.c 548
-rw-r--r-- fs/afs/internal.h 23
-rw-r--r-- fs/afs/rotate.c 70
-rw-r--r-- fs/afs/security.c 4
-rw-r--r-- fs/afs/server_list.c 2
5 files changed, 385 insertions, 262 deletions
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 7571a5dfd5a3..c40ba2fe3cbe 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -170,7 +170,7 @@ void afs_lock_work(struct work_struct *work)
170{ 170{
171 struct afs_vnode *vnode = 171 struct afs_vnode *vnode =
172 container_of(work, struct afs_vnode, lock_work.work); 172 container_of(work, struct afs_vnode, lock_work.work);
173 struct file_lock *fl; 173 struct file_lock *fl, *next;
174 afs_lock_type_t type; 174 afs_lock_type_t type;
175 struct key *key; 175 struct key *key;
176 int ret; 176 int ret;
@@ -179,117 +179,136 @@ void afs_lock_work(struct work_struct *work)
179 179
180 spin_lock(&vnode->lock); 180 spin_lock(&vnode->lock);
181 181
182 if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) { 182again:
183 _debug("wstate %u for %p", vnode->lock_state, vnode);
184 switch (vnode->lock_state) {
185 case AFS_VNODE_LOCK_NEED_UNLOCK:
183 _debug("unlock"); 186 _debug("unlock");
187 vnode->lock_state = AFS_VNODE_LOCK_UNLOCKING;
184 spin_unlock(&vnode->lock); 188 spin_unlock(&vnode->lock);
185 189
186 /* attempt to release the server lock; if it fails, we just 190 /* attempt to release the server lock; if it fails, we just
187 * wait 5 minutes and it'll time out anyway */ 191 * wait 5 minutes and it'll expire anyway */
188 ret = afs_release_lock(vnode, vnode->unlock_key); 192 ret = afs_release_lock(vnode, vnode->lock_key);
189 if (ret < 0) 193 if (ret < 0)
190 printk(KERN_WARNING "AFS:" 194 printk(KERN_WARNING "AFS:"
191 " Failed to release lock on {%x:%x} error %d\n", 195 " Failed to release lock on {%x:%x} error %d\n",
192 vnode->fid.vid, vnode->fid.vnode, ret); 196 vnode->fid.vid, vnode->fid.vnode, ret);
193 197
194 spin_lock(&vnode->lock); 198 spin_lock(&vnode->lock);
195 key_put(vnode->unlock_key); 199 key_put(vnode->lock_key);
196 vnode->unlock_key = NULL; 200 vnode->lock_key = NULL;
197 clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags); 201 vnode->lock_state = AFS_VNODE_LOCK_NONE;
198 } 202
203 if (list_empty(&vnode->pending_locks)) {
204 spin_unlock(&vnode->lock);
205 return;
206 }
207
208 /* The new front of the queue now owns the state variables. */
209 next = list_entry(vnode->pending_locks.next,
210 struct file_lock, fl_u.afs.link);
211 vnode->lock_key = afs_file_key(next->fl_file);
212 vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
213 vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
214 goto again;
199 215
200 /* if we've got a lock, then it must be time to extend that lock as AFS 216 /* If we've already got a lock, then it must be time to extend that
201 * locks time out after 5 minutes */ 217 * lock as AFS locks time out after 5 minutes.
202 if (!list_empty(&vnode->granted_locks)) { 218 */
219 case AFS_VNODE_LOCK_GRANTED:
203 _debug("extend"); 220 _debug("extend");
204 221
205 if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags)) 222 ASSERT(!list_empty(&vnode->granted_locks));
206 BUG(); 223
207 fl = list_entry(vnode->granted_locks.next, 224 key = key_get(vnode->lock_key);
208 struct file_lock, fl_u.afs.link); 225 vnode->lock_state = AFS_VNODE_LOCK_EXTENDING;
209 key = key_get(afs_file_key(fl->fl_file));
210 spin_unlock(&vnode->lock); 226 spin_unlock(&vnode->lock);
211 227
212 ret = afs_extend_lock(vnode, key); 228 ret = afs_extend_lock(vnode, key); /* RPC */
213 clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
214 key_put(key); 229 key_put(key);
215 switch (ret) { 230
216 case 0: 231 if (ret < 0)
232 pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
233 vnode->fid.vid, vnode->fid.vnode, ret);
234
235 spin_lock(&vnode->lock);
236
237 if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
238 goto again;
239 vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
240
241 if (ret == 0)
217 afs_schedule_lock_extension(vnode); 242 afs_schedule_lock_extension(vnode);
218 break; 243 else
219 default:
220 /* ummm... we failed to extend the lock - retry
221 * extension shortly */
222 printk(KERN_WARNING "AFS:"
223 " Failed to extend lock on {%x:%x} error %d\n",
224 vnode->fid.vid, vnode->fid.vnode, ret);
225 queue_delayed_work(afs_lock_manager, &vnode->lock_work, 244 queue_delayed_work(afs_lock_manager, &vnode->lock_work,
226 HZ * 10); 245 HZ * 10);
227 break; 246 spin_unlock(&vnode->lock);
228 } 247 _leave(" [ext]");
229 _leave(" [extend]");
230 return; 248 return;
231 }
232 249
233 /* if we don't have a granted lock, then we must've been called back by 250 /* If we don't have a granted lock, then we must've been called
234 * the server, and so it might be possible to get a lock we're 251 * back by the server, and so it might be possible to get a
235 * currently waiting for */ 252 * lock we're currently waiting for.
236 if (!list_empty(&vnode->pending_locks)) { 253 */
254 case AFS_VNODE_LOCK_WAITING_FOR_CB:
237 _debug("get"); 255 _debug("get");
238 256
239 if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags)) 257 key = key_get(vnode->lock_key);
240 BUG(); 258 type = vnode->lock_type;
241 fl = list_entry(vnode->pending_locks.next, 259 vnode->lock_state = AFS_VNODE_LOCK_SETTING;
242 struct file_lock, fl_u.afs.link);
243 key = key_get(afs_file_key(fl->fl_file));
244 type = (fl->fl_type == F_RDLCK) ?
245 AFS_LOCK_READ : AFS_LOCK_WRITE;
246 spin_unlock(&vnode->lock); 260 spin_unlock(&vnode->lock);
247 261
248 ret = afs_set_lock(vnode, key, type); 262 ret = afs_set_lock(vnode, key, type); /* RPC */
249 clear_bit(AFS_VNODE_LOCKING, &vnode->flags); 263 key_put(key);
264
265 spin_lock(&vnode->lock);
250 switch (ret) { 266 switch (ret) {
251 case -EWOULDBLOCK: 267 case -EWOULDBLOCK:
252 _debug("blocked"); 268 _debug("blocked");
253 break; 269 break;
254 case 0: 270 case 0:
255 _debug("acquired"); 271 _debug("acquired");
256 if (type == AFS_LOCK_READ) 272 vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
257 set_bit(AFS_VNODE_READLOCKED, &vnode->flags); 273 /* Fall through */
258 else
259 set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
260 ret = AFS_LOCK_GRANTED;
261 default: 274 default:
262 spin_lock(&vnode->lock); 275 /* Pass the lock or the error onto the first locker in
263 /* the pending lock may have been withdrawn due to a 276 * the list - if they're looking for this type of lock.
264 * signal */ 277 * If they're not, we assume that whoever asked for it
265 if (list_entry(vnode->pending_locks.next, 278 * took a signal.
266 struct file_lock, fl_u.afs.link) == fl) { 279 */
267 fl->fl_u.afs.state = ret; 280 if (list_empty(&vnode->pending_locks)) {
268 if (ret == AFS_LOCK_GRANTED)
269 afs_grant_locks(vnode, fl);
270 else
271 list_del_init(&fl->fl_u.afs.link);
272 wake_up(&fl->fl_wait);
273 spin_unlock(&vnode->lock);
274 } else {
275 _debug("withdrawn"); 281 _debug("withdrawn");
276 clear_bit(AFS_VNODE_READLOCKED, &vnode->flags); 282 vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
277 clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); 283 goto again;
278 spin_unlock(&vnode->lock);
279 afs_release_lock(vnode, key);
280 if (!list_empty(&vnode->pending_locks))
281 afs_lock_may_be_available(vnode);
282 } 284 }
283 break; 285
286 fl = list_entry(vnode->pending_locks.next,
287 struct file_lock, fl_u.afs.link);
288 type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
289 if (vnode->lock_type != type) {
290 _debug("changed");
291 vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
292 goto again;
293 }
294
295 fl->fl_u.afs.state = ret;
296 if (ret == 0)
297 afs_grant_locks(vnode, fl);
298 else
299 list_del_init(&fl->fl_u.afs.link);
300 wake_up(&fl->fl_wait);
301 spin_unlock(&vnode->lock);
302 _leave(" [granted]");
303 return;
284 } 304 }
285 key_put(key); 305
286 _leave(" [pend]"); 306 default:
307 /* Looks like a lock request was withdrawn. */
308 spin_unlock(&vnode->lock);
309 _leave(" [no]");
287 return; 310 return;
288 } 311 }
289
290 /* looks like the lock request was withdrawn on a signal */
291 spin_unlock(&vnode->lock);
292 _leave(" [no locks]");
293} 312}
294 313
295/* 314/*
@@ -298,15 +317,105 @@ void afs_lock_work(struct work_struct *work)
298 * AF_RXRPC 317 * AF_RXRPC
299 * - the caller must hold the vnode lock 318 * - the caller must hold the vnode lock
300 */ 319 */
301static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key) 320static void afs_defer_unlock(struct afs_vnode *vnode)
302{ 321{
303 cancel_delayed_work(&vnode->lock_work); 322 _enter("");
304 if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) && 323
305 !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags)) 324 if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED ||
306 BUG(); 325 vnode->lock_state == AFS_VNODE_LOCK_EXTENDING) {
307 if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) 326 cancel_delayed_work(&vnode->lock_work);
308 BUG(); 327
309 vnode->unlock_key = key_get(key); 328 vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
329 afs_lock_may_be_available(vnode);
330 }
331}
332
333/*
334 * Check that our view of the file metadata is up to date and check to see
335 * whether we think that we have a locking permit.
336 */
337static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
338 afs_lock_type_t type, bool can_sleep)
339{
340 afs_access_t access;
341 int ret;
342
343 /* Make sure we've got a callback on this file and that our view of the
344 * data version is up to date.
345 */
346 ret = afs_validate(vnode, key);
347 if (ret < 0)
348 return ret;
349
350 /* Check the permission set to see if we're actually going to be
351 * allowed to get a lock on this file.
352 */
353 ret = afs_check_permit(vnode, key, &access);
354 if (ret < 0)
355 return ret;
356
357 /* At a rough estimation, you need LOCK, WRITE or INSERT perm to
358 * read-lock a file and WRITE or INSERT perm to write-lock a file.
359 *
360 * We can't rely on the server to do this for us since if we want to
361 * share a read lock that we already have, we won't go to the server.
362 */
363 if (type == AFS_LOCK_READ) {
364 if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
365 return -EACCES;
366 if (vnode->status.lock_count == -1 && !can_sleep)
367 return -EAGAIN; /* Write locked */
368 } else {
369 if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
370 return -EACCES;
371 if (vnode->status.lock_count != 0 && !can_sleep)
372 return -EAGAIN; /* Locked */
373 }
374
375 return 0;
376}
377
378/*
379 * Remove the front runner from the pending queue.
380 * - The caller must hold vnode->lock.
381 */
382static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
383{
384 struct file_lock *next;
385
386 _enter("");
387
388 /* ->lock_type, ->lock_key and ->lock_state only belong to this
389 * file_lock if we're at the front of the pending queue or if we have
390 * the lock granted or if the lock_state is NEED_UNLOCK or UNLOCKING.
391 */
392 if (vnode->granted_locks.next == &fl->fl_u.afs.link &&
393 vnode->granted_locks.prev == &fl->fl_u.afs.link) {
394 list_del_init(&fl->fl_u.afs.link);
395 afs_defer_unlock(vnode);
396 return;
397 }
398
399 if (!list_empty(&vnode->granted_locks) ||
400 vnode->pending_locks.next != &fl->fl_u.afs.link) {
401 list_del_init(&fl->fl_u.afs.link);
402 return;
403 }
404
405 list_del_init(&fl->fl_u.afs.link);
406 key_put(vnode->lock_key);
407 vnode->lock_key = NULL;
408 vnode->lock_state = AFS_VNODE_LOCK_NONE;
409
410 if (list_empty(&vnode->pending_locks))
411 return;
412
413 /* The new front of the queue now owns the state variables. */
414 next = list_entry(vnode->pending_locks.next,
415 struct file_lock, fl_u.afs.link);
416 vnode->lock_key = afs_file_key(next->fl_file);
417 vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
418 vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
310 afs_lock_may_be_available(vnode); 419 afs_lock_may_be_available(vnode);
311} 420}
312 421
@@ -315,7 +424,7 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
315 */ 424 */
316static int afs_do_setlk(struct file *file, struct file_lock *fl) 425static int afs_do_setlk(struct file *file, struct file_lock *fl)
317{ 426{
318 struct inode *inode = file_inode(file); 427 struct inode *inode = locks_inode(file);
319 struct afs_vnode *vnode = AFS_FS_I(inode); 428 struct afs_vnode *vnode = AFS_FS_I(inode);
320 afs_lock_type_t type; 429 afs_lock_type_t type;
321 struct key *key = afs_file_key(file); 430 struct key *key = afs_file_key(file);
@@ -333,165 +442,136 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
333 442
334 type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; 443 type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
335 444
336 spin_lock(&inode->i_lock); 445 ret = afs_do_setlk_check(vnode, key, type, fl->fl_flags & FL_SLEEP);
337
338 /* make sure we've got a callback on this file and that our view of the
339 * data version is up to date */
340 ret = afs_validate(vnode, key);
341 if (ret < 0) 446 if (ret < 0)
342 goto error; 447 return ret;
343
344 if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) {
345 ret = -EAGAIN;
346 goto error;
347 }
348 448
349 spin_lock(&vnode->lock); 449 spin_lock(&vnode->lock);
350 450
351 /* if we've already got a readlock on the server then we can instantly 451 /* If we've already got a readlock on the server then we instantly
352 * grant another readlock, irrespective of whether there are any 452 * grant another readlock, irrespective of whether there are any
353 * pending writelocks */ 453 * pending writelocks.
454 */
354 if (type == AFS_LOCK_READ && 455 if (type == AFS_LOCK_READ &&
355 vnode->flags & (1 << AFS_VNODE_READLOCKED)) { 456 vnode->lock_state == AFS_VNODE_LOCK_GRANTED &&
457 vnode->lock_type == AFS_LOCK_READ) {
356 _debug("instant readlock"); 458 _debug("instant readlock");
357 ASSERTCMP(vnode->flags &
358 ((1 << AFS_VNODE_LOCKING) |
359 (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
360 ASSERT(!list_empty(&vnode->granted_locks)); 459 ASSERT(!list_empty(&vnode->granted_locks));
361 goto sharing_existing_lock; 460 goto share_existing_lock;
362 } 461 }
363 462
364 /* if there's no-one else with a lock on this vnode, then we need to 463 list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
365 * ask the server for a lock */
366 if (list_empty(&vnode->pending_locks) &&
367 list_empty(&vnode->granted_locks)) {
368 _debug("not locked");
369 ASSERTCMP(vnode->flags &
370 ((1 << AFS_VNODE_LOCKING) |
371 (1 << AFS_VNODE_READLOCKED) |
372 (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
373 list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
374 set_bit(AFS_VNODE_LOCKING, &vnode->flags);
375 spin_unlock(&vnode->lock);
376 464
377 ret = afs_set_lock(vnode, key, type); 465 if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
378 clear_bit(AFS_VNODE_LOCKING, &vnode->flags); 466 goto need_to_wait;
379 switch (ret) {
380 case 0:
381 _debug("acquired");
382 goto acquired_server_lock;
383 case -EWOULDBLOCK:
384 _debug("would block");
385 spin_lock(&vnode->lock);
386 ASSERT(list_empty(&vnode->granted_locks));
387 ASSERTCMP(vnode->pending_locks.next, ==,
388 &fl->fl_u.afs.link);
389 goto wait;
390 default:
391 spin_lock(&vnode->lock);
392 list_del_init(&fl->fl_u.afs.link);
393 spin_unlock(&vnode->lock);
394 goto error;
395 }
396 }
397 467
398 /* otherwise, we need to wait for a local lock to become available */ 468 /* We don't have a lock on this vnode and we aren't currently waiting
399 _debug("wait local"); 469 * for one either, so ask the server for a lock.
400 list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks); 470 *
401wait: 471 * Note that we need to be careful if we get interrupted by a signal
402 if (!(fl->fl_flags & FL_SLEEP)) { 472 * after dispatching the request as we may still get the lock, even
403 _debug("noblock"); 473 * though we don't wait for the reply (it's not too bad a problem - the
404 ret = -EAGAIN; 474 * lock will expire in 10 mins anyway).
405 goto abort_attempt; 475 */
406 } 476 _debug("not locked");
477 vnode->lock_key = key_get(key);
478 vnode->lock_type = type;
479 vnode->lock_state = AFS_VNODE_LOCK_SETTING;
407 spin_unlock(&vnode->lock); 480 spin_unlock(&vnode->lock);
408 481
409 /* now we need to sleep and wait for the lock manager thread to get the 482 ret = afs_set_lock(vnode, key, type); /* RPC */
410 * lock from the server */
411 _debug("sleep");
412 ret = wait_event_interruptible(fl->fl_wait,
413 fl->fl_u.afs.state <= AFS_LOCK_GRANTED);
414 if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
415 ret = fl->fl_u.afs.state;
416 if (ret < 0)
417 goto error;
418 spin_lock(&vnode->lock);
419 goto given_lock;
420 }
421
422 /* we were interrupted, but someone may still be in the throes of
423 * giving us the lock */
424 _debug("intr");
425 ASSERTCMP(ret, ==, -ERESTARTSYS);
426 483
427 spin_lock(&vnode->lock); 484 spin_lock(&vnode->lock);
428 if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) { 485 switch (ret) {
429 ret = fl->fl_u.afs.state; 486 default:
430 if (ret < 0) { 487 goto abort_attempt;
431 spin_unlock(&vnode->lock);
432 goto error;
433 }
434 goto given_lock;
435 }
436 488
437abort_attempt: 489 case -EWOULDBLOCK:
438 /* we aren't going to get the lock, either because we're unwilling to 490 /* The server doesn't have a lock-waiting queue, so the client
439 * wait, or because some signal happened */ 491 * will have to retry. The server will break the outstanding
440 _debug("abort"); 492 * callbacks on a file when a lock is released.
441 if (list_empty(&vnode->granted_locks) && 493 */
442 vnode->pending_locks.next == &fl->fl_u.afs.link) { 494 _debug("would block");
443 if (vnode->pending_locks.prev != &fl->fl_u.afs.link) { 495 ASSERT(list_empty(&vnode->granted_locks));
444 /* kick the next pending lock into having a go */ 496 ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link);
445 list_del_init(&fl->fl_u.afs.link); 497 vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
446 afs_lock_may_be_available(vnode); 498 goto need_to_wait;
447 } 499
448 } else { 500 case 0:
449 list_del_init(&fl->fl_u.afs.link); 501 _debug("acquired");
502 break;
450 } 503 }
451 spin_unlock(&vnode->lock);
452 goto error;
453 504
454acquired_server_lock:
455 /* we've acquired a server lock, but it needs to be renewed after 5 505 /* we've acquired a server lock, but it needs to be renewed after 5
456 * mins */ 506 * mins */
457 spin_lock(&vnode->lock); 507 vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
458 afs_schedule_lock_extension(vnode); 508 afs_schedule_lock_extension(vnode);
459 if (type == AFS_LOCK_READ) 509
460 set_bit(AFS_VNODE_READLOCKED, &vnode->flags); 510share_existing_lock:
461 else
462 set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
463sharing_existing_lock:
464 /* the lock has been granted as far as we're concerned... */ 511 /* the lock has been granted as far as we're concerned... */
465 fl->fl_u.afs.state = AFS_LOCK_GRANTED; 512 fl->fl_u.afs.state = AFS_LOCK_GRANTED;
466 list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks); 513 list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
514
467given_lock: 515given_lock:
468 /* ... but we do still need to get the VFS's blessing */ 516 /* ... but we do still need to get the VFS's blessing */
469 ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING))); 517 spin_unlock(&vnode->lock);
470 ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) | 518
471 (1 << AFS_VNODE_WRITELOCKED))) != 0);
472 ret = posix_lock_file(file, fl, NULL); 519 ret = posix_lock_file(file, fl, NULL);
473 if (ret < 0) 520 if (ret < 0)
474 goto vfs_rejected_lock; 521 goto vfs_rejected_lock;
475 spin_unlock(&vnode->lock);
476 522
477 /* again, make sure we've got a callback on this file and, again, make 523 /* Again, make sure we've got a callback on this file and, again, make
478 * sure that our view of the data version is up to date (we ignore 524 * sure that our view of the data version is up to date (we ignore
479 * errors incurred here and deal with the consequences elsewhere) */ 525 * errors incurred here and deal with the consequences elsewhere).
526 */
480 afs_validate(vnode, key); 527 afs_validate(vnode, key);
528 _leave(" = 0");
529 return 0;
481 530
482error: 531need_to_wait:
483 spin_unlock(&inode->i_lock); 532 /* We're going to have to wait. Either this client doesn't have a lock
533 * on the server yet and we need to wait for a callback to occur, or
534 * the client does have a lock on the server, but it belongs to some
535 * other process(es) and is incompatible with the lock we want.
536 */
537 ret = -EAGAIN;
538 if (fl->fl_flags & FL_SLEEP) {
539 spin_unlock(&vnode->lock);
540
541 _debug("sleep");
542 ret = wait_event_interruptible(fl->fl_wait,
543 fl->fl_u.afs.state != AFS_LOCK_PENDING);
544
545 spin_lock(&vnode->lock);
546 }
547
548 if (fl->fl_u.afs.state == AFS_LOCK_GRANTED)
549 goto given_lock;
550 if (fl->fl_u.afs.state < 0)
551 ret = fl->fl_u.afs.state;
552
553abort_attempt:
554 /* we aren't going to get the lock, either because we're unwilling to
555 * wait, or because some signal happened */
556 _debug("abort");
557 afs_dequeue_lock(vnode, fl);
558
559error_unlock:
560 spin_unlock(&vnode->lock);
484 _leave(" = %d", ret); 561 _leave(" = %d", ret);
485 return ret; 562 return ret;
486 563
487vfs_rejected_lock: 564vfs_rejected_lock:
488 /* the VFS rejected the lock we just obtained, so we have to discard 565 /* The VFS rejected the lock we just obtained, so we have to discard
489 * what we just got */ 566 * what we just got. We defer this to the lock manager work item to
567 * deal with.
568 */
490 _debug("vfs refused %d", ret); 569 _debug("vfs refused %d", ret);
570 spin_lock(&vnode->lock);
491 list_del_init(&fl->fl_u.afs.link); 571 list_del_init(&fl->fl_u.afs.link);
492 if (list_empty(&vnode->granted_locks)) 572 if (list_empty(&vnode->granted_locks))
493 afs_defer_unlock(vnode, key); 573 afs_defer_unlock(vnode);
494 goto abort_attempt; 574 goto error_unlock;
495} 575}
496 576
497/* 577/*
@@ -499,34 +579,21 @@ vfs_rejected_lock:
499 */ 579 */
500static int afs_do_unlk(struct file *file, struct file_lock *fl) 580static int afs_do_unlk(struct file *file, struct file_lock *fl)
501{ 581{
502 struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); 582 struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
503 struct key *key = afs_file_key(file);
504 int ret; 583 int ret;
505 584
506 _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); 585 _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
507 586
587 /* Flush all pending writes before doing anything with locks. */
588 vfs_fsync(file, 0);
589
508 /* only whole-file unlocks are supported */ 590 /* only whole-file unlocks are supported */
509 if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) 591 if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
510 return -EINVAL; 592 return -EINVAL;
511 593
512 fl->fl_ops = &afs_lock_ops;
513 INIT_LIST_HEAD(&fl->fl_u.afs.link);
514 fl->fl_u.afs.state = AFS_LOCK_PENDING;
515
516 spin_lock(&vnode->lock);
517 ret = posix_lock_file(file, fl, NULL); 594 ret = posix_lock_file(file, fl, NULL);
518 if (ret < 0) { 595 _leave(" = %d [%u]", ret, vnode->lock_state);
519 spin_unlock(&vnode->lock); 596 return ret;
520 _leave(" = %d [vfs]", ret);
521 return ret;
522 }
523
524 /* discard the server lock only if all granted locks are gone */
525 if (list_empty(&vnode->granted_locks))
526 afs_defer_unlock(vnode, key);
527 spin_unlock(&vnode->lock);
528 _leave(" = 0");
529 return 0;
530} 597}
531 598
532/* 599/*
@@ -534,7 +601,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
534 */ 601 */
535static int afs_do_getlk(struct file *file, struct file_lock *fl) 602static int afs_do_getlk(struct file *file, struct file_lock *fl)
536{ 603{
537 struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); 604 struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
538 struct key *key = afs_file_key(file); 605 struct key *key = afs_file_key(file);
539 int ret, lock_count; 606 int ret, lock_count;
540 607
@@ -542,29 +609,25 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
542 609
543 fl->fl_type = F_UNLCK; 610 fl->fl_type = F_UNLCK;
544 611
545 inode_lock(&vnode->vfs_inode);
546
547 /* check local lock records first */ 612 /* check local lock records first */
548 ret = 0;
549 posix_test_lock(file, fl); 613 posix_test_lock(file, fl);
550 if (fl->fl_type == F_UNLCK) { 614 if (fl->fl_type == F_UNLCK) {
551 /* no local locks; consult the server */ 615 /* no local locks; consult the server */
552 ret = afs_fetch_status(vnode, key); 616 ret = afs_fetch_status(vnode, key);
553 if (ret < 0) 617 if (ret < 0)
554 goto error; 618 goto error;
555 lock_count = vnode->status.lock_count; 619
556 if (lock_count) { 620 lock_count = READ_ONCE(vnode->status.lock_count);
557 if (lock_count > 0) 621 if (lock_count > 0)
558 fl->fl_type = F_RDLCK; 622 fl->fl_type = F_RDLCK;
559 else 623 else
560 fl->fl_type = F_WRLCK; 624 fl->fl_type = F_WRLCK;
561 fl->fl_start = 0; 625 fl->fl_start = 0;
562 fl->fl_end = OFFSET_MAX; 626 fl->fl_end = OFFSET_MAX;
563 }
564 } 627 }
565 628
629 ret = 0;
566error: 630error:
567 inode_unlock(&vnode->vfs_inode);
568 _leave(" = %d [%hd]", ret, fl->fl_type); 631 _leave(" = %d [%hd]", ret, fl->fl_type);
569 return ret; 632 return ret;
570} 633}
@@ -574,7 +637,7 @@ error:
574 */ 637 */
575int afs_lock(struct file *file, int cmd, struct file_lock *fl) 638int afs_lock(struct file *file, int cmd, struct file_lock *fl)
576{ 639{
577 struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); 640 struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
578 641
579 _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}", 642 _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
580 vnode->fid.vid, vnode->fid.vnode, cmd, 643 vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -597,7 +660,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
597 */ 660 */
598int afs_flock(struct file *file, int cmd, struct file_lock *fl) 661int afs_flock(struct file *file, int cmd, struct file_lock *fl)
599{ 662{
600 struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); 663 struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
601 664
602 _enter("{%x:%u},%d,{t=%x,fl=%x}", 665 _enter("{%x:%u},%d,{t=%x,fl=%x}",
603 vnode->fid.vid, vnode->fid.vnode, cmd, 666 vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -627,9 +690,13 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
627 */ 690 */
628static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl) 691static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
629{ 692{
693 struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
694
630 _enter(""); 695 _enter("");
631 696
697 spin_lock(&vnode->lock);
632 list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link); 698 list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
699 spin_unlock(&vnode->lock);
633} 700}
634 701
635/* 702/*
@@ -638,7 +705,12 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
638 */ 705 */
639static void afs_fl_release_private(struct file_lock *fl) 706static void afs_fl_release_private(struct file_lock *fl)
640{ 707{
708 struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
709
641 _enter(""); 710 _enter("");
642 711
643 list_del_init(&fl->fl_u.afs.link); 712 spin_lock(&vnode->lock);
713 afs_dequeue_lock(vnode, fl);
714 _debug("state %u for %p", vnode->lock_state, vnode);
715 spin_unlock(&vnode->lock);
644} 716}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index bd8dcee7e066..e03910cebdd4 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -430,6 +430,16 @@ struct afs_volume {
430 u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ 430 u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
431}; 431};
432 432
433enum afs_lock_state {
434 AFS_VNODE_LOCK_NONE, /* The vnode has no lock on the server */
435 AFS_VNODE_LOCK_WAITING_FOR_CB, /* We're waiting for the server to break the callback */
436 AFS_VNODE_LOCK_SETTING, /* We're asking the server for a lock */
437 AFS_VNODE_LOCK_GRANTED, /* We have a lock on the server */
438 AFS_VNODE_LOCK_EXTENDING, /* We're extending a lock on the server */
439 AFS_VNODE_LOCK_NEED_UNLOCK, /* We need to unlock on the server */
440 AFS_VNODE_LOCK_UNLOCKING, /* We're telling the server to unlock */
441};
442
433/* 443/*
434 * AFS inode private data 444 * AFS inode private data
435 */ 445 */
@@ -454,18 +464,16 @@ struct afs_vnode {
454#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ 464#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
455#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ 465#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
456#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ 466#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
457#define AFS_VNODE_LOCKING 6 /* set if waiting for lock on vnode */ 467#define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */
458#define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */ 468#define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */
459#define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */
460#define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */
461#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */
462#define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */
463 469
464 struct list_head wb_keys; /* List of keys available for writeback */ 470 struct list_head wb_keys; /* List of keys available for writeback */
465 struct list_head pending_locks; /* locks waiting to be granted */ 471 struct list_head pending_locks; /* locks waiting to be granted */
466 struct list_head granted_locks; /* locks granted on this file */ 472 struct list_head granted_locks; /* locks granted on this file */
467 struct delayed_work lock_work; /* work to be done in locking */ 473 struct delayed_work lock_work; /* work to be done in locking */
468 struct key *unlock_key; /* key to be used in unlocking */ 474 struct key *lock_key; /* Key to be used in lock ops */
475 enum afs_lock_state lock_state : 8;
476 afs_lock_type_t lock_type : 8;
469 477
470 /* outstanding callback notification on this file */ 478 /* outstanding callback notification on this file */
471 struct afs_cb_interest *cb_interest; /* Server on which this resides */ 479 struct afs_cb_interest *cb_interest; /* Server on which this resides */
@@ -843,6 +851,7 @@ extern void afs_clear_permits(struct afs_vnode *);
843extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int); 851extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
844extern void afs_zap_permits(struct rcu_head *); 852extern void afs_zap_permits(struct rcu_head *);
845extern struct key *afs_request_key(struct afs_cell *); 853extern struct key *afs_request_key(struct afs_cell *);
854extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
846extern int afs_permission(struct inode *, int); 855extern int afs_permission(struct inode *, int);
847extern void __exit afs_clean_up_permit_cache(void); 856extern void __exit afs_clean_up_permit_cache(void);
848 857
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index e728ca1776c9..d04511fb3879 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -46,8 +46,7 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode
46 return false; 46 return false;
47 } 47 }
48 48
49 if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) || 49 if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
50 test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
51 fc->flags |= AFS_FS_CURSOR_CUR_ONLY; 50 fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
52 return true; 51 return true;
53} 52}
@@ -117,7 +116,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
117 case VSALVAGING: m = "being salvaged"; break; 116 case VSALVAGING: m = "being salvaged"; break;
118 default: m = "busy"; break; 117 default: m = "busy"; break;
119 } 118 }
120 119
121 pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m); 120 pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
122} 121}
123 122
@@ -438,24 +437,67 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
438 437
439 _enter(""); 438 _enter("");
440 439
441 if (!cbi) { 440 switch (fc->ac.error) {
442 fc->ac.error = -ESTALE; 441 case SHRT_MAX:
442 if (!cbi) {
443 fc->ac.error = -ESTALE;
444 fc->flags |= AFS_FS_CURSOR_STOP;
445 return false;
446 }
447
448 fc->cbi = afs_get_cb_interest(vnode->cb_interest);
449
450 read_lock(&cbi->server->fs_lock);
451 alist = rcu_dereference_protected(cbi->server->addresses,
452 lockdep_is_held(&cbi->server->fs_lock));
453 afs_get_addrlist(alist);
454 read_unlock(&cbi->server->fs_lock);
455 if (!alist) {
456 fc->ac.error = -ESTALE;
457 fc->flags |= AFS_FS_CURSOR_STOP;
458 return false;
459 }
460
461 fc->ac.alist = alist;
462 fc->ac.addr = NULL;
463 fc->ac.start = READ_ONCE(alist->index);
464 fc->ac.index = fc->ac.start;
465 fc->ac.error = 0;
466 fc->ac.begun = false;
467 goto iterate_address;
468
469 case 0:
470 default:
471 /* Success or local failure. Stop. */
443 fc->flags |= AFS_FS_CURSOR_STOP; 472 fc->flags |= AFS_FS_CURSOR_STOP;
473 _leave(" = f [okay/local %d]", fc->ac.error);
444 return false; 474 return false;
445 }
446 475
447 read_lock(&cbi->server->fs_lock); 476 case -ECONNABORTED:
448 alist = afs_get_addrlist(cbi->server->addresses);
449 read_unlock(&cbi->server->fs_lock);
450 if (!alist) {
451 fc->ac.error = -ESTALE;
452 fc->flags |= AFS_FS_CURSOR_STOP; 477 fc->flags |= AFS_FS_CURSOR_STOP;
478 _leave(" = f [abort]");
453 return false; 479 return false;
480
481 case -ENETUNREACH:
482 case -EHOSTUNREACH:
483 case -ECONNREFUSED:
484 case -ETIMEDOUT:
485 case -ETIME:
486 _debug("no conn");
487 goto iterate_address;
454 } 488 }
455 489
456 fc->ac.alist = alist; 490iterate_address:
457 fc->ac.error = 0; 491 /* Iterate over the current server's address list to try and find an
458 return true; 492 * address on which it will respond to us.
493 */
494 if (afs_iterate_addresses(&fc->ac)) {
495 _leave(" = t");
496 return true;
497 }
498
499 afs_end_cursor(&fc->ac);
500 return false;
459} 501}
460 502
461/* 503/*
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 46a881a4d08f..2b00097101b3 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -284,8 +284,8 @@ someone_else_changed_it:
284 * permitted to be accessed with this authorisation, and if so, what access it 284 * permitted to be accessed with this authorisation, and if so, what access it
285 * is granted 285 * is granted
286 */ 286 */
287static int afs_check_permit(struct afs_vnode *vnode, struct key *key, 287int afs_check_permit(struct afs_vnode *vnode, struct key *key,
288 afs_access_t *_access) 288 afs_access_t *_access)
289{ 289{
290 struct afs_permits *permits; 290 struct afs_permits *permits;
291 bool valid = false; 291 bool valid = false;
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 26bad7032bba..0ab3f8457839 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -17,7 +17,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
17{ 17{
18 int i; 18 int i;
19 19
20 if (refcount_dec_and_test(&slist->usage)) { 20 if (slist && refcount_dec_and_test(&slist->usage)) {
21 for (i = 0; i < slist->nr_servers; i++) { 21 for (i = 0; i < slist->nr_servers; i++) {
22 afs_put_cb_interest(net, slist->servers[i].cb_interest); 22 afs_put_cb_interest(net, slist->servers[i].cb_interest);
23 afs_put_server(net, slist->servers[i].server); 23 afs_put_server(net, slist->servers[i].server);