aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-08 21:13:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-08 21:13:49 -0400
commite5c832d5558826cc6e9a24746cfdec8e7780063a (patch)
tree2b40ee4754dc80b81018ac91282ade4bdcd3c562 /fs
parent0d98439ea3c6ffb2af931f6de4480e744634e2c5 (diff)
vfs: fix dentry RCU to refcounting possibly sleeping dput()
This is the fix that the last two commits indirectly led up to - making sure that we don't call dput() in a bad context on the dentries we've looked up in RCU mode after the sequence count validation fails. This basically expands d_rcu_to_refcount() into the callers, and then fixes the callers to delay the dput() in the failure case until _after_ we've dropped all locks and are no longer in an RCU-locked region. The case of 'complete_walk()' was trivial, since its failure case did the unlock_rcu_walk() directly after the call to d_rcu_to_refcount(), and as such that is just a pure expansion of the function with a trivial movement of the resulting dput() to after 'unlock_rcu_walk()'. In contrast, the unlazy_walk() case was much more complicated, because not only does it convert two different dentries from RCU to be reference counted, but it used to not call unlock_rcu_walk() at all, and instead just returned an error and let the caller clean everything up in "terminate_walk()". Happily, one of the dentries in question (called "parent" inside unlazy_walk()) is the dentry of "nd->path", which terminate_walk() wants a refcount to anyway for the non-RCU case. So what the new and improved unlazy_walk() does is to first turn that dentry into a refcounted one, and once that is set up, the error cases can continue to use the terminate_walk() helper for cleanup, but for the non-RCU case. Which makes it possible to drop out of RCU mode if we actually hit the sequence number failure case. Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/namei.c102
1 files changed, 49 insertions, 53 deletions
diff --git a/fs/namei.c b/fs/namei.c
index cc4bcfaa8624..56e4f4d537d0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -494,37 +494,6 @@ static inline void unlock_rcu_walk(void)
494 br_read_unlock(&vfsmount_lock); 494 br_read_unlock(&vfsmount_lock);
495} 495}
496 496
497/*
498 * When we move over from the RCU domain to properly refcounted
499 * long-lived dentries, we need to check the sequence numbers
500 * we got before lookup very carefully.
501 *
502 * We cannot blindly increment a dentry refcount - even if it
503 * is not locked - if it is zero, because it may have gone
504 * through the final d_kill() logic already.
505 *
506 * So for a zero refcount, we need to get the spinlock (which is
507 * safe even for a dead dentry because the de-allocation is
508 * RCU-delayed), and check the sequence count under the lock.
509 *
510 * Once we have checked the sequence count, we know it is live,
511 * and since we hold the spinlock it cannot die from under us.
512 *
513 * In contrast, if the reference count wasn't zero, we can just
514 * increment the lockref without having to take the spinlock.
515 * Even if the sequence number ends up being stale, we haven't
516 * gone through the final dput() and killed the dentry yet.
517 */
518static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
519{
520 if (likely(lockref_get_not_dead(&dentry->d_lockref))) {
521 if (!read_seqcount_retry(validate, seq))
522 return 0;
523 dput(dentry);
524 }
525 return -ECHILD;
526}
527
528/** 497/**
529 * unlazy_walk - try to switch to ref-walk mode. 498 * unlazy_walk - try to switch to ref-walk mode.
530 * @nd: nameidata pathwalk data 499 * @nd: nameidata pathwalk data
@@ -539,16 +508,29 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
539{ 508{
540 struct fs_struct *fs = current->fs; 509 struct fs_struct *fs = current->fs;
541 struct dentry *parent = nd->path.dentry; 510 struct dentry *parent = nd->path.dentry;
542 int want_root = 0;
543 511
544 BUG_ON(!(nd->flags & LOOKUP_RCU)); 512 BUG_ON(!(nd->flags & LOOKUP_RCU));
545 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { 513
546 want_root = 1; 514 /*
547 spin_lock(&fs->lock); 515 * Get a reference to the parent first: we're
548 if (nd->root.mnt != fs->root.mnt || 516 * going to make "path_put(nd->path)" valid in
549 nd->root.dentry != fs->root.dentry) 517 * non-RCU context for "terminate_walk()".
550 goto err_root; 518 *
551 } 519 * If this doesn't work, return immediately with
520 * RCU walking still active (and then we will do
521 * the RCU walk cleanup in terminate_walk()).
522 */
523 if (!lockref_get_not_dead(&parent->d_lockref))
524 return -ECHILD;
525
526 /*
527 * After the mntget(), terminate_walk() will do
528 * the right thing for non-RCU mode, and all our
529 * subsequent exit cases should unlock_rcu_walk()
530 * before returning.
531 */
532 mntget(nd->path.mnt);
533 nd->flags &= ~LOOKUP_RCU;
552 534
553 /* 535 /*
554 * For a negative lookup, the lookup sequence point is the parents 536 * For a negative lookup, the lookup sequence point is the parents
@@ -562,30 +544,39 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
562 * be valid if the child sequence number is still valid. 544 * be valid if the child sequence number is still valid.
563 */ 545 */
564 if (!dentry) { 546 if (!dentry) {
565 if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0) 547 if (read_seqcount_retry(&parent->d_seq, nd->seq))
566 goto err_root; 548 goto out;
567 BUG_ON(nd->inode != parent->d_inode); 549 BUG_ON(nd->inode != parent->d_inode);
568 } else { 550 } else {
569 if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) 551 if (!lockref_get_not_dead(&dentry->d_lockref))
570 goto err_root; 552 goto out;
571 if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0) 553 if (read_seqcount_retry(&dentry->d_seq, nd->seq))
572 goto err_parent; 554 goto drop_dentry;
573 } 555 }
574 if (want_root) { 556
557 /*
558 * Sequence counts matched. Now make sure that the root is
559 * still valid and get it if required.
560 */
561 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
562 spin_lock(&fs->lock);
563 if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
564 goto unlock_and_drop_dentry;
575 path_get(&nd->root); 565 path_get(&nd->root);
576 spin_unlock(&fs->lock); 566 spin_unlock(&fs->lock);
577 } 567 }
578 mntget(nd->path.mnt);
579 568
580 unlock_rcu_walk(); 569 unlock_rcu_walk();
581 nd->flags &= ~LOOKUP_RCU;
582 return 0; 570 return 0;
583 571
584err_parent: 572unlock_and_drop_dentry:
573 spin_unlock(&fs->lock);
574drop_dentry:
575 unlock_rcu_walk();
585 dput(dentry); 576 dput(dentry);
586err_root: 577 return -ECHILD;
587 if (want_root) 578out:
588 spin_unlock(&fs->lock); 579 unlock_rcu_walk();
589 return -ECHILD; 580 return -ECHILD;
590} 581}
591 582
@@ -614,10 +605,15 @@ static int complete_walk(struct nameidata *nd)
614 if (!(nd->flags & LOOKUP_ROOT)) 605 if (!(nd->flags & LOOKUP_ROOT))
615 nd->root.mnt = NULL; 606 nd->root.mnt = NULL;
616 607
617 if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) { 608 if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
618 unlock_rcu_walk(); 609 unlock_rcu_walk();
619 return -ECHILD; 610 return -ECHILD;
620 } 611 }
612 if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
613 unlock_rcu_walk();
614 dput(dentry);
615 return -ECHILD;
616 }
621 mntget(nd->path.mnt); 617 mntget(nd->path.mnt);
622 unlock_rcu_walk(); 618 unlock_rcu_walk();
623 } 619 }