aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-02 14:38:06 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-02 14:38:06 -0400
commit15570086b590a69d59183b08a7770e316cca20a7 (patch)
tree12c1494a59a3786bc99674567aa9b77af40a3698 /fs/namei.c
parentdf3d0bbcdb2cafa23a70223d806655bd37e64a9b (diff)
vfs: reimplement d_rcu_to_refcount() using lockref_get_or_lock()
This moves __d_rcu_to_refcount() from <linux/dcache.h> into fs/namei.c and re-implements it using the lockref infrastructure instead. It also adds a lot of comments about what is actually going on, because turning a dentry that was looked up using RCU into a long-lived reference counted entry is one of the more subtle parts of the rcu walk.

We also used to be _particularly_ subtle in unlazy_walk() where we re-validate both the dentry and its parent using the same sequence count. We used to do it by nesting the locks and then verifying the sequence count just once. That was silly, because nested locking is expensive, but the sequence count check is not. So this just re-validates the dentry and the parent separately, avoiding the nested locking, and making the lockref lookup possible.

Acked-by: Waiman Long <waiman.long@hp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c90
1 files changed, 64 insertions, 26 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 7720fbd5277b..2c30c84d4ea1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void)
494 br_read_unlock(&vfsmount_lock); 494 br_read_unlock(&vfsmount_lock);
495} 495}
496 496
497/*
498 * When we move over from the RCU domain to properly refcounted
499 * long-lived dentries, we need to check the sequence numbers
500 * we got before lookup very carefully.
501 *
502 * We cannot blindly increment a dentry refcount - even if it
503 * is not locked - if it is zero, because it may have gone
504 * through the final d_kill() logic already.
505 *
506 * So for a zero refcount, we need to get the spinlock (which is
507 * safe even for a dead dentry because the de-allocation is
508 * RCU-delayed), and check the sequence count under the lock.
509 *
510 * Once we have checked the sequence count, we know it is live,
511 * and since we hold the spinlock it cannot die from under us.
512 *
513 * In contrast, if the reference count wasn't zero, we can just
514 * increment the lockref without having to take the spinlock.
515 * Even if the sequence number ends up being stale, we haven't
516 * gone through the final dput() and killed the dentry yet.
 *
 * @dentry:   dentry whose d_lockref should gain one reference.
 * @validate: sequence counter that is re-checked against @seq; callers
 *            pass either @dentry's own d_seq or a related dentry's d_seq.
 * @seq:      sequence value sampled earlier, during the RCU lookup.
 *
 * Returns 0 with the reference count of @dentry raised by one and
 * d_lock not held, or -ECHILD if @validate no longer matches @seq.
 * On the -ECHILD path whatever lockref_get_or_lock() acquired (a
 * reference or the spinlock) has been released again before returning.
517 */
518static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
519{
520 int gotref;
521

/*
 * Non-zero: a reference was taken without the spinlock.
 * Zero: no reference was taken, but d_lock is now held.
 */
522 gotref = lockref_get_or_lock(&dentry->d_lockref);
523
524 /* Does the sequence number still match? */
525 if (read_seqcount_retry(validate, seq)) {
/*
 * Stale: undo whichever of the two things we acquired above.
 *
 * NOTE(review): callers such as complete_walk() invoke this
 * before unlock_rcu_walk(); confirm that calling dput() in
 * that context is safe (i.e. that it cannot sleep here).
 */
526 if (gotref)
527 dput(dentry);
528 else
529 spin_unlock(&dentry->d_lock);
530 return -ECHILD;
531 }
532
533 /* Get the ref now, if we couldn't get it originally */
534 if (!gotref) {
/* d_lock is held here, so the plain increment is done under the lock. */
535 dentry->d_lockref.count++;
536 spin_unlock(&dentry->d_lock);
537 }
538 return 0;
539}
540
497/** 541/**
498 * unlazy_walk - try to switch to ref-walk mode. 542 * unlazy_walk - try to switch to ref-walk mode.
499 * @nd: nameidata pathwalk data 543 * @nd: nameidata pathwalk data
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
518 nd->root.dentry != fs->root.dentry) 562 nd->root.dentry != fs->root.dentry)
519 goto err_root; 563 goto err_root;
520 } 564 }
521 spin_lock(&parent->d_lock); 565
566 /*
567 * For a negative lookup, the lookup sequence point is the parent's
568 * sequence point, and it only needs to revalidate the parent dentry.
569 *
570 * For a positive lookup, we need to move both the parent and the
571 * dentry from the RCU domain to be properly refcounted. And the
572 * sequence number in the dentry validates *both* dentry counters,
573 * since we checked the sequence number of the parent after we got
574 * the child sequence number. So we know the parent must still
575 * be valid if the child sequence number is still valid.
576 */
522 if (!dentry) { 577 if (!dentry) {
523 if (!__d_rcu_to_refcount(parent, nd->seq)) 578 if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
524 goto err_parent; 579 goto err_root;
525 BUG_ON(nd->inode != parent->d_inode); 580 BUG_ON(nd->inode != parent->d_inode);
526 } else { 581 } else {
527 if (dentry->d_parent != parent) 582 if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
583 goto err_root;
584 if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
528 goto err_parent; 585 goto err_parent;
529 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
530 if (!__d_rcu_to_refcount(dentry, nd->seq))
531 goto err_child;
532 /*
533 * If the sequence check on the child dentry passed, then
534 * the child has not been removed from its parent. This
535 * means the parent dentry must be valid and able to take
536 * a reference at this point.
537 */
538 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
539 BUG_ON(!parent->d_lockref.count);
540 parent->d_lockref.count++;
541 spin_unlock(&dentry->d_lock);
542 } 586 }
543 spin_unlock(&parent->d_lock);
544 if (want_root) { 587 if (want_root) {
545 path_get(&nd->root); 588 path_get(&nd->root);
546 spin_unlock(&fs->lock); 589 spin_unlock(&fs->lock);
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
551 nd->flags &= ~LOOKUP_RCU; 594 nd->flags &= ~LOOKUP_RCU;
552 return 0; 595 return 0;
553 596
554err_child:
555 spin_unlock(&dentry->d_lock);
556err_parent: 597err_parent:
557 spin_unlock(&parent->d_lock); 598 dput(dentry);
558err_root: 599err_root:
559 if (want_root) 600 if (want_root)
560 spin_unlock(&fs->lock); 601 spin_unlock(&fs->lock);
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd)
585 nd->flags &= ~LOOKUP_RCU; 626 nd->flags &= ~LOOKUP_RCU;
586 if (!(nd->flags & LOOKUP_ROOT)) 627 if (!(nd->flags & LOOKUP_ROOT))
587 nd->root.mnt = NULL; 628 nd->root.mnt = NULL;
588 spin_lock(&dentry->d_lock); 629
589 if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { 630 if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
590 spin_unlock(&dentry->d_lock);
591 unlock_rcu_walk(); 631 unlock_rcu_walk();
592 return -ECHILD; 632 return -ECHILD;
593 } 633 }
594 BUG_ON(nd->inode != dentry->d_inode);
595 spin_unlock(&dentry->d_lock);
596 mntget(nd->path.mnt); 634 mntget(nd->path.mnt);
597 unlock_rcu_walk(); 635 unlock_rcu_walk();
598 } 636 }