aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c90
1 files changed, 64 insertions, 26 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 7720fbd5277b..2c30c84d4ea1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void)
494 br_read_unlock(&vfsmount_lock); 494 br_read_unlock(&vfsmount_lock);
495} 495}
496 496
497/*
498 * When we move over from the RCU domain to properly refcounted
499 * long-lived dentries, we need to check the sequence numbers
500 * we got before lookup very carefully.
501 *
502 * We cannot blindly increment a dentry refcount - even if it
503 * is not locked - if it is zero, because it may have gone
504 * through the final d_kill() logic already.
505 *
506 * So for a zero refcount, we need to get the spinlock (which is
507 * safe even for a dead dentry because the de-allocation is
508 * RCU-delayed), and check the sequence count under the lock.
509 *
510 * Once we have checked the sequence count, we know it is live,
511 * and since we hold the spinlock it cannot die from under us.
512 *
513 * In contrast, if the reference count wasn't zero, we can just
514 * increment the lockref without having to take the spinlock.
515 * Even if the sequence number ends up being stale, we haven't
516 * gone through the final dput() and killed the dentry yet.
517 */
518static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
519{
520 int gotref;
521
522 gotref = lockref_get_or_lock(&dentry->d_lockref);
523
524 /* Does the sequence number still match? */
525 if (read_seqcount_retry(validate, seq)) {
526 if (gotref)
527 dput(dentry);
528 else
529 spin_unlock(&dentry->d_lock);
530 return -ECHILD;
531 }
532
533 /* Get the ref now, if we couldn't get it originally */
534 if (!gotref) {
535 dentry->d_lockref.count++;
536 spin_unlock(&dentry->d_lock);
537 }
538 return 0;
539}
540
497/** 541/**
498 * unlazy_walk - try to switch to ref-walk mode. 542 * unlazy_walk - try to switch to ref-walk mode.
499 * @nd: nameidata pathwalk data 543 * @nd: nameidata pathwalk data
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
518 nd->root.dentry != fs->root.dentry) 562 nd->root.dentry != fs->root.dentry)
519 goto err_root; 563 goto err_root;
520 } 564 }
521 spin_lock(&parent->d_lock); 565
566 /*
567 * For a negative lookup, the lookup sequence point is the parent's
568 * sequence point, and it only needs to revalidate the parent dentry.
569 *
570 * For a positive lookup, we need to move both the parent and the
571 * dentry from the RCU domain to be properly refcounted. And the
572 * sequence number in the dentry validates *both* dentry counters,
573 * since we checked the sequence number of the parent after we got
574 * the child sequence number. So we know the parent must still
575 * be valid if the child sequence number is still valid.
576 */
522 if (!dentry) { 577 if (!dentry) {
523 if (!__d_rcu_to_refcount(parent, nd->seq)) 578 if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
524 goto err_parent; 579 goto err_root;
525 BUG_ON(nd->inode != parent->d_inode); 580 BUG_ON(nd->inode != parent->d_inode);
526 } else { 581 } else {
527 if (dentry->d_parent != parent) 582 if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
583 goto err_root;
584 if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
528 goto err_parent; 585 goto err_parent;
529 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
530 if (!__d_rcu_to_refcount(dentry, nd->seq))
531 goto err_child;
532 /*
533 * If the sequence check on the child dentry passed, then
534 * the child has not been removed from its parent. This
535 * means the parent dentry must be valid and able to take
536 * a reference at this point.
537 */
538 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
539 BUG_ON(!parent->d_lockref.count);
540 parent->d_lockref.count++;
541 spin_unlock(&dentry->d_lock);
542 } 586 }
543 spin_unlock(&parent->d_lock);
544 if (want_root) { 587 if (want_root) {
545 path_get(&nd->root); 588 path_get(&nd->root);
546 spin_unlock(&fs->lock); 589 spin_unlock(&fs->lock);
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
551 nd->flags &= ~LOOKUP_RCU; 594 nd->flags &= ~LOOKUP_RCU;
552 return 0; 595 return 0;
553 596
554err_child:
555 spin_unlock(&dentry->d_lock);
556err_parent: 597err_parent:
557 spin_unlock(&parent->d_lock); 598 dput(dentry);
558err_root: 599err_root:
559 if (want_root) 600 if (want_root)
560 spin_unlock(&fs->lock); 601 spin_unlock(&fs->lock);
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd)
585 nd->flags &= ~LOOKUP_RCU; 626 nd->flags &= ~LOOKUP_RCU;
586 if (!(nd->flags & LOOKUP_ROOT)) 627 if (!(nd->flags & LOOKUP_ROOT))
587 nd->root.mnt = NULL; 628 nd->root.mnt = NULL;
588 spin_lock(&dentry->d_lock); 629
589 if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { 630 if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
590 spin_unlock(&dentry->d_lock);
591 unlock_rcu_walk(); 631 unlock_rcu_walk();
592 return -ECHILD; 632 return -ECHILD;
593 } 633 }
594 BUG_ON(nd->inode != dentry->d_inode);
595 spin_unlock(&dentry->d_lock);
596 mntget(nd->path.mnt); 634 mntget(nd->path.mnt);
597 unlock_rcu_walk(); 635 unlock_rcu_walk();
598 } 636 }