aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/namei.c90
-rw-r--r--include/linux/dcache.h22
3 files changed, 65 insertions, 49 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 2d244227999d..96655f4f4574 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1786,7 +1786,7 @@ static noinline enum slow_d_compare slow_dentry_cmp(
1786 * without taking d_lock and checking d_seq sequence count against @seq 1786 * without taking d_lock and checking d_seq sequence count against @seq
1787 * returned here. 1787 * returned here.
1788 * 1788 *
1789 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount 1789 * A refcount may be taken on the found dentry with the d_rcu_to_refcount
1790 * function. 1790 * function.
1791 * 1791 *
1792 * Alternatively, __d_lookup_rcu may be called again to look up the child of 1792 * Alternatively, __d_lookup_rcu may be called again to look up the child of
diff --git a/fs/namei.c b/fs/namei.c
index 7720fbd5277b..2c30c84d4ea1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void)
494 br_read_unlock(&vfsmount_lock); 494 br_read_unlock(&vfsmount_lock);
495} 495}
496 496
497/*
498 * When we move over from the RCU domain to properly refcounted
499 * long-lived dentries, we need to check the sequence numbers
500 * we got before lookup very carefully.
501 *
502 * We cannot blindly increment a dentry refcount - even if it
503 * is not locked - if it is zero, because it may have gone
504 * through the final d_kill() logic already.
505 *
506 * So for a zero refcount, we need to get the spinlock (which is
507 * safe even for a dead dentry because the de-allocation is
508 * RCU-delayed), and check the sequence count under the lock.
509 *
510 * Once we have checked the sequence count, we know it is live,
511 * and since we hold the spinlock it cannot die from under us.
512 *
513 * In contrast, if the reference count wasn't zero, we can just
514 * increment the lockref without having to take the spinlock.
515 * Even if the sequence number ends up being stale, we haven't
516 * gone through the final dput() and killed the dentry yet.
517 */
518static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
519{
520 int gotref;
521
522 gotref = lockref_get_or_lock(&dentry->d_lockref);
523
524 /* Does the sequence number still match? */
525 if (read_seqcount_retry(validate, seq)) {
526 if (gotref)
527 dput(dentry);
528 else
529 spin_unlock(&dentry->d_lock);
530 return -ECHILD;
531 }
532
533 /* Get the ref now, if we couldn't get it originally */
534 if (!gotref) {
535 dentry->d_lockref.count++;
536 spin_unlock(&dentry->d_lock);
537 }
538 return 0;
539}
540
497/** 541/**
498 * unlazy_walk - try to switch to ref-walk mode. 542 * unlazy_walk - try to switch to ref-walk mode.
499 * @nd: nameidata pathwalk data 543 * @nd: nameidata pathwalk data
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
518 nd->root.dentry != fs->root.dentry) 562 nd->root.dentry != fs->root.dentry)
519 goto err_root; 563 goto err_root;
520 } 564 }
521 spin_lock(&parent->d_lock); 565
566 /*
567 * For a negative lookup, the lookup sequence point is the parents
568 * sequence point, and it only needs to revalidate the parent dentry.
569 *
570 * For a positive lookup, we need to move both the parent and the
571 * dentry from the RCU domain to be properly refcounted. And the
572 * sequence number in the dentry validates *both* dentry counters,
573 * since we checked the sequence number of the parent after we got
574 * the child sequence number. So we know the parent must still
575 * be valid if the child sequence number is still valid.
576 */
522 if (!dentry) { 577 if (!dentry) {
523 if (!__d_rcu_to_refcount(parent, nd->seq)) 578 if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
524 goto err_parent; 579 goto err_root;
525 BUG_ON(nd->inode != parent->d_inode); 580 BUG_ON(nd->inode != parent->d_inode);
526 } else { 581 } else {
527 if (dentry->d_parent != parent) 582 if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
583 goto err_root;
584 if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
528 goto err_parent; 585 goto err_parent;
529 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
530 if (!__d_rcu_to_refcount(dentry, nd->seq))
531 goto err_child;
532 /*
533 * If the sequence check on the child dentry passed, then
534 * the child has not been removed from its parent. This
535 * means the parent dentry must be valid and able to take
536 * a reference at this point.
537 */
538 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
539 BUG_ON(!parent->d_lockref.count);
540 parent->d_lockref.count++;
541 spin_unlock(&dentry->d_lock);
542 } 586 }
543 spin_unlock(&parent->d_lock);
544 if (want_root) { 587 if (want_root) {
545 path_get(&nd->root); 588 path_get(&nd->root);
546 spin_unlock(&fs->lock); 589 spin_unlock(&fs->lock);
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
551 nd->flags &= ~LOOKUP_RCU; 594 nd->flags &= ~LOOKUP_RCU;
552 return 0; 595 return 0;
553 596
554err_child:
555 spin_unlock(&dentry->d_lock);
556err_parent: 597err_parent:
557 spin_unlock(&parent->d_lock); 598 dput(dentry);
558err_root: 599err_root:
559 if (want_root) 600 if (want_root)
560 spin_unlock(&fs->lock); 601 spin_unlock(&fs->lock);
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd)
585 nd->flags &= ~LOOKUP_RCU; 626 nd->flags &= ~LOOKUP_RCU;
586 if (!(nd->flags & LOOKUP_ROOT)) 627 if (!(nd->flags & LOOKUP_ROOT))
587 nd->root.mnt = NULL; 628 nd->root.mnt = NULL;
588 spin_lock(&dentry->d_lock); 629
589 if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { 630 if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
590 spin_unlock(&dentry->d_lock);
591 unlock_rcu_walk(); 631 unlock_rcu_walk();
592 return -ECHILD; 632 return -ECHILD;
593 } 633 }
594 BUG_ON(nd->inode != dentry->d_inode);
595 spin_unlock(&dentry->d_lock);
596 mntget(nd->path.mnt); 634 mntget(nd->path.mnt);
597 unlock_rcu_walk(); 635 unlock_rcu_walk();
598 } 636 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index efdc94434c30..9169b91ea2d2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -304,28 +304,6 @@ extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
304extern struct dentry *__d_lookup_rcu(const struct dentry *parent, 304extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
305 const struct qstr *name, unsigned *seq); 305 const struct qstr *name, unsigned *seq);
306 306
307/**
308 * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
309 * @dentry: dentry to take a ref on
310 * @seq: seqcount to verify against
311 * Returns: 0 on failure, else 1.
312 *
313 * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
314 * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
315 */
316static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
317{
318 int ret = 0;
319
320 assert_spin_locked(&dentry->d_lock);
321 if (!read_seqcount_retry(&dentry->d_seq, seq)) {
322 ret = 1;
323 dentry->d_lockref.count++;
324 }
325
326 return ret;
327}
328
329static inline unsigned d_count(const struct dentry *dentry) 307static inline unsigned d_count(const struct dentry *dentry)
330{ 308{
331 return dentry->d_lockref.count; 309 return dentry->d_lockref.count;