-rw-r--r--  fs/dcache.c            |  2
-rw-r--r--  fs/namei.c             | 90
-rw-r--r--  include/linux/dcache.h | 22
3 files changed, 65 insertions, 49 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 2d244227999d..96655f4f4574 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1786,7 +1786,7 @@ static noinline enum slow_d_compare slow_dentry_cmp(
  * without taking d_lock and checking d_seq sequence count against @seq
  * returned here.
  *
- * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * A refcount may be taken on the found dentry with the d_rcu_to_refcount
  * function.
  *
  * Alternatively, __d_lookup_rcu may be called again to look up the child of
diff --git a/fs/namei.c b/fs/namei.c
index 7720fbd5277b..2c30c84d4ea1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void)
         br_read_unlock(&vfsmount_lock);
 }
 
+/*
+ * When we move over from the RCU domain to properly refcounted
+ * long-lived dentries, we need to check the sequence numbers
+ * we got before lookup very carefully.
+ *
+ * We cannot blindly increment a dentry refcount - even if it
+ * is not locked - if it is zero, because it may have gone
+ * through the final d_kill() logic already.
+ *
+ * So for a zero refcount, we need to get the spinlock (which is
+ * safe even for a dead dentry because the de-allocation is
+ * RCU-delayed), and check the sequence count under the lock.
+ *
+ * Once we have checked the sequence count, we know it is live,
+ * and since we hold the spinlock it cannot die from under us.
+ *
+ * In contrast, if the reference count wasn't zero, we can just
+ * increment the lockref without having to take the spinlock.
+ * Even if the sequence number ends up being stale, we haven't
+ * gone through the final dput() and killed the dentry yet.
+ */
+static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
+{
+        int gotref;
+
+        gotref = lockref_get_or_lock(&dentry->d_lockref);
+
+        /* Does the sequence number still match? */
+        if (read_seqcount_retry(validate, seq)) {
+                if (gotref)
+                        dput(dentry);
+                else
+                        spin_unlock(&dentry->d_lock);
+                return -ECHILD;
+        }
+
+        /* Get the ref now, if we couldn't get it originally */
+        if (!gotref) {
+                dentry->d_lockref.count++;
+                spin_unlock(&dentry->d_lock);
+        }
+        return 0;
+}
+
 /**
  * unlazy_walk - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
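The helper added above is small, but the rule it implements (take the reference directly when the count is already non-zero, otherwise keep holding the lock and recheck the sequence number before deciding) is easy to get wrong. Below is a minimal user-space sketch of that rule for experimentation. Every name in it is a made-up stand-in: a pthread mutex models d_lock, a plain unsigned counter models the seqcount, and get_or_lock() models only the slow-path semantics of lockref_get_or_lock(), not its lockless cmpxchg fast path. It is an illustration of the logic, not the kernel code.

/*
 * Hypothetical user-space model of the "get a reference or take the lock,
 * then revalidate the sequence count" pattern used by d_rcu_to_refcount().
 * None of these names exist in the kernel tree.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct obj {
        pthread_mutex_t lock;   /* models dentry->d_lock              */
        int count;              /* models dentry->d_lockref.count     */
        unsigned seq;           /* models dentry->d_seq (even = stable) */
};

/* Model of lockref_get_or_lock(): take a reference if the count is
 * non-zero, otherwise return with the lock held and the count untouched. */
static int get_or_lock(struct obj *o)
{
        pthread_mutex_lock(&o->lock);
        if (o->count > 0) {
                o->count++;
                pthread_mutex_unlock(&o->lock);
                return 1;       /* got a reference */
        }
        return 0;               /* count was zero; caller holds the lock */
}

static void put_ref(struct obj *o)      /* models dput() (no freeing here) */
{
        pthread_mutex_lock(&o->lock);
        o->count--;
        pthread_mutex_unlock(&o->lock);
}

/* Model of d_rcu_to_refcount(): only keep the reference if the sequence
 * number sampled during the lockless walk is still the current one. */
static int rcu_to_refcount(struct obj *o, const unsigned *validate, unsigned seq)
{
        int gotref = get_or_lock(o);

        if (*validate != seq) {         /* models read_seqcount_retry() */
                if (gotref)
                        put_ref(o);
                else
                        pthread_mutex_unlock(&o->lock);
                return -ECHILD;
        }
        if (!gotref) {                  /* zero count, but still live: ref it */
                o->count++;
                pthread_mutex_unlock(&o->lock);
        }
        return 0;
}

int main(void)
{
        struct obj o = { PTHREAD_MUTEX_INITIALIZER, 1, 2 };
        unsigned sampled = o.seq;       /* what the RCU walk would have read */

        printf("fresh seq : %d\n", rcu_to_refcount(&o, &o.seq, sampled));
        o.seq += 2;                     /* a writer moved/killed the object */
        printf("stale seq : %d\n", rcu_to_refcount(&o, &o.seq, sampled));
        return 0;
}

Built with gcc -pthread, the first call keeps the reference and returns 0, while the second notices the bumped sequence number, drops the reference it briefly took, and returns -ECHILD, mirroring the fall-back-to-ref-walk behaviour in the patch.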
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
                     nd->root.dentry != fs->root.dentry)
                         goto err_root;
         }
-        spin_lock(&parent->d_lock);
+
+        /*
+         * For a negative lookup, the lookup sequence point is the parents
+         * sequence point, and it only needs to revalidate the parent dentry.
+         *
+         * For a positive lookup, we need to move both the parent and the
+         * dentry from the RCU domain to be properly refcounted. And the
+         * sequence number in the dentry validates *both* dentry counters,
+         * since we checked the sequence number of the parent after we got
+         * the child sequence number. So we know the parent must still
+         * be valid if the child sequence number is still valid.
+         */
         if (!dentry) {
-                if (!__d_rcu_to_refcount(parent, nd->seq))
-                        goto err_parent;
+                if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
+                        goto err_root;
                 BUG_ON(nd->inode != parent->d_inode);
         } else {
-                if (dentry->d_parent != parent)
+                if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
+                        goto err_root;
+                if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
                         goto err_parent;
-                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-                if (!__d_rcu_to_refcount(dentry, nd->seq))
-                        goto err_child;
-                /*
-                 * If the sequence check on the child dentry passed, then
-                 * the child has not been removed from its parent. This
-                 * means the parent dentry must be valid and able to take
-                 * a reference at this point.
-                 */
-                BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
-                BUG_ON(!parent->d_lockref.count);
-                parent->d_lockref.count++;
-                spin_unlock(&dentry->d_lock);
         }
-        spin_unlock(&parent->d_lock);
         if (want_root) {
                 path_get(&nd->root);
                 spin_unlock(&fs->lock);
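The comment in this hunk rests on an ordering argument: during the RCU walk the parent's sequence number was rechecked after the child's was sampled, and anything that moves or unhashes the child also bumps the child's sequence number, so a still-valid child sequence number implies the parent link seen at that time is still the right one. A rough single-threaded sketch of that argument, with hypothetical names and the seqcounts reduced to plain counters (it is a toy model, not how fs/namei.c reads these fields):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct node {
        unsigned seq;
        struct node *parent;
};

/* Any operation that changes where a node hangs also bumps its seq,
 * standing in for the write_seqcount_begin/end pair around d_move()
 * and friends. */
static void reparent(struct node *n, struct node *new_parent)
{
        n->seq += 2;            /* writer side: seq moves to a new even value */
        n->parent = new_parent;
}

int main(void)
{
        struct node root   = { .seq = 2, .parent = NULL };
        struct node parent = { .seq = 2, .parent = &root };
        struct node child  = { .seq = 2, .parent = &parent };

        /* RCU-walk order: sample the child's seq (this becomes nd->seq);
         * the walk then rechecks the parent's seq, so the parent link
         * sampled here was valid after the child's seq was read. */
        unsigned nd_seq = child.seq;
        struct node *seen_parent = child.parent;

        /* unlazy_walk(), positive lookup: both references are validated
         * against the child's seq only. */
        assert(child.seq == nd_seq);            /* child unchanged ...        */
        assert(child.parent == seen_parent);    /* ... so its parent link too */

        /* After a rename the stale nd_seq is detected and we fall back. */
        reparent(&child, &root);
        assert(child.seq != nd_seq);
        printf("rename detected: seq %u != sampled %u\n", child.seq, nd_seq);
        return 0;
}

In the kernel the samples and bumps go through the seqcount API under rcu_read_lock(); the toy only shows why a stale nd->seq cannot go unnoticed.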
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
         nd->flags &= ~LOOKUP_RCU;
         return 0;
 
-err_child:
-        spin_unlock(&dentry->d_lock);
 err_parent:
-        spin_unlock(&parent->d_lock);
+        dput(dentry);
 err_root:
         if (want_root)
                 spin_unlock(&fs->lock);
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd)
                 nd->flags &= ~LOOKUP_RCU;
                 if (!(nd->flags & LOOKUP_ROOT))
                         nd->root.mnt = NULL;
-                spin_lock(&dentry->d_lock);
-                if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
-                        spin_unlock(&dentry->d_lock);
+
+                if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
                         unlock_rcu_walk();
                         return -ECHILD;
                 }
-                BUG_ON(nd->inode != dentry->d_inode);
-                spin_unlock(&dentry->d_lock);
                 mntget(nd->path.mnt);
                 unlock_rcu_walk();
         }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index efdc94434c30..9169b91ea2d2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -304,28 +304,6 @@ extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
 extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
                                 const struct qstr *name, unsigned *seq);
 
-/**
- * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
- * @dentry: dentry to take a ref on
- * @seq: seqcount to verify against
- * Returns: 0 on failure, else 1.
- *
- * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
- * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
- */
-static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
-{
-        int ret = 0;
-
-        assert_spin_locked(&dentry->d_lock);
-        if (!read_seqcount_retry(&dentry->d_seq, seq)) {
-                ret = 1;
-                dentry->d_lockref.count++;
-        }
-
-        return ret;
-}
-
 static inline unsigned d_count(const struct dentry *dentry)
 {
         return dentry->d_lockref.count;