diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 90 |
1 files changed, 64 insertions, 26 deletions
diff --git a/fs/namei.c b/fs/namei.c index 7720fbd5277b..2c30c84d4ea1 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void) | |||
494 | br_read_unlock(&vfsmount_lock); | 494 | br_read_unlock(&vfsmount_lock); |
495 | } | 495 | } |
496 | 496 | ||
497 | /* | ||
498 | * When we move over from the RCU domain to properly refcounted | ||
499 | * long-lived dentries, we need to check the sequence numbers | ||
500 | * we got before lookup very carefully. | ||
501 | * | ||
502 | * We cannot blindly increment a dentry refcount - even if it | ||
503 | * is not locked - if it is zero, because it may have gone | ||
504 | * through the final d_kill() logic already. | ||
505 | * | ||
506 | * So for a zero refcount, we need to get the spinlock (which is | ||
507 | * safe even for a dead dentry because the de-allocation is | ||
508 | * RCU-delayed), and check the sequence count under the lock. | ||
509 | * | ||
510 | * Once we have checked the sequence count, we know it is live, | ||
511 | * and since we hold the spinlock it cannot die from under us. | ||
512 | * | ||
513 | * In contrast, if the reference count wasn't zero, we can just | ||
514 | * increment the lockref without having to take the spinlock. | ||
515 | * Even if the sequence number ends up being stale, we haven't | ||
516 | * gone through the final dput() and killed the dentry yet. | ||
517 | */ | ||
518 | static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq) | ||
519 | { | ||
520 | int gotref; | ||
521 | |||
522 | gotref = lockref_get_or_lock(&dentry->d_lockref); | ||
523 | |||
524 | /* Does the sequence number still match? */ | ||
525 | if (read_seqcount_retry(validate, seq)) { | ||
526 | if (gotref) | ||
527 | dput(dentry); | ||
528 | else | ||
529 | spin_unlock(&dentry->d_lock); | ||
530 | return -ECHILD; | ||
531 | } | ||
532 | |||
533 | /* Get the ref now, if we couldn't get it originally */ | ||
534 | if (!gotref) { | ||
535 | dentry->d_lockref.count++; | ||
536 | spin_unlock(&dentry->d_lock); | ||
537 | } | ||
538 | return 0; | ||
539 | } | ||
540 | |||
497 | /** | 541 | /** |
498 | * unlazy_walk - try to switch to ref-walk mode. | 542 | * unlazy_walk - try to switch to ref-walk mode. |
499 | * @nd: nameidata pathwalk data | 543 | * @nd: nameidata pathwalk data |
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) | |||
518 | nd->root.dentry != fs->root.dentry) | 562 | nd->root.dentry != fs->root.dentry) |
519 | goto err_root; | 563 | goto err_root; |
520 | } | 564 | } |
521 | spin_lock(&parent->d_lock); | 565 | |
566 | /* | ||
567 | * For a negative lookup, the lookup sequence point is the parent's | |
568 | * sequence point, and it only needs to revalidate the parent dentry. | ||
569 | * | ||
570 | * For a positive lookup, we need to move both the parent and the | ||
571 | * dentry from the RCU domain to be properly refcounted. And the | ||
572 | * sequence number in the dentry validates *both* dentry counters, | ||
573 | * since we checked the sequence number of the parent after we got | ||
574 | * the child sequence number. So we know the parent must still | ||
575 | * be valid if the child sequence number is still valid. | ||
576 | */ | ||
522 | if (!dentry) { | 577 | if (!dentry) { |
523 | if (!__d_rcu_to_refcount(parent, nd->seq)) | 578 | if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0) |
524 | goto err_parent; | 579 | goto err_root; |
525 | BUG_ON(nd->inode != parent->d_inode); | 580 | BUG_ON(nd->inode != parent->d_inode); |
526 | } else { | 581 | } else { |
527 | if (dentry->d_parent != parent) | 582 | if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) |
583 | goto err_root; | ||
584 | if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0) | ||
528 | goto err_parent; | 585 | goto err_parent; |
529 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | ||
530 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | ||
531 | goto err_child; | ||
532 | /* | ||
533 | * If the sequence check on the child dentry passed, then | ||
534 | * the child has not been removed from its parent. This | ||
535 | * means the parent dentry must be valid and able to take | ||
536 | * a reference at this point. | ||
537 | */ | ||
538 | BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); | ||
539 | BUG_ON(!parent->d_lockref.count); | ||
540 | parent->d_lockref.count++; | ||
541 | spin_unlock(&dentry->d_lock); | ||
542 | } | 586 | } |
543 | spin_unlock(&parent->d_lock); | ||
544 | if (want_root) { | 587 | if (want_root) { |
545 | path_get(&nd->root); | 588 | path_get(&nd->root); |
546 | spin_unlock(&fs->lock); | 589 | spin_unlock(&fs->lock); |
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) | |||
551 | nd->flags &= ~LOOKUP_RCU; | 594 | nd->flags &= ~LOOKUP_RCU; |
552 | return 0; | 595 | return 0; |
553 | 596 | ||
554 | err_child: | ||
555 | spin_unlock(&dentry->d_lock); | ||
556 | err_parent: | 597 | err_parent: |
557 | spin_unlock(&parent->d_lock); | 598 | dput(dentry); |
558 | err_root: | 599 | err_root: |
559 | if (want_root) | 600 | if (want_root) |
560 | spin_unlock(&fs->lock); | 601 | spin_unlock(&fs->lock); |
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd) | |||
585 | nd->flags &= ~LOOKUP_RCU; | 626 | nd->flags &= ~LOOKUP_RCU; |
586 | if (!(nd->flags & LOOKUP_ROOT)) | 627 | if (!(nd->flags & LOOKUP_ROOT)) |
587 | nd->root.mnt = NULL; | 628 | nd->root.mnt = NULL; |
588 | spin_lock(&dentry->d_lock); | 629 | |
589 | if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { | 630 | if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) { |
590 | spin_unlock(&dentry->d_lock); | ||
591 | unlock_rcu_walk(); | 631 | unlock_rcu_walk(); |
592 | return -ECHILD; | 632 | return -ECHILD; |
593 | } | 633 | } |
594 | BUG_ON(nd->inode != dentry->d_inode); | ||
595 | spin_unlock(&dentry->d_lock); | ||
596 | mntget(nd->path.mnt); | 634 | mntget(nd->path.mnt); |
597 | unlock_rcu_walk(); | 635 | unlock_rcu_walk(); |
598 | } | 636 | } |