diff options
Diffstat (limited to 'fs/dcache.c')
-rw-r--r-- | fs/dcache.c | 762 |
1 files changed, 470 insertions, 292 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index 83cfb834db03..99d4d7226203 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -88,6 +88,35 @@ EXPORT_SYMBOL(rename_lock); | |||
88 | 88 | ||
89 | static struct kmem_cache *dentry_cache __read_mostly; | 89 | static struct kmem_cache *dentry_cache __read_mostly; |
90 | 90 | ||
91 | /** | ||
92 | * read_seqbegin_or_lock - begin a sequence number check or locking block | ||
93 | * @lock: sequence lock | ||
94 | * @seq : sequence number to be checked | ||
95 | * | ||
96 | * First try it once optimistically without taking the lock. If that fails, | ||
97 | * take the lock. The sequence number is also used as a marker for deciding | ||
98 | * whether to be a reader (even) or writer (odd). | ||
99 | * N.B. seq must be initialized to an even number to begin with. | ||
100 | */ | ||
101 | static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) | ||
102 | { | ||
103 | if (!(*seq & 1)) /* Even */ | ||
104 | *seq = read_seqbegin(lock); | ||
105 | else /* Odd */ | ||
106 | read_seqlock_excl(lock); | ||
107 | } | ||
108 | |||
109 | static inline int need_seqretry(seqlock_t *lock, int seq) | ||
110 | { | ||
111 | return !(seq & 1) && read_seqretry(lock, seq); | ||
112 | } | ||
113 | |||
114 | static inline void done_seqretry(seqlock_t *lock, int seq) | ||
115 | { | ||
116 | if (seq & 1) | ||
117 | read_sequnlock_excl(lock); | ||
118 | } | ||
119 | |||
91 | /* | 120 | /* |
92 | * This is the single most critical data structure when it comes | 121 | * This is the single most critical data structure when it comes |
93 | * to the dcache: the hashtable for lookups. Somebody should try | 122 | * to the dcache: the hashtable for lookups. Somebody should try |
@@ -229,7 +258,7 @@ static void __d_free(struct rcu_head *head) | |||
229 | */ | 258 | */ |
230 | static void d_free(struct dentry *dentry) | 259 | static void d_free(struct dentry *dentry) |
231 | { | 260 | { |
232 | BUG_ON(dentry->d_count); | 261 | BUG_ON((int)dentry->d_lockref.count > 0); |
233 | this_cpu_dec(nr_dentry); | 262 | this_cpu_dec(nr_dentry); |
234 | if (dentry->d_op && dentry->d_op->d_release) | 263 | if (dentry->d_op && dentry->d_op->d_release) |
235 | dentry->d_op->d_release(dentry); | 264 | dentry->d_op->d_release(dentry); |
@@ -308,8 +337,9 @@ static void dentry_unlink_inode(struct dentry * dentry) | |||
308 | */ | 337 | */ |
309 | static void dentry_lru_add(struct dentry *dentry) | 338 | static void dentry_lru_add(struct dentry *dentry) |
310 | { | 339 | { |
311 | if (list_empty(&dentry->d_lru)) { | 340 | if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { |
312 | spin_lock(&dcache_lru_lock); | 341 | spin_lock(&dcache_lru_lock); |
342 | dentry->d_flags |= DCACHE_LRU_LIST; | ||
313 | list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); | 343 | list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); |
314 | dentry->d_sb->s_nr_dentry_unused++; | 344 | dentry->d_sb->s_nr_dentry_unused++; |
315 | dentry_stat.nr_unused++; | 345 | dentry_stat.nr_unused++; |
@@ -320,7 +350,7 @@ static void dentry_lru_add(struct dentry *dentry) | |||
320 | static void __dentry_lru_del(struct dentry *dentry) | 350 | static void __dentry_lru_del(struct dentry *dentry) |
321 | { | 351 | { |
322 | list_del_init(&dentry->d_lru); | 352 | list_del_init(&dentry->d_lru); |
323 | dentry->d_flags &= ~DCACHE_SHRINK_LIST; | 353 | dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); |
324 | dentry->d_sb->s_nr_dentry_unused--; | 354 | dentry->d_sb->s_nr_dentry_unused--; |
325 | dentry_stat.nr_unused--; | 355 | dentry_stat.nr_unused--; |
326 | } | 356 | } |
@@ -341,6 +371,7 @@ static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) | |||
341 | { | 371 | { |
342 | spin_lock(&dcache_lru_lock); | 372 | spin_lock(&dcache_lru_lock); |
343 | if (list_empty(&dentry->d_lru)) { | 373 | if (list_empty(&dentry->d_lru)) { |
374 | dentry->d_flags |= DCACHE_LRU_LIST; | ||
344 | list_add_tail(&dentry->d_lru, list); | 375 | list_add_tail(&dentry->d_lru, list); |
345 | dentry->d_sb->s_nr_dentry_unused++; | 376 | dentry->d_sb->s_nr_dentry_unused++; |
346 | dentry_stat.nr_unused++; | 377 | dentry_stat.nr_unused++; |
@@ -443,7 +474,7 @@ EXPORT_SYMBOL(d_drop); | |||
443 | * If ref is non-zero, then decrement the refcount too. | 474 | * If ref is non-zero, then decrement the refcount too. |
444 | * Returns dentry requiring refcount drop, or NULL if we're done. | 475 | * Returns dentry requiring refcount drop, or NULL if we're done. |
445 | */ | 476 | */ |
446 | static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) | 477 | static inline struct dentry *dentry_kill(struct dentry *dentry) |
447 | __releases(dentry->d_lock) | 478 | __releases(dentry->d_lock) |
448 | { | 479 | { |
449 | struct inode *inode; | 480 | struct inode *inode; |
@@ -466,13 +497,16 @@ relock: | |||
466 | goto relock; | 497 | goto relock; |
467 | } | 498 | } |
468 | 499 | ||
469 | if (ref) | 500 | /* |
470 | dentry->d_count--; | 501 | * The dentry is now unrecoverably dead to the world. |
502 | */ | ||
503 | lockref_mark_dead(&dentry->d_lockref); | ||
504 | |||
471 | /* | 505 | /* |
472 | * inform the fs via d_prune that this dentry is about to be | 506 | * inform the fs via d_prune that this dentry is about to be |
473 | * unhashed and destroyed. | 507 | * unhashed and destroyed. |
474 | */ | 508 | */ |
475 | if (dentry->d_flags & DCACHE_OP_PRUNE) | 509 | if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry)) |
476 | dentry->d_op->d_prune(dentry); | 510 | dentry->d_op->d_prune(dentry); |
477 | 511 | ||
478 | dentry_lru_del(dentry); | 512 | dentry_lru_del(dentry); |
@@ -509,38 +543,31 @@ relock: | |||
509 | */ | 543 | */ |
510 | void dput(struct dentry *dentry) | 544 | void dput(struct dentry *dentry) |
511 | { | 545 | { |
512 | if (!dentry) | 546 | if (unlikely(!dentry)) |
513 | return; | 547 | return; |
514 | 548 | ||
515 | repeat: | 549 | repeat: |
516 | if (dentry->d_count == 1) | 550 | if (lockref_put_or_lock(&dentry->d_lockref)) |
517 | might_sleep(); | ||
518 | spin_lock(&dentry->d_lock); | ||
519 | BUG_ON(!dentry->d_count); | ||
520 | if (dentry->d_count > 1) { | ||
521 | dentry->d_count--; | ||
522 | spin_unlock(&dentry->d_lock); | ||
523 | return; | 551 | return; |
524 | } | ||
525 | 552 | ||
526 | if (dentry->d_flags & DCACHE_OP_DELETE) { | 553 | /* Unreachable? Get rid of it */ |
554 | if (unlikely(d_unhashed(dentry))) | ||
555 | goto kill_it; | ||
556 | |||
557 | if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { | ||
527 | if (dentry->d_op->d_delete(dentry)) | 558 | if (dentry->d_op->d_delete(dentry)) |
528 | goto kill_it; | 559 | goto kill_it; |
529 | } | 560 | } |
530 | 561 | ||
531 | /* Unreachable? Get rid of it */ | ||
532 | if (d_unhashed(dentry)) | ||
533 | goto kill_it; | ||
534 | |||
535 | dentry->d_flags |= DCACHE_REFERENCED; | 562 | dentry->d_flags |= DCACHE_REFERENCED; |
536 | dentry_lru_add(dentry); | 563 | dentry_lru_add(dentry); |
537 | 564 | ||
538 | dentry->d_count--; | 565 | dentry->d_lockref.count--; |
539 | spin_unlock(&dentry->d_lock); | 566 | spin_unlock(&dentry->d_lock); |
540 | return; | 567 | return; |
541 | 568 | ||
542 | kill_it: | 569 | kill_it: |
543 | dentry = dentry_kill(dentry, 1); | 570 | dentry = dentry_kill(dentry); |
544 | if (dentry) | 571 | if (dentry) |
545 | goto repeat; | 572 | goto repeat; |
546 | } | 573 | } |
@@ -590,7 +617,7 @@ int d_invalidate(struct dentry * dentry) | |||
590 | * We also need to leave mountpoints alone, | 617 | * We also need to leave mountpoints alone, |
591 | * directory or not. | 618 | * directory or not. |
592 | */ | 619 | */ |
593 | if (dentry->d_count > 1 && dentry->d_inode) { | 620 | if (dentry->d_lockref.count > 1 && dentry->d_inode) { |
594 | if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { | 621 | if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { |
595 | spin_unlock(&dentry->d_lock); | 622 | spin_unlock(&dentry->d_lock); |
596 | return -EBUSY; | 623 | return -EBUSY; |
@@ -606,20 +633,33 @@ EXPORT_SYMBOL(d_invalidate); | |||
606 | /* This must be called with d_lock held */ | 633 | /* This must be called with d_lock held */ |
607 | static inline void __dget_dlock(struct dentry *dentry) | 634 | static inline void __dget_dlock(struct dentry *dentry) |
608 | { | 635 | { |
609 | dentry->d_count++; | 636 | dentry->d_lockref.count++; |
610 | } | 637 | } |
611 | 638 | ||
612 | static inline void __dget(struct dentry *dentry) | 639 | static inline void __dget(struct dentry *dentry) |
613 | { | 640 | { |
614 | spin_lock(&dentry->d_lock); | 641 | lockref_get(&dentry->d_lockref); |
615 | __dget_dlock(dentry); | ||
616 | spin_unlock(&dentry->d_lock); | ||
617 | } | 642 | } |
618 | 643 | ||
619 | struct dentry *dget_parent(struct dentry *dentry) | 644 | struct dentry *dget_parent(struct dentry *dentry) |
620 | { | 645 | { |
646 | int gotref; | ||
621 | struct dentry *ret; | 647 | struct dentry *ret; |
622 | 648 | ||
649 | /* | ||
650 | * Do optimistic parent lookup without any | ||
651 | * locking. | ||
652 | */ | ||
653 | rcu_read_lock(); | ||
654 | ret = ACCESS_ONCE(dentry->d_parent); | ||
655 | gotref = lockref_get_not_zero(&ret->d_lockref); | ||
656 | rcu_read_unlock(); | ||
657 | if (likely(gotref)) { | ||
658 | if (likely(ret == ACCESS_ONCE(dentry->d_parent))) | ||
659 | return ret; | ||
660 | dput(ret); | ||
661 | } | ||
662 | |||
623 | repeat: | 663 | repeat: |
624 | /* | 664 | /* |
625 | * Don't need rcu_dereference because we re-check it was correct under | 665 | * Don't need rcu_dereference because we re-check it was correct under |
@@ -634,8 +674,8 @@ repeat: | |||
634 | goto repeat; | 674 | goto repeat; |
635 | } | 675 | } |
636 | rcu_read_unlock(); | 676 | rcu_read_unlock(); |
637 | BUG_ON(!ret->d_count); | 677 | BUG_ON(!ret->d_lockref.count); |
638 | ret->d_count++; | 678 | ret->d_lockref.count++; |
639 | spin_unlock(&ret->d_lock); | 679 | spin_unlock(&ret->d_lock); |
640 | return ret; | 680 | return ret; |
641 | } | 681 | } |
@@ -718,7 +758,15 @@ restart: | |||
718 | spin_lock(&inode->i_lock); | 758 | spin_lock(&inode->i_lock); |
719 | hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { | 759 | hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { |
720 | spin_lock(&dentry->d_lock); | 760 | spin_lock(&dentry->d_lock); |
721 | if (!dentry->d_count) { | 761 | if (!dentry->d_lockref.count) { |
762 | /* | ||
763 | * inform the fs via d_prune that this dentry | ||
764 | * is about to be unhashed and destroyed. | ||
765 | */ | ||
766 | if ((dentry->d_flags & DCACHE_OP_PRUNE) && | ||
767 | !d_unhashed(dentry)) | ||
768 | dentry->d_op->d_prune(dentry); | ||
769 | |||
722 | __dget_dlock(dentry); | 770 | __dget_dlock(dentry); |
723 | __d_drop(dentry); | 771 | __d_drop(dentry); |
724 | spin_unlock(&dentry->d_lock); | 772 | spin_unlock(&dentry->d_lock); |
@@ -744,7 +792,7 @@ static void try_prune_one_dentry(struct dentry *dentry) | |||
744 | { | 792 | { |
745 | struct dentry *parent; | 793 | struct dentry *parent; |
746 | 794 | ||
747 | parent = dentry_kill(dentry, 0); | 795 | parent = dentry_kill(dentry); |
748 | /* | 796 | /* |
749 | * If dentry_kill returns NULL, we have nothing more to do. | 797 | * If dentry_kill returns NULL, we have nothing more to do. |
750 | * if it returns the same dentry, trylocks failed. In either | 798 | * if it returns the same dentry, trylocks failed. In either |
@@ -763,13 +811,9 @@ static void try_prune_one_dentry(struct dentry *dentry) | |||
763 | /* Prune ancestors. */ | 811 | /* Prune ancestors. */ |
764 | dentry = parent; | 812 | dentry = parent; |
765 | while (dentry) { | 813 | while (dentry) { |
766 | spin_lock(&dentry->d_lock); | 814 | if (lockref_put_or_lock(&dentry->d_lockref)) |
767 | if (dentry->d_count > 1) { | ||
768 | dentry->d_count--; | ||
769 | spin_unlock(&dentry->d_lock); | ||
770 | return; | 815 | return; |
771 | } | 816 | dentry = dentry_kill(dentry); |
772 | dentry = dentry_kill(dentry, 1); | ||
773 | } | 817 | } |
774 | } | 818 | } |
775 | 819 | ||
@@ -793,7 +837,7 @@ static void shrink_dentry_list(struct list_head *list) | |||
793 | * the LRU because of laziness during lookup. Do not free | 837 | * the LRU because of laziness during lookup. Do not free |
794 | * it - just keep it off the LRU list. | 838 | * it - just keep it off the LRU list. |
795 | */ | 839 | */ |
796 | if (dentry->d_count) { | 840 | if (dentry->d_lockref.count) { |
797 | dentry_lru_del(dentry); | 841 | dentry_lru_del(dentry); |
798 | spin_unlock(&dentry->d_lock); | 842 | spin_unlock(&dentry->d_lock); |
799 | continue; | 843 | continue; |
@@ -907,13 +951,14 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) | |||
907 | * inform the fs that this dentry is about to be | 951 | * inform the fs that this dentry is about to be |
908 | * unhashed and destroyed. | 952 | * unhashed and destroyed. |
909 | */ | 953 | */ |
910 | if (dentry->d_flags & DCACHE_OP_PRUNE) | 954 | if ((dentry->d_flags & DCACHE_OP_PRUNE) && |
955 | !d_unhashed(dentry)) | ||
911 | dentry->d_op->d_prune(dentry); | 956 | dentry->d_op->d_prune(dentry); |
912 | 957 | ||
913 | dentry_lru_del(dentry); | 958 | dentry_lru_del(dentry); |
914 | __d_shrink(dentry); | 959 | __d_shrink(dentry); |
915 | 960 | ||
916 | if (dentry->d_count != 0) { | 961 | if (dentry->d_lockref.count != 0) { |
917 | printk(KERN_ERR | 962 | printk(KERN_ERR |
918 | "BUG: Dentry %p{i=%lx,n=%s}" | 963 | "BUG: Dentry %p{i=%lx,n=%s}" |
919 | " still in use (%d)" | 964 | " still in use (%d)" |
@@ -922,7 +967,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) | |||
922 | dentry->d_inode ? | 967 | dentry->d_inode ? |
923 | dentry->d_inode->i_ino : 0UL, | 968 | dentry->d_inode->i_ino : 0UL, |
924 | dentry->d_name.name, | 969 | dentry->d_name.name, |
925 | dentry->d_count, | 970 | dentry->d_lockref.count, |
926 | dentry->d_sb->s_type->name, | 971 | dentry->d_sb->s_type->name, |
927 | dentry->d_sb->s_id); | 972 | dentry->d_sb->s_id); |
928 | BUG(); | 973 | BUG(); |
@@ -933,7 +978,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) | |||
933 | list_del(&dentry->d_u.d_child); | 978 | list_del(&dentry->d_u.d_child); |
934 | } else { | 979 | } else { |
935 | parent = dentry->d_parent; | 980 | parent = dentry->d_parent; |
936 | parent->d_count--; | 981 | parent->d_lockref.count--; |
937 | list_del(&dentry->d_u.d_child); | 982 | list_del(&dentry->d_u.d_child); |
938 | } | 983 | } |
939 | 984 | ||
@@ -981,7 +1026,7 @@ void shrink_dcache_for_umount(struct super_block *sb) | |||
981 | 1026 | ||
982 | dentry = sb->s_root; | 1027 | dentry = sb->s_root; |
983 | sb->s_root = NULL; | 1028 | sb->s_root = NULL; |
984 | dentry->d_count--; | 1029 | dentry->d_lockref.count--; |
985 | shrink_dcache_for_umount_subtree(dentry); | 1030 | shrink_dcache_for_umount_subtree(dentry); |
986 | 1031 | ||
987 | while (!hlist_bl_empty(&sb->s_anon)) { | 1032 | while (!hlist_bl_empty(&sb->s_anon)) { |
@@ -996,7 +1041,7 @@ void shrink_dcache_for_umount(struct super_block *sb) | |||
996 | * the parenthood after dropping the lock and check | 1041 | * the parenthood after dropping the lock and check |
997 | * that the sequence number still matches. | 1042 | * that the sequence number still matches. |
998 | */ | 1043 | */ |
999 | static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) | 1044 | static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) |
1000 | { | 1045 | { |
1001 | struct dentry *new = old->d_parent; | 1046 | struct dentry *new = old->d_parent; |
1002 | 1047 | ||
@@ -1010,7 +1055,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq | |||
1010 | */ | 1055 | */ |
1011 | if (new != old->d_parent || | 1056 | if (new != old->d_parent || |
1012 | (old->d_flags & DCACHE_DENTRY_KILLED) || | 1057 | (old->d_flags & DCACHE_DENTRY_KILLED) || |
1013 | (!locked && read_seqretry(&rename_lock, seq))) { | 1058 | need_seqretry(&rename_lock, seq)) { |
1014 | spin_unlock(&new->d_lock); | 1059 | spin_unlock(&new->d_lock); |
1015 | new = NULL; | 1060 | new = NULL; |
1016 | } | 1061 | } |
@@ -1018,34 +1063,55 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq | |||
1018 | return new; | 1063 | return new; |
1019 | } | 1064 | } |
1020 | 1065 | ||
1066 | /** | ||
1067 | * enum d_walk_ret - action to take during tree walk | ||
1068 | * @D_WALK_CONTINUE: continue walk | ||
1069 | * @D_WALK_QUIT: quit walk | ||
1070 | * @D_WALK_NORETRY: quit when retry is needed | ||
1071 | * @D_WALK_SKIP: skip this dentry and its children | ||
1072 | */ | ||
1073 | enum d_walk_ret { | ||
1074 | D_WALK_CONTINUE, | ||
1075 | D_WALK_QUIT, | ||
1076 | D_WALK_NORETRY, | ||
1077 | D_WALK_SKIP, | ||
1078 | }; | ||
1021 | 1079 | ||
1022 | /* | ||
1023 | * Search for at least 1 mount point in the dentry's subdirs. | ||
1024 | * We descend to the next level whenever the d_subdirs | ||
1025 | * list is non-empty and continue searching. | ||
1026 | */ | ||
1027 | |||
1028 | /** | 1080 | /** |
1029 | * have_submounts - check for mounts over a dentry | 1081 | * d_walk - walk the dentry tree |
1030 | * @parent: dentry to check. | 1082 | * @parent: start of walk |
1083 | * @data: data passed to @enter() and @finish() | ||
1084 | * @enter: callback when first entering the dentry | ||
1085 | * @finish: callback when successfully finished the walk | ||
1031 | * | 1086 | * |
1032 | * Return true if the parent or its subdirectories contain | 1087 | * The @enter() and @finish() callbacks are called with d_lock held. |
1033 | * a mount point | ||
1034 | */ | 1088 | */ |
1035 | int have_submounts(struct dentry *parent) | 1089 | static void d_walk(struct dentry *parent, void *data, |
1090 | enum d_walk_ret (*enter)(void *, struct dentry *), | ||
1091 | void (*finish)(void *)) | ||
1036 | { | 1092 | { |
1037 | struct dentry *this_parent; | 1093 | struct dentry *this_parent; |
1038 | struct list_head *next; | 1094 | struct list_head *next; |
1039 | unsigned seq; | 1095 | unsigned seq = 0; |
1040 | int locked = 0; | 1096 | enum d_walk_ret ret; |
1097 | bool retry = true; | ||
1041 | 1098 | ||
1042 | seq = read_seqbegin(&rename_lock); | ||
1043 | again: | 1099 | again: |
1100 | read_seqbegin_or_lock(&rename_lock, &seq); | ||
1044 | this_parent = parent; | 1101 | this_parent = parent; |
1045 | |||
1046 | if (d_mountpoint(parent)) | ||
1047 | goto positive; | ||
1048 | spin_lock(&this_parent->d_lock); | 1102 | spin_lock(&this_parent->d_lock); |
1103 | |||
1104 | ret = enter(data, this_parent); | ||
1105 | switch (ret) { | ||
1106 | case D_WALK_CONTINUE: | ||
1107 | break; | ||
1108 | case D_WALK_QUIT: | ||
1109 | case D_WALK_SKIP: | ||
1110 | goto out_unlock; | ||
1111 | case D_WALK_NORETRY: | ||
1112 | retry = false; | ||
1113 | break; | ||
1114 | } | ||
1049 | repeat: | 1115 | repeat: |
1050 | next = this_parent->d_subdirs.next; | 1116 | next = this_parent->d_subdirs.next; |
1051 | resume: | 1117 | resume: |
@@ -1055,12 +1121,22 @@ resume: | |||
1055 | next = tmp->next; | 1121 | next = tmp->next; |
1056 | 1122 | ||
1057 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | 1123 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
1058 | /* Have we found a mount point ? */ | 1124 | |
1059 | if (d_mountpoint(dentry)) { | 1125 | ret = enter(data, dentry); |
1126 | switch (ret) { | ||
1127 | case D_WALK_CONTINUE: | ||
1128 | break; | ||
1129 | case D_WALK_QUIT: | ||
1060 | spin_unlock(&dentry->d_lock); | 1130 | spin_unlock(&dentry->d_lock); |
1061 | spin_unlock(&this_parent->d_lock); | 1131 | goto out_unlock; |
1062 | goto positive; | 1132 | case D_WALK_NORETRY: |
1133 | retry = false; | ||
1134 | break; | ||
1135 | case D_WALK_SKIP: | ||
1136 | spin_unlock(&dentry->d_lock); | ||
1137 | continue; | ||
1063 | } | 1138 | } |
1139 | |||
1064 | if (!list_empty(&dentry->d_subdirs)) { | 1140 | if (!list_empty(&dentry->d_subdirs)) { |
1065 | spin_unlock(&this_parent->d_lock); | 1141 | spin_unlock(&this_parent->d_lock); |
1066 | spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); | 1142 | spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); |
@@ -1075,35 +1151,99 @@ resume: | |||
1075 | */ | 1151 | */ |
1076 | if (this_parent != parent) { | 1152 | if (this_parent != parent) { |
1077 | struct dentry *child = this_parent; | 1153 | struct dentry *child = this_parent; |
1078 | this_parent = try_to_ascend(this_parent, locked, seq); | 1154 | this_parent = try_to_ascend(this_parent, seq); |
1079 | if (!this_parent) | 1155 | if (!this_parent) |
1080 | goto rename_retry; | 1156 | goto rename_retry; |
1081 | next = child->d_u.d_child.next; | 1157 | next = child->d_u.d_child.next; |
1082 | goto resume; | 1158 | goto resume; |
1083 | } | 1159 | } |
1084 | spin_unlock(&this_parent->d_lock); | 1160 | if (need_seqretry(&rename_lock, seq)) { |
1085 | if (!locked && read_seqretry(&rename_lock, seq)) | 1161 | spin_unlock(&this_parent->d_lock); |
1086 | goto rename_retry; | ||
1087 | if (locked) | ||
1088 | write_sequnlock(&rename_lock); | ||
1089 | return 0; /* No mount points found in tree */ | ||
1090 | positive: | ||
1091 | if (!locked && read_seqretry(&rename_lock, seq)) | ||
1092 | goto rename_retry; | 1162 | goto rename_retry; |
1093 | if (locked) | 1163 | } |
1094 | write_sequnlock(&rename_lock); | 1164 | if (finish) |
1095 | return 1; | 1165 | finish(data); |
1166 | |||
1167 | out_unlock: | ||
1168 | spin_unlock(&this_parent->d_lock); | ||
1169 | done_seqretry(&rename_lock, seq); | ||
1170 | return; | ||
1096 | 1171 | ||
1097 | rename_retry: | 1172 | rename_retry: |
1098 | if (locked) | 1173 | if (!retry) |
1099 | goto again; | 1174 | return; |
1100 | locked = 1; | 1175 | seq = 1; |
1101 | write_seqlock(&rename_lock); | ||
1102 | goto again; | 1176 | goto again; |
1103 | } | 1177 | } |
1178 | |||
1179 | /* | ||
1180 | * Search for at least 1 mount point in the dentry's subdirs. | ||
1181 | * We descend to the next level whenever the d_subdirs | ||
1182 | * list is non-empty and continue searching. | ||
1183 | */ | ||
1184 | |||
1185 | /** | ||
1186 | * have_submounts - check for mounts over a dentry | ||
1187 | * @parent: dentry to check. | ||
1188 | * | ||
1189 | * Return true if the parent or its subdirectories contain | ||
1190 | * a mount point | ||
1191 | */ | ||
1192 | |||
1193 | static enum d_walk_ret check_mount(void *data, struct dentry *dentry) | ||
1194 | { | ||
1195 | int *ret = data; | ||
1196 | if (d_mountpoint(dentry)) { | ||
1197 | *ret = 1; | ||
1198 | return D_WALK_QUIT; | ||
1199 | } | ||
1200 | return D_WALK_CONTINUE; | ||
1201 | } | ||
1202 | |||
1203 | int have_submounts(struct dentry *parent) | ||
1204 | { | ||
1205 | int ret = 0; | ||
1206 | |||
1207 | d_walk(parent, &ret, check_mount, NULL); | ||
1208 | |||
1209 | return ret; | ||
1210 | } | ||
1104 | EXPORT_SYMBOL(have_submounts); | 1211 | EXPORT_SYMBOL(have_submounts); |
1105 | 1212 | ||
1106 | /* | 1213 | /* |
1214 | * Called by mount code to set a mountpoint and check if the mountpoint is | ||
1215 | * reachable (e.g. NFS can unhash a directory dentry and then the complete | ||
1216 | * subtree can become unreachable). | ||
1217 | * | ||
1218 | * Only one of check_submounts_and_drop() and d_set_mounted() must succeed. For | ||
1219 | * this reason take rename_lock and d_lock on dentry and ancestors. | ||
1220 | */ | ||
1221 | int d_set_mounted(struct dentry *dentry) | ||
1222 | { | ||
1223 | struct dentry *p; | ||
1224 | int ret = -ENOENT; | ||
1225 | write_seqlock(&rename_lock); | ||
1226 | for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) { | ||
1227 | /* Need exclusion wrt. check_submounts_and_drop() */ | ||
1228 | spin_lock(&p->d_lock); | ||
1229 | if (unlikely(d_unhashed(p))) { | ||
1230 | spin_unlock(&p->d_lock); | ||
1231 | goto out; | ||
1232 | } | ||
1233 | spin_unlock(&p->d_lock); | ||
1234 | } | ||
1235 | spin_lock(&dentry->d_lock); | ||
1236 | if (!d_unlinked(dentry)) { | ||
1237 | dentry->d_flags |= DCACHE_MOUNTED; | ||
1238 | ret = 0; | ||
1239 | } | ||
1240 | spin_unlock(&dentry->d_lock); | ||
1241 | out: | ||
1242 | write_sequnlock(&rename_lock); | ||
1243 | return ret; | ||
1244 | } | ||
1245 | |||
1246 | /* | ||
1107 | * Search the dentry child list of the specified parent, | 1247 | * Search the dentry child list of the specified parent, |
1108 | * and move any unused dentries to the end of the unused | 1248 | * and move any unused dentries to the end of the unused |
1109 | * list for prune_dcache(). We descend to the next level | 1249 | * list for prune_dcache(). We descend to the next level |
@@ -1117,93 +1257,46 @@ EXPORT_SYMBOL(have_submounts); | |||
1117 | * drop the lock and return early due to latency | 1257 | * drop the lock and return early due to latency |
1118 | * constraints. | 1258 | * constraints. |
1119 | */ | 1259 | */ |
1120 | static int select_parent(struct dentry *parent, struct list_head *dispose) | ||
1121 | { | ||
1122 | struct dentry *this_parent; | ||
1123 | struct list_head *next; | ||
1124 | unsigned seq; | ||
1125 | int found = 0; | ||
1126 | int locked = 0; | ||
1127 | |||
1128 | seq = read_seqbegin(&rename_lock); | ||
1129 | again: | ||
1130 | this_parent = parent; | ||
1131 | spin_lock(&this_parent->d_lock); | ||
1132 | repeat: | ||
1133 | next = this_parent->d_subdirs.next; | ||
1134 | resume: | ||
1135 | while (next != &this_parent->d_subdirs) { | ||
1136 | struct list_head *tmp = next; | ||
1137 | struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); | ||
1138 | next = tmp->next; | ||
1139 | 1260 | ||
1140 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | 1261 | struct select_data { |
1262 | struct dentry *start; | ||
1263 | struct list_head dispose; | ||
1264 | int found; | ||
1265 | }; | ||
1141 | 1266 | ||
1142 | /* | 1267 | static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) |
1143 | * move only zero ref count dentries to the dispose list. | 1268 | { |
1144 | * | 1269 | struct select_data *data = _data; |
1145 | * Those which are presently on the shrink list, being processed | 1270 | enum d_walk_ret ret = D_WALK_CONTINUE; |
1146 | * by shrink_dentry_list(), shouldn't be moved. Otherwise the | ||
1147 | * loop in shrink_dcache_parent() might not make any progress | ||
1148 | * and loop forever. | ||
1149 | */ | ||
1150 | if (dentry->d_count) { | ||
1151 | dentry_lru_del(dentry); | ||
1152 | } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { | ||
1153 | dentry_lru_move_list(dentry, dispose); | ||
1154 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
1155 | found++; | ||
1156 | } | ||
1157 | /* | ||
1158 | * We can return to the caller if we have found some (this | ||
1159 | * ensures forward progress). We'll be coming back to find | ||
1160 | * the rest. | ||
1161 | */ | ||
1162 | if (found && need_resched()) { | ||
1163 | spin_unlock(&dentry->d_lock); | ||
1164 | goto out; | ||
1165 | } | ||
1166 | 1271 | ||
1167 | /* | 1272 | if (data->start == dentry) |
1168 | * Descend a level if the d_subdirs list is non-empty. | 1273 | goto out; |
1169 | */ | ||
1170 | if (!list_empty(&dentry->d_subdirs)) { | ||
1171 | spin_unlock(&this_parent->d_lock); | ||
1172 | spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); | ||
1173 | this_parent = dentry; | ||
1174 | spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); | ||
1175 | goto repeat; | ||
1176 | } | ||
1177 | 1274 | ||
1178 | spin_unlock(&dentry->d_lock); | ||
1179 | } | ||
1180 | /* | 1275 | /* |
1181 | * All done at this level ... ascend and resume the search. | 1276 | * move only zero ref count dentries to the dispose list. |
1277 | * | ||
1278 | * Those which are presently on the shrink list, being processed | ||
1279 | * by shrink_dentry_list(), shouldn't be moved. Otherwise the | ||
1280 | * loop in shrink_dcache_parent() might not make any progress | ||
1281 | * and loop forever. | ||
1182 | */ | 1282 | */ |
1183 | if (this_parent != parent) { | 1283 | if (dentry->d_lockref.count) { |
1184 | struct dentry *child = this_parent; | 1284 | dentry_lru_del(dentry); |
1185 | this_parent = try_to_ascend(this_parent, locked, seq); | 1285 | } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { |
1186 | if (!this_parent) | 1286 | dentry_lru_move_list(dentry, &data->dispose); |
1187 | goto rename_retry; | 1287 | dentry->d_flags |= DCACHE_SHRINK_LIST; |
1188 | next = child->d_u.d_child.next; | 1288 | data->found++; |
1189 | goto resume; | 1289 | ret = D_WALK_NORETRY; |
1190 | } | 1290 | } |
1291 | /* | ||
1292 | * We can return to the caller if we have found some (this | ||
1293 | * ensures forward progress). We'll be coming back to find | ||
1294 | * the rest. | ||
1295 | */ | ||
1296 | if (data->found && need_resched()) | ||
1297 | ret = D_WALK_QUIT; | ||
1191 | out: | 1298 | out: |
1192 | spin_unlock(&this_parent->d_lock); | 1299 | return ret; |
1193 | if (!locked && read_seqretry(&rename_lock, seq)) | ||
1194 | goto rename_retry; | ||
1195 | if (locked) | ||
1196 | write_sequnlock(&rename_lock); | ||
1197 | return found; | ||
1198 | |||
1199 | rename_retry: | ||
1200 | if (found) | ||
1201 | return found; | ||
1202 | if (locked) | ||
1203 | goto again; | ||
1204 | locked = 1; | ||
1205 | write_seqlock(&rename_lock); | ||
1206 | goto again; | ||
1207 | } | 1300 | } |
1208 | 1301 | ||
1209 | /** | 1302 | /** |
@@ -1212,18 +1305,90 @@ rename_retry: | |||
1212 | * | 1305 | * |
1213 | * Prune the dcache to remove unused children of the parent dentry. | 1306 | * Prune the dcache to remove unused children of the parent dentry. |
1214 | */ | 1307 | */ |
1215 | void shrink_dcache_parent(struct dentry * parent) | 1308 | void shrink_dcache_parent(struct dentry *parent) |
1216 | { | 1309 | { |
1217 | LIST_HEAD(dispose); | 1310 | for (;;) { |
1218 | int found; | 1311 | struct select_data data; |
1312 | |||
1313 | INIT_LIST_HEAD(&data.dispose); | ||
1314 | data.start = parent; | ||
1315 | data.found = 0; | ||
1219 | 1316 | ||
1220 | while ((found = select_parent(parent, &dispose)) != 0) { | 1317 | d_walk(parent, &data, select_collect, NULL); |
1221 | shrink_dentry_list(&dispose); | 1318 | if (!data.found) |
1319 | break; | ||
1320 | |||
1321 | shrink_dentry_list(&data.dispose); | ||
1222 | cond_resched(); | 1322 | cond_resched(); |
1223 | } | 1323 | } |
1224 | } | 1324 | } |
1225 | EXPORT_SYMBOL(shrink_dcache_parent); | 1325 | EXPORT_SYMBOL(shrink_dcache_parent); |
1226 | 1326 | ||
1327 | static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) | ||
1328 | { | ||
1329 | struct select_data *data = _data; | ||
1330 | |||
1331 | if (d_mountpoint(dentry)) { | ||
1332 | data->found = -EBUSY; | ||
1333 | return D_WALK_QUIT; | ||
1334 | } | ||
1335 | |||
1336 | return select_collect(_data, dentry); | ||
1337 | } | ||
1338 | |||
1339 | static void check_and_drop(void *_data) | ||
1340 | { | ||
1341 | struct select_data *data = _data; | ||
1342 | |||
1343 | if (d_mountpoint(data->start)) | ||
1344 | data->found = -EBUSY; | ||
1345 | if (!data->found) | ||
1346 | __d_drop(data->start); | ||
1347 | } | ||
1348 | |||
1349 | /** | ||
1350 | * check_submounts_and_drop - prune dcache, check for submounts and drop | ||
1351 | * | ||
1352 | * All done as a single atomic operation relative to has_unlinked_ancestor(). | ||
1353 | * Returns 0 if successfully unhashed @parent. If there were submounts then | ||
1354 | * return -EBUSY. | ||
1355 | * | ||
1356 | * @dentry: dentry to prune and drop | ||
1357 | */ | ||
1358 | int check_submounts_and_drop(struct dentry *dentry) | ||
1359 | { | ||
1360 | int ret = 0; | ||
1361 | |||
1362 | /* Negative dentries can be dropped without further checks */ | ||
1363 | if (!dentry->d_inode) { | ||
1364 | d_drop(dentry); | ||
1365 | goto out; | ||
1366 | } | ||
1367 | |||
1368 | for (;;) { | ||
1369 | struct select_data data; | ||
1370 | |||
1371 | INIT_LIST_HEAD(&data.dispose); | ||
1372 | data.start = dentry; | ||
1373 | data.found = 0; | ||
1374 | |||
1375 | d_walk(dentry, &data, check_and_collect, check_and_drop); | ||
1376 | ret = data.found; | ||
1377 | |||
1378 | if (!list_empty(&data.dispose)) | ||
1379 | shrink_dentry_list(&data.dispose); | ||
1380 | |||
1381 | if (ret <= 0) | ||
1382 | break; | ||
1383 | |||
1384 | cond_resched(); | ||
1385 | } | ||
1386 | |||
1387 | out: | ||
1388 | return ret; | ||
1389 | } | ||
1390 | EXPORT_SYMBOL(check_submounts_and_drop); | ||
1391 | |||
1227 | /** | 1392 | /** |
1228 | * __d_alloc - allocate a dcache entry | 1393 | * __d_alloc - allocate a dcache entry |
1229 | * @sb: filesystem it will belong to | 1394 | * @sb: filesystem it will belong to |
@@ -1269,7 +1434,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) | |||
1269 | smp_wmb(); | 1434 | smp_wmb(); |
1270 | dentry->d_name.name = dname; | 1435 | dentry->d_name.name = dname; |
1271 | 1436 | ||
1272 | dentry->d_count = 1; | 1437 | dentry->d_lockref.count = 1; |
1273 | dentry->d_flags = 0; | 1438 | dentry->d_flags = 0; |
1274 | spin_lock_init(&dentry->d_lock); | 1439 | spin_lock_init(&dentry->d_lock); |
1275 | seqcount_init(&dentry->d_seq); | 1440 | seqcount_init(&dentry->d_seq); |
@@ -1782,7 +1947,7 @@ static noinline enum slow_d_compare slow_dentry_cmp( | |||
1782 | * without taking d_lock and checking d_seq sequence count against @seq | 1947 | * without taking d_lock and checking d_seq sequence count against @seq |
1783 | * returned here. | 1948 | * returned here. |
1784 | * | 1949 | * |
1785 | * A refcount may be taken on the found dentry with the __d_rcu_to_refcount | 1950 | * A refcount may be taken on the found dentry with the d_rcu_to_refcount |
1786 | * function. | 1951 | * function. |
1787 | * | 1952 | * |
1788 | * Alternatively, __d_lookup_rcu may be called again to look up the child of | 1953 | * Alternatively, __d_lookup_rcu may be called again to look up the child of |
@@ -1970,7 +2135,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) | |||
1970 | goto next; | 2135 | goto next; |
1971 | } | 2136 | } |
1972 | 2137 | ||
1973 | dentry->d_count++; | 2138 | dentry->d_lockref.count++; |
1974 | found = dentry; | 2139 | found = dentry; |
1975 | spin_unlock(&dentry->d_lock); | 2140 | spin_unlock(&dentry->d_lock); |
1976 | break; | 2141 | break; |
@@ -2069,7 +2234,7 @@ again: | |||
2069 | spin_lock(&dentry->d_lock); | 2234 | spin_lock(&dentry->d_lock); |
2070 | inode = dentry->d_inode; | 2235 | inode = dentry->d_inode; |
2071 | isdir = S_ISDIR(inode->i_mode); | 2236 | isdir = S_ISDIR(inode->i_mode); |
2072 | if (dentry->d_count == 1) { | 2237 | if (dentry->d_lockref.count == 1) { |
2073 | if (!spin_trylock(&inode->i_lock)) { | 2238 | if (!spin_trylock(&inode->i_lock)) { |
2074 | spin_unlock(&dentry->d_lock); | 2239 | spin_unlock(&dentry->d_lock); |
2075 | cpu_relax(); | 2240 | cpu_relax(); |
@@ -2506,9 +2671,39 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) | |||
2506 | return 0; | 2671 | return 0; |
2507 | } | 2672 | } |
2508 | 2673 | ||
2674 | /** | ||
2675 | * prepend_name - prepend a pathname in front of current buffer pointer | ||
2676 | * @buffer: buffer pointer | ||
2677 | * @buflen: allocated length of the buffer | ||
2678 | * @name: name string and length qstr structure | ||
2679 | * | ||
2680 | * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to | ||
2681 | * make sure that either the old or the new name pointer and length are | ||
2682 | * fetched. However, there may be mismatch between length and pointer. | ||
2683 | * The length cannot be trusted, we need to copy it byte-by-byte until | ||
2684 | * the length is reached or a null byte is found. It also prepends "/" at | ||
2685 | * the beginning of the name. The sequence number check at the caller will | ||
2686 | * retry it again when a d_move() does happen. So any garbage in the buffer | ||
2687 | * due to mismatched pointer and length will be discarded. | ||
2688 | */ | ||
2509 | static int prepend_name(char **buffer, int *buflen, struct qstr *name) | 2689 | static int prepend_name(char **buffer, int *buflen, struct qstr *name) |
2510 | { | 2690 | { |
2511 | return prepend(buffer, buflen, name->name, name->len); | 2691 | const char *dname = ACCESS_ONCE(name->name); |
2692 | u32 dlen = ACCESS_ONCE(name->len); | ||
2693 | char *p; | ||
2694 | |||
2695 | if (*buflen < dlen + 1) | ||
2696 | return -ENAMETOOLONG; | ||
2697 | *buflen -= dlen + 1; | ||
2698 | p = *buffer -= dlen + 1; | ||
2699 | *p++ = '/'; | ||
2700 | while (dlen--) { | ||
2701 | char c = *dname++; | ||
2702 | if (!c) | ||
2703 | break; | ||
2704 | *p++ = c; | ||
2705 | } | ||
2706 | return 0; | ||
2512 | } | 2707 | } |
2513 | 2708 | ||
2514 | /** | 2709 | /** |
@@ -2518,7 +2713,15 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) | |||
2518 | * @buffer: pointer to the end of the buffer | 2713 | * @buffer: pointer to the end of the buffer |
2519 | * @buflen: pointer to buffer length | 2714 | * @buflen: pointer to buffer length |
2520 | * | 2715 | * |
2521 | * Caller holds the rename_lock. | 2716 | * The function will first try to write out the pathname without taking any |
2717 | * lock other than the RCU read lock to make sure that dentries won't go away. | ||
2718 | * It only checks the sequence number of the global rename_lock as any change | ||
2719 | * in the dentry's d_seq will be preceded by changes in the rename_lock | ||
2720 | * sequence number. If the sequence number had been changed, it will restart | ||
2721 | * the whole pathname back-tracing sequence again by taking the rename_lock. | ||
2722 | * In this case, there is no need to take the RCU read lock as the recursive | ||
2723 | * parent pointer references will keep the dentry chain alive as long as no | ||
2724 | * rename operation is performed. | ||
2522 | */ | 2725 | */ |
2523 | static int prepend_path(const struct path *path, | 2726 | static int prepend_path(const struct path *path, |
2524 | const struct path *root, | 2727 | const struct path *root, |
@@ -2527,54 +2730,66 @@ static int prepend_path(const struct path *path, | |||
2527 | struct dentry *dentry = path->dentry; | 2730 | struct dentry *dentry = path->dentry; |
2528 | struct vfsmount *vfsmnt = path->mnt; | 2731 | struct vfsmount *vfsmnt = path->mnt; |
2529 | struct mount *mnt = real_mount(vfsmnt); | 2732 | struct mount *mnt = real_mount(vfsmnt); |
2530 | bool slash = false; | ||
2531 | int error = 0; | 2733 | int error = 0; |
2734 | unsigned seq = 0; | ||
2735 | char *bptr; | ||
2736 | int blen; | ||
2532 | 2737 | ||
2738 | rcu_read_lock(); | ||
2739 | restart: | ||
2740 | bptr = *buffer; | ||
2741 | blen = *buflen; | ||
2742 | read_seqbegin_or_lock(&rename_lock, &seq); | ||
2533 | while (dentry != root->dentry || vfsmnt != root->mnt) { | 2743 | while (dentry != root->dentry || vfsmnt != root->mnt) { |
2534 | struct dentry * parent; | 2744 | struct dentry * parent; |
2535 | 2745 | ||
2536 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { | 2746 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { |
2537 | /* Global root? */ | 2747 | /* Global root? */ |
2538 | if (!mnt_has_parent(mnt)) | 2748 | if (mnt_has_parent(mnt)) { |
2539 | goto global_root; | 2749 | dentry = mnt->mnt_mountpoint; |
2540 | dentry = mnt->mnt_mountpoint; | 2750 | mnt = mnt->mnt_parent; |
2541 | mnt = mnt->mnt_parent; | 2751 | vfsmnt = &mnt->mnt; |
2542 | vfsmnt = &mnt->mnt; | 2752 | continue; |
2543 | continue; | 2753 | } |
2754 | /* | ||
2755 | * Filesystems needing to implement special "root names" | ||
2756 | * should do so with ->d_dname() | ||
2757 | */ | ||
2758 | if (IS_ROOT(dentry) && | ||
2759 | (dentry->d_name.len != 1 || | ||
2760 | dentry->d_name.name[0] != '/')) { | ||
2761 | WARN(1, "Root dentry has weird name <%.*s>\n", | ||
2762 | (int) dentry->d_name.len, | ||
2763 | dentry->d_name.name); | ||
2764 | } | ||
2765 | if (!error) | ||
2766 | error = is_mounted(vfsmnt) ? 1 : 2; | ||
2767 | break; | ||
2544 | } | 2768 | } |
2545 | parent = dentry->d_parent; | 2769 | parent = dentry->d_parent; |
2546 | prefetch(parent); | 2770 | prefetch(parent); |
2547 | spin_lock(&dentry->d_lock); | 2771 | error = prepend_name(&bptr, &blen, &dentry->d_name); |
2548 | error = prepend_name(buffer, buflen, &dentry->d_name); | ||
2549 | spin_unlock(&dentry->d_lock); | ||
2550 | if (!error) | ||
2551 | error = prepend(buffer, buflen, "/", 1); | ||
2552 | if (error) | 2772 | if (error) |
2553 | break; | 2773 | break; |
2554 | 2774 | ||
2555 | slash = true; | ||
2556 | dentry = parent; | 2775 | dentry = parent; |
2557 | } | 2776 | } |
2777 | if (!(seq & 1)) | ||
2778 | rcu_read_unlock(); | ||
2779 | if (need_seqretry(&rename_lock, seq)) { | ||
2780 | seq = 1; | ||
2781 | goto restart; | ||
2782 | } | ||
2783 | done_seqretry(&rename_lock, seq); | ||
2558 | 2784 | ||
2559 | if (!error && !slash) | 2785 | if (error >= 0 && bptr == *buffer) { |
2560 | error = prepend(buffer, buflen, "/", 1); | 2786 | if (--blen < 0) |
2561 | 2787 | error = -ENAMETOOLONG; | |
2562 | return error; | 2788 | else |
2563 | 2789 | *--bptr = '/'; | |
2564 | global_root: | 2790 | } |
2565 | /* | 2791 | *buffer = bptr; |
2566 | * Filesystems needing to implement special "root names" | 2792 | *buflen = blen; |
2567 | * should do so with ->d_dname() | ||
2568 | */ | ||
2569 | if (IS_ROOT(dentry) && | ||
2570 | (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) { | ||
2571 | WARN(1, "Root dentry has weird name <%.*s>\n", | ||
2572 | (int) dentry->d_name.len, dentry->d_name.name); | ||
2573 | } | ||
2574 | if (!slash) | ||
2575 | error = prepend(buffer, buflen, "/", 1); | ||
2576 | if (!error) | ||
2577 | error = is_mounted(vfsmnt) ? 1 : 2; | ||
2578 | return error; | 2793 | return error; |
2579 | } | 2794 | } |
2580 | 2795 | ||
@@ -2603,9 +2818,7 @@ char *__d_path(const struct path *path, | |||
2603 | 2818 | ||
2604 | prepend(&res, &buflen, "\0", 1); | 2819 | prepend(&res, &buflen, "\0", 1); |
2605 | br_read_lock(&vfsmount_lock); | 2820 | br_read_lock(&vfsmount_lock); |
2606 | write_seqlock(&rename_lock); | ||
2607 | error = prepend_path(path, root, &res, &buflen); | 2821 | error = prepend_path(path, root, &res, &buflen); |
2608 | write_sequnlock(&rename_lock); | ||
2609 | br_read_unlock(&vfsmount_lock); | 2822 | br_read_unlock(&vfsmount_lock); |
2610 | 2823 | ||
2611 | if (error < 0) | 2824 | if (error < 0) |
@@ -2624,9 +2837,7 @@ char *d_absolute_path(const struct path *path, | |||
2624 | 2837 | ||
2625 | prepend(&res, &buflen, "\0", 1); | 2838 | prepend(&res, &buflen, "\0", 1); |
2626 | br_read_lock(&vfsmount_lock); | 2839 | br_read_lock(&vfsmount_lock); |
2627 | write_seqlock(&rename_lock); | ||
2628 | error = prepend_path(path, &root, &res, &buflen); | 2840 | error = prepend_path(path, &root, &res, &buflen); |
2629 | write_sequnlock(&rename_lock); | ||
2630 | br_read_unlock(&vfsmount_lock); | 2841 | br_read_unlock(&vfsmount_lock); |
2631 | 2842 | ||
2632 | if (error > 1) | 2843 | if (error > 1) |
@@ -2692,9 +2903,7 @@ char *d_path(const struct path *path, char *buf, int buflen) | |||
2692 | 2903 | ||
2693 | get_fs_root(current->fs, &root); | 2904 | get_fs_root(current->fs, &root); |
2694 | br_read_lock(&vfsmount_lock); | 2905 | br_read_lock(&vfsmount_lock); |
2695 | write_seqlock(&rename_lock); | ||
2696 | error = path_with_deleted(path, &root, &res, &buflen); | 2906 | error = path_with_deleted(path, &root, &res, &buflen); |
2697 | write_sequnlock(&rename_lock); | ||
2698 | br_read_unlock(&vfsmount_lock); | 2907 | br_read_unlock(&vfsmount_lock); |
2699 | if (error < 0) | 2908 | if (error < 0) |
2700 | res = ERR_PTR(error); | 2909 | res = ERR_PTR(error); |
@@ -2729,10 +2938,10 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen) | |||
2729 | char *end = buffer + buflen; | 2938 | char *end = buffer + buflen; |
2730 | /* these dentries are never renamed, so d_lock is not needed */ | 2939 | /* these dentries are never renamed, so d_lock is not needed */ |
2731 | if (prepend(&end, &buflen, " (deleted)", 11) || | 2940 | if (prepend(&end, &buflen, " (deleted)", 11) || |
2732 | prepend_name(&end, &buflen, &dentry->d_name) || | 2941 | prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) || |
2733 | prepend(&end, &buflen, "/", 1)) | 2942 | prepend(&end, &buflen, "/", 1)) |
2734 | end = ERR_PTR(-ENAMETOOLONG); | 2943 | end = ERR_PTR(-ENAMETOOLONG); |
2735 | return end; | 2944 | return end; |
2736 | } | 2945 | } |
2737 | 2946 | ||
2738 | /* | 2947 | /* |
@@ -2740,30 +2949,42 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen) | |||
2740 | */ | 2949 | */ |
2741 | static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) | 2950 | static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) |
2742 | { | 2951 | { |
2743 | char *end = buf + buflen; | 2952 | char *end, *retval; |
2744 | char *retval; | 2953 | int len, seq = 0; |
2954 | int error = 0; | ||
2745 | 2955 | ||
2746 | prepend(&end, &buflen, "\0", 1); | 2956 | rcu_read_lock(); |
2957 | restart: | ||
2958 | end = buf + buflen; | ||
2959 | len = buflen; | ||
2960 | prepend(&end, &len, "\0", 1); | ||
2747 | if (buflen < 1) | 2961 | if (buflen < 1) |
2748 | goto Elong; | 2962 | goto Elong; |
2749 | /* Get '/' right */ | 2963 | /* Get '/' right */ |
2750 | retval = end-1; | 2964 | retval = end-1; |
2751 | *retval = '/'; | 2965 | *retval = '/'; |
2752 | 2966 | read_seqbegin_or_lock(&rename_lock, &seq); | |
2753 | while (!IS_ROOT(dentry)) { | 2967 | while (!IS_ROOT(dentry)) { |
2754 | struct dentry *parent = dentry->d_parent; | 2968 | struct dentry *parent = dentry->d_parent; |
2755 | int error; | 2969 | int error; |
2756 | 2970 | ||
2757 | prefetch(parent); | 2971 | prefetch(parent); |
2758 | spin_lock(&dentry->d_lock); | 2972 | error = prepend_name(&end, &len, &dentry->d_name); |
2759 | error = prepend_name(&end, &buflen, &dentry->d_name); | 2973 | if (error) |
2760 | spin_unlock(&dentry->d_lock); | 2974 | break; |
2761 | if (error != 0 || prepend(&end, &buflen, "/", 1) != 0) | ||
2762 | goto Elong; | ||
2763 | 2975 | ||
2764 | retval = end; | 2976 | retval = end; |
2765 | dentry = parent; | 2977 | dentry = parent; |
2766 | } | 2978 | } |
2979 | if (!(seq & 1)) | ||
2980 | rcu_read_unlock(); | ||
2981 | if (need_seqretry(&rename_lock, seq)) { | ||
2982 | seq = 1; | ||
2983 | goto restart; | ||
2984 | } | ||
2985 | done_seqretry(&rename_lock, seq); | ||
2986 | if (error) | ||
2987 | goto Elong; | ||
2767 | return retval; | 2988 | return retval; |
2768 | Elong: | 2989 | Elong: |
2769 | return ERR_PTR(-ENAMETOOLONG); | 2990 | return ERR_PTR(-ENAMETOOLONG); |
@@ -2771,13 +2992,7 @@ Elong: | |||
2771 | 2992 | ||
/*
 * Exported wrapper around __dentry_path(): builds the path of @dentry at
 * the tail of @buf and hands back whatever __dentry_path() returns.
 */
char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
{
	char *path = __dentry_path(dentry, buf, buflen);

	return path;
}
EXPORT_SYMBOL(dentry_path_raw);
2783 | 2998 | ||
@@ -2786,7 +3001,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) | |||
2786 | char *p = NULL; | 3001 | char *p = NULL; |
2787 | char *retval; | 3002 | char *retval; |
2788 | 3003 | ||
2789 | write_seqlock(&rename_lock); | ||
2790 | if (d_unlinked(dentry)) { | 3004 | if (d_unlinked(dentry)) { |
2791 | p = buf + buflen; | 3005 | p = buf + buflen; |
2792 | if (prepend(&p, &buflen, "//deleted", 10) != 0) | 3006 | if (prepend(&p, &buflen, "//deleted", 10) != 0) |
@@ -2794,7 +3008,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) | |||
2794 | buflen++; | 3008 | buflen++; |
2795 | } | 3009 | } |
2796 | retval = __dentry_path(dentry, buf, buflen); | 3010 | retval = __dentry_path(dentry, buf, buflen); |
2797 | write_sequnlock(&rename_lock); | ||
2798 | if (!IS_ERR(retval) && p) | 3011 | if (!IS_ERR(retval) && p) |
2799 | *p = '/'; /* restore '/' overriden with '\0' */ | 3012 | *p = '/'; /* restore '/' overriden with '\0' */ |
2800 | return retval; | 3013 | return retval; |
@@ -2802,6 +3015,18 @@ Elong: | |||
2802 | return ERR_PTR(-ENAMETOOLONG); | 3015 | return ERR_PTR(-ENAMETOOLONG); |
2803 | } | 3016 | } |
2804 | 3017 | ||
3018 | static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, | ||
3019 | struct path *pwd) | ||
3020 | { | ||
3021 | unsigned seq; | ||
3022 | |||
3023 | do { | ||
3024 | seq = read_seqcount_begin(&fs->seq); | ||
3025 | *root = fs->root; | ||
3026 | *pwd = fs->pwd; | ||
3027 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
3028 | } | ||
3029 | |||
2805 | /* | 3030 | /* |
2806 | * NOTE! The user-level library version returns a | 3031 | * NOTE! The user-level library version returns a |
2807 | * character pointer. The kernel system call just | 3032 | * character pointer. The kernel system call just |
@@ -2829,11 +3054,11 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) | |||
2829 | if (!page) | 3054 | if (!page) |
2830 | return -ENOMEM; | 3055 | return -ENOMEM; |
2831 | 3056 | ||
2832 | get_fs_root_and_pwd(current->fs, &root, &pwd); | 3057 | rcu_read_lock(); |
3058 | get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); | ||
2833 | 3059 | ||
2834 | error = -ENOENT; | 3060 | error = -ENOENT; |
2835 | br_read_lock(&vfsmount_lock); | 3061 | br_read_lock(&vfsmount_lock); |
2836 | write_seqlock(&rename_lock); | ||
2837 | if (!d_unlinked(pwd.dentry)) { | 3062 | if (!d_unlinked(pwd.dentry)) { |
2838 | unsigned long len; | 3063 | unsigned long len; |
2839 | char *cwd = page + PAGE_SIZE; | 3064 | char *cwd = page + PAGE_SIZE; |
@@ -2841,7 +3066,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) | |||
2841 | 3066 | ||
2842 | prepend(&cwd, &buflen, "\0", 1); | 3067 | prepend(&cwd, &buflen, "\0", 1); |
2843 | error = prepend_path(&pwd, &root, &cwd, &buflen); | 3068 | error = prepend_path(&pwd, &root, &cwd, &buflen); |
2844 | write_sequnlock(&rename_lock); | ||
2845 | br_read_unlock(&vfsmount_lock); | 3069 | br_read_unlock(&vfsmount_lock); |
2846 | 3070 | ||
2847 | if (error < 0) | 3071 | if (error < 0) |
@@ -2862,13 +3086,11 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) | |||
2862 | error = -EFAULT; | 3086 | error = -EFAULT; |
2863 | } | 3087 | } |
2864 | } else { | 3088 | } else { |
2865 | write_sequnlock(&rename_lock); | ||
2866 | br_read_unlock(&vfsmount_lock); | 3089 | br_read_unlock(&vfsmount_lock); |
2867 | } | 3090 | } |
2868 | 3091 | ||
2869 | out: | 3092 | out: |
2870 | path_put(&pwd); | 3093 | rcu_read_unlock(); |
2871 | path_put(&root); | ||
2872 | free_page((unsigned long) page); | 3094 | free_page((unsigned long) page); |
2873 | return error; | 3095 | return error; |
2874 | } | 3096 | } |
@@ -2915,68 +3137,24 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) | |||
2915 | return result; | 3137 | return result; |
2916 | } | 3138 | } |
2917 | 3139 | ||
2918 | void d_genocide(struct dentry *root) | 3140 | static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) |
2919 | { | 3141 | { |
2920 | struct dentry *this_parent; | 3142 | struct dentry *root = data; |
2921 | struct list_head *next; | 3143 | if (dentry != root) { |
2922 | unsigned seq; | 3144 | if (d_unhashed(dentry) || !dentry->d_inode) |
2923 | int locked = 0; | 3145 | return D_WALK_SKIP; |
2924 | |||
2925 | seq = read_seqbegin(&rename_lock); | ||
2926 | again: | ||
2927 | this_parent = root; | ||
2928 | spin_lock(&this_parent->d_lock); | ||
2929 | repeat: | ||
2930 | next = this_parent->d_subdirs.next; | ||
2931 | resume: | ||
2932 | while (next != &this_parent->d_subdirs) { | ||
2933 | struct list_head *tmp = next; | ||
2934 | struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); | ||
2935 | next = tmp->next; | ||
2936 | 3146 | ||
2937 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | ||
2938 | if (d_unhashed(dentry) || !dentry->d_inode) { | ||
2939 | spin_unlock(&dentry->d_lock); | ||
2940 | continue; | ||
2941 | } | ||
2942 | if (!list_empty(&dentry->d_subdirs)) { | ||
2943 | spin_unlock(&this_parent->d_lock); | ||
2944 | spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); | ||
2945 | this_parent = dentry; | ||
2946 | spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); | ||
2947 | goto repeat; | ||
2948 | } | ||
2949 | if (!(dentry->d_flags & DCACHE_GENOCIDE)) { | 3147 | if (!(dentry->d_flags & DCACHE_GENOCIDE)) { |
2950 | dentry->d_flags |= DCACHE_GENOCIDE; | 3148 | dentry->d_flags |= DCACHE_GENOCIDE; |
2951 | dentry->d_count--; | 3149 | dentry->d_lockref.count--; |
2952 | } | ||
2953 | spin_unlock(&dentry->d_lock); | ||
2954 | } | ||
2955 | if (this_parent != root) { | ||
2956 | struct dentry *child = this_parent; | ||
2957 | if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { | ||
2958 | this_parent->d_flags |= DCACHE_GENOCIDE; | ||
2959 | this_parent->d_count--; | ||
2960 | } | 3150 | } |
2961 | this_parent = try_to_ascend(this_parent, locked, seq); | ||
2962 | if (!this_parent) | ||
2963 | goto rename_retry; | ||
2964 | next = child->d_u.d_child.next; | ||
2965 | goto resume; | ||
2966 | } | 3151 | } |
2967 | spin_unlock(&this_parent->d_lock); | 3152 | return D_WALK_CONTINUE; |
2968 | if (!locked && read_seqretry(&rename_lock, seq)) | 3153 | } |
2969 | goto rename_retry; | ||
2970 | if (locked) | ||
2971 | write_sequnlock(&rename_lock); | ||
2972 | return; | ||
2973 | 3154 | ||
2974 | rename_retry: | 3155 | void d_genocide(struct dentry *parent) |
2975 | if (locked) | 3156 | { |
2976 | goto again; | 3157 | d_walk(parent, parent, d_genocide_kill, NULL); |
2977 | locked = 1; | ||
2978 | write_seqlock(&rename_lock); | ||
2979 | goto again; | ||
2980 | } | 3158 | } |
2981 | 3159 | ||
2982 | void d_tmpfile(struct dentry *dentry, struct inode *inode) | 3160 | void d_tmpfile(struct dentry *dentry, struct inode *inode) |