aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dcache.c')
-rw-r--r--fs/dcache.c762
1 files changed, 470 insertions, 292 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 83cfb834db03..99d4d7226203 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -88,6 +88,35 @@ EXPORT_SYMBOL(rename_lock);
88 88
89static struct kmem_cache *dentry_cache __read_mostly; 89static struct kmem_cache *dentry_cache __read_mostly;
90 90
91/**
92 * read_seqbegin_or_lock - begin a sequence number check or locking block
93 * @lock: sequence lock
94 * @seq : sequence number to be checked
95 *
96 * First try it once optimistically without taking the lock. If that fails,
97 * take the lock. The sequence number is also used as a marker for deciding
98 * whether to be a reader (even) or writer (odd).
99 * N.B. seq must be initialized to an even number to begin with.
100 */
101static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
102{
103 if (!(*seq & 1)) /* Even */
104 *seq = read_seqbegin(lock);
105 else /* Odd */
106 read_seqlock_excl(lock);
107}
108
109static inline int need_seqretry(seqlock_t *lock, int seq)
110{
111 return !(seq & 1) && read_seqretry(lock, seq);
112}
113
114static inline void done_seqretry(seqlock_t *lock, int seq)
115{
116 if (seq & 1)
117 read_sequnlock_excl(lock);
118}
119
91/* 120/*
92 * This is the single most critical data structure when it comes 121 * This is the single most critical data structure when it comes
93 * to the dcache: the hashtable for lookups. Somebody should try 122 * to the dcache: the hashtable for lookups. Somebody should try
@@ -229,7 +258,7 @@ static void __d_free(struct rcu_head *head)
229 */ 258 */
230static void d_free(struct dentry *dentry) 259static void d_free(struct dentry *dentry)
231{ 260{
232 BUG_ON(dentry->d_count); 261 BUG_ON((int)dentry->d_lockref.count > 0);
233 this_cpu_dec(nr_dentry); 262 this_cpu_dec(nr_dentry);
234 if (dentry->d_op && dentry->d_op->d_release) 263 if (dentry->d_op && dentry->d_op->d_release)
235 dentry->d_op->d_release(dentry); 264 dentry->d_op->d_release(dentry);
@@ -308,8 +337,9 @@ static void dentry_unlink_inode(struct dentry * dentry)
308 */ 337 */
309static void dentry_lru_add(struct dentry *dentry) 338static void dentry_lru_add(struct dentry *dentry)
310{ 339{
311 if (list_empty(&dentry->d_lru)) { 340 if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) {
312 spin_lock(&dcache_lru_lock); 341 spin_lock(&dcache_lru_lock);
342 dentry->d_flags |= DCACHE_LRU_LIST;
313 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 343 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
314 dentry->d_sb->s_nr_dentry_unused++; 344 dentry->d_sb->s_nr_dentry_unused++;
315 dentry_stat.nr_unused++; 345 dentry_stat.nr_unused++;
@@ -320,7 +350,7 @@ static void dentry_lru_add(struct dentry *dentry)
320static void __dentry_lru_del(struct dentry *dentry) 350static void __dentry_lru_del(struct dentry *dentry)
321{ 351{
322 list_del_init(&dentry->d_lru); 352 list_del_init(&dentry->d_lru);
323 dentry->d_flags &= ~DCACHE_SHRINK_LIST; 353 dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
324 dentry->d_sb->s_nr_dentry_unused--; 354 dentry->d_sb->s_nr_dentry_unused--;
325 dentry_stat.nr_unused--; 355 dentry_stat.nr_unused--;
326} 356}
@@ -341,6 +371,7 @@ static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
341{ 371{
342 spin_lock(&dcache_lru_lock); 372 spin_lock(&dcache_lru_lock);
343 if (list_empty(&dentry->d_lru)) { 373 if (list_empty(&dentry->d_lru)) {
374 dentry->d_flags |= DCACHE_LRU_LIST;
344 list_add_tail(&dentry->d_lru, list); 375 list_add_tail(&dentry->d_lru, list);
345 dentry->d_sb->s_nr_dentry_unused++; 376 dentry->d_sb->s_nr_dentry_unused++;
346 dentry_stat.nr_unused++; 377 dentry_stat.nr_unused++;
@@ -443,7 +474,7 @@ EXPORT_SYMBOL(d_drop);
443 * If ref is non-zero, then decrement the refcount too. 474 * If ref is non-zero, then decrement the refcount too.
444 * Returns dentry requiring refcount drop, or NULL if we're done. 475 * Returns dentry requiring refcount drop, or NULL if we're done.
445 */ 476 */
446static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) 477static inline struct dentry *dentry_kill(struct dentry *dentry)
447 __releases(dentry->d_lock) 478 __releases(dentry->d_lock)
448{ 479{
449 struct inode *inode; 480 struct inode *inode;
@@ -466,13 +497,16 @@ relock:
466 goto relock; 497 goto relock;
467 } 498 }
468 499
469 if (ref) 500 /*
470 dentry->d_count--; 501 * The dentry is now unrecoverably dead to the world.
502 */
503 lockref_mark_dead(&dentry->d_lockref);
504
471 /* 505 /*
472 * inform the fs via d_prune that this dentry is about to be 506 * inform the fs via d_prune that this dentry is about to be
473 * unhashed and destroyed. 507 * unhashed and destroyed.
474 */ 508 */
475 if (dentry->d_flags & DCACHE_OP_PRUNE) 509 if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
476 dentry->d_op->d_prune(dentry); 510 dentry->d_op->d_prune(dentry);
477 511
478 dentry_lru_del(dentry); 512 dentry_lru_del(dentry);
@@ -509,38 +543,31 @@ relock:
509 */ 543 */
510void dput(struct dentry *dentry) 544void dput(struct dentry *dentry)
511{ 545{
512 if (!dentry) 546 if (unlikely(!dentry))
513 return; 547 return;
514 548
515repeat: 549repeat:
516 if (dentry->d_count == 1) 550 if (lockref_put_or_lock(&dentry->d_lockref))
517 might_sleep();
518 spin_lock(&dentry->d_lock);
519 BUG_ON(!dentry->d_count);
520 if (dentry->d_count > 1) {
521 dentry->d_count--;
522 spin_unlock(&dentry->d_lock);
523 return; 551 return;
524 }
525 552
526 if (dentry->d_flags & DCACHE_OP_DELETE) { 553 /* Unreachable? Get rid of it */
554 if (unlikely(d_unhashed(dentry)))
555 goto kill_it;
556
557 if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
527 if (dentry->d_op->d_delete(dentry)) 558 if (dentry->d_op->d_delete(dentry))
528 goto kill_it; 559 goto kill_it;
529 } 560 }
530 561
531 /* Unreachable? Get rid of it */
532 if (d_unhashed(dentry))
533 goto kill_it;
534
535 dentry->d_flags |= DCACHE_REFERENCED; 562 dentry->d_flags |= DCACHE_REFERENCED;
536 dentry_lru_add(dentry); 563 dentry_lru_add(dentry);
537 564
538 dentry->d_count--; 565 dentry->d_lockref.count--;
539 spin_unlock(&dentry->d_lock); 566 spin_unlock(&dentry->d_lock);
540 return; 567 return;
541 568
542kill_it: 569kill_it:
543 dentry = dentry_kill(dentry, 1); 570 dentry = dentry_kill(dentry);
544 if (dentry) 571 if (dentry)
545 goto repeat; 572 goto repeat;
546} 573}
@@ -590,7 +617,7 @@ int d_invalidate(struct dentry * dentry)
590 * We also need to leave mountpoints alone, 617 * We also need to leave mountpoints alone,
591 * directory or not. 618 * directory or not.
592 */ 619 */
593 if (dentry->d_count > 1 && dentry->d_inode) { 620 if (dentry->d_lockref.count > 1 && dentry->d_inode) {
594 if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { 621 if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
595 spin_unlock(&dentry->d_lock); 622 spin_unlock(&dentry->d_lock);
596 return -EBUSY; 623 return -EBUSY;
@@ -606,20 +633,33 @@ EXPORT_SYMBOL(d_invalidate);
606/* This must be called with d_lock held */ 633/* This must be called with d_lock held */
607static inline void __dget_dlock(struct dentry *dentry) 634static inline void __dget_dlock(struct dentry *dentry)
608{ 635{
609 dentry->d_count++; 636 dentry->d_lockref.count++;
610} 637}
611 638
612static inline void __dget(struct dentry *dentry) 639static inline void __dget(struct dentry *dentry)
613{ 640{
614 spin_lock(&dentry->d_lock); 641 lockref_get(&dentry->d_lockref);
615 __dget_dlock(dentry);
616 spin_unlock(&dentry->d_lock);
617} 642}
618 643
619struct dentry *dget_parent(struct dentry *dentry) 644struct dentry *dget_parent(struct dentry *dentry)
620{ 645{
646 int gotref;
621 struct dentry *ret; 647 struct dentry *ret;
622 648
649 /*
650 * Do optimistic parent lookup without any
651 * locking.
652 */
653 rcu_read_lock();
654 ret = ACCESS_ONCE(dentry->d_parent);
655 gotref = lockref_get_not_zero(&ret->d_lockref);
656 rcu_read_unlock();
657 if (likely(gotref)) {
658 if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
659 return ret;
660 dput(ret);
661 }
662
623repeat: 663repeat:
624 /* 664 /*
625 * Don't need rcu_dereference because we re-check it was correct under 665 * Don't need rcu_dereference because we re-check it was correct under
@@ -634,8 +674,8 @@ repeat:
634 goto repeat; 674 goto repeat;
635 } 675 }
636 rcu_read_unlock(); 676 rcu_read_unlock();
637 BUG_ON(!ret->d_count); 677 BUG_ON(!ret->d_lockref.count);
638 ret->d_count++; 678 ret->d_lockref.count++;
639 spin_unlock(&ret->d_lock); 679 spin_unlock(&ret->d_lock);
640 return ret; 680 return ret;
641} 681}
@@ -718,7 +758,15 @@ restart:
718 spin_lock(&inode->i_lock); 758 spin_lock(&inode->i_lock);
719 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { 759 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
720 spin_lock(&dentry->d_lock); 760 spin_lock(&dentry->d_lock);
721 if (!dentry->d_count) { 761 if (!dentry->d_lockref.count) {
762 /*
763 * inform the fs via d_prune that this dentry
764 * is about to be unhashed and destroyed.
765 */
766 if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
767 !d_unhashed(dentry))
768 dentry->d_op->d_prune(dentry);
769
722 __dget_dlock(dentry); 770 __dget_dlock(dentry);
723 __d_drop(dentry); 771 __d_drop(dentry);
724 spin_unlock(&dentry->d_lock); 772 spin_unlock(&dentry->d_lock);
@@ -744,7 +792,7 @@ static void try_prune_one_dentry(struct dentry *dentry)
744{ 792{
745 struct dentry *parent; 793 struct dentry *parent;
746 794
747 parent = dentry_kill(dentry, 0); 795 parent = dentry_kill(dentry);
748 /* 796 /*
749 * If dentry_kill returns NULL, we have nothing more to do. 797 * If dentry_kill returns NULL, we have nothing more to do.
750 * if it returns the same dentry, trylocks failed. In either 798 * if it returns the same dentry, trylocks failed. In either
@@ -763,13 +811,9 @@ static void try_prune_one_dentry(struct dentry *dentry)
763 /* Prune ancestors. */ 811 /* Prune ancestors. */
764 dentry = parent; 812 dentry = parent;
765 while (dentry) { 813 while (dentry) {
766 spin_lock(&dentry->d_lock); 814 if (lockref_put_or_lock(&dentry->d_lockref))
767 if (dentry->d_count > 1) {
768 dentry->d_count--;
769 spin_unlock(&dentry->d_lock);
770 return; 815 return;
771 } 816 dentry = dentry_kill(dentry);
772 dentry = dentry_kill(dentry, 1);
773 } 817 }
774} 818}
775 819
@@ -793,7 +837,7 @@ static void shrink_dentry_list(struct list_head *list)
793 * the LRU because of laziness during lookup. Do not free 837 * the LRU because of laziness during lookup. Do not free
794 * it - just keep it off the LRU list. 838 * it - just keep it off the LRU list.
795 */ 839 */
796 if (dentry->d_count) { 840 if (dentry->d_lockref.count) {
797 dentry_lru_del(dentry); 841 dentry_lru_del(dentry);
798 spin_unlock(&dentry->d_lock); 842 spin_unlock(&dentry->d_lock);
799 continue; 843 continue;
@@ -907,13 +951,14 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
907 * inform the fs that this dentry is about to be 951 * inform the fs that this dentry is about to be
908 * unhashed and destroyed. 952 * unhashed and destroyed.
909 */ 953 */
910 if (dentry->d_flags & DCACHE_OP_PRUNE) 954 if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
955 !d_unhashed(dentry))
911 dentry->d_op->d_prune(dentry); 956 dentry->d_op->d_prune(dentry);
912 957
913 dentry_lru_del(dentry); 958 dentry_lru_del(dentry);
914 __d_shrink(dentry); 959 __d_shrink(dentry);
915 960
916 if (dentry->d_count != 0) { 961 if (dentry->d_lockref.count != 0) {
917 printk(KERN_ERR 962 printk(KERN_ERR
918 "BUG: Dentry %p{i=%lx,n=%s}" 963 "BUG: Dentry %p{i=%lx,n=%s}"
919 " still in use (%d)" 964 " still in use (%d)"
@@ -922,7 +967,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
922 dentry->d_inode ? 967 dentry->d_inode ?
923 dentry->d_inode->i_ino : 0UL, 968 dentry->d_inode->i_ino : 0UL,
924 dentry->d_name.name, 969 dentry->d_name.name,
925 dentry->d_count, 970 dentry->d_lockref.count,
926 dentry->d_sb->s_type->name, 971 dentry->d_sb->s_type->name,
927 dentry->d_sb->s_id); 972 dentry->d_sb->s_id);
928 BUG(); 973 BUG();
@@ -933,7 +978,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
933 list_del(&dentry->d_u.d_child); 978 list_del(&dentry->d_u.d_child);
934 } else { 979 } else {
935 parent = dentry->d_parent; 980 parent = dentry->d_parent;
936 parent->d_count--; 981 parent->d_lockref.count--;
937 list_del(&dentry->d_u.d_child); 982 list_del(&dentry->d_u.d_child);
938 } 983 }
939 984
@@ -981,7 +1026,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
981 1026
982 dentry = sb->s_root; 1027 dentry = sb->s_root;
983 sb->s_root = NULL; 1028 sb->s_root = NULL;
984 dentry->d_count--; 1029 dentry->d_lockref.count--;
985 shrink_dcache_for_umount_subtree(dentry); 1030 shrink_dcache_for_umount_subtree(dentry);
986 1031
987 while (!hlist_bl_empty(&sb->s_anon)) { 1032 while (!hlist_bl_empty(&sb->s_anon)) {
@@ -996,7 +1041,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
996 * the parenthood after dropping the lock and check 1041 * the parenthood after dropping the lock and check
997 * that the sequence number still matches. 1042 * that the sequence number still matches.
998 */ 1043 */
999static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) 1044static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
1000{ 1045{
1001 struct dentry *new = old->d_parent; 1046 struct dentry *new = old->d_parent;
1002 1047
@@ -1010,7 +1055,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
1010 */ 1055 */
1011 if (new != old->d_parent || 1056 if (new != old->d_parent ||
1012 (old->d_flags & DCACHE_DENTRY_KILLED) || 1057 (old->d_flags & DCACHE_DENTRY_KILLED) ||
1013 (!locked && read_seqretry(&rename_lock, seq))) { 1058 need_seqretry(&rename_lock, seq)) {
1014 spin_unlock(&new->d_lock); 1059 spin_unlock(&new->d_lock);
1015 new = NULL; 1060 new = NULL;
1016 } 1061 }
@@ -1018,34 +1063,55 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
1018 return new; 1063 return new;
1019} 1064}
1020 1065
1066/**
1067 * enum d_walk_ret - action to take during tree walk
1068 * @D_WALK_CONTINUE: continue walk
1069 * @D_WALK_QUIT: quit walk
1070 * @D_WALK_NORETRY: quit when retry is needed
1071 * @D_WALK_SKIP: skip this dentry and its children
1072 */
1073enum d_walk_ret {
1074 D_WALK_CONTINUE,
1075 D_WALK_QUIT,
1076 D_WALK_NORETRY,
1077 D_WALK_SKIP,
1078};
1021 1079
1022/*
1023 * Search for at least 1 mount point in the dentry's subdirs.
1024 * We descend to the next level whenever the d_subdirs
1025 * list is non-empty and continue searching.
1026 */
1027
1028/** 1080/**
1029 * have_submounts - check for mounts over a dentry 1081 * d_walk - walk the dentry tree
1030 * @parent: dentry to check. 1082 * @parent: start of walk
1083 * @data: data passed to @enter() and @finish()
1084 * @enter: callback when first entering the dentry
1085 * @finish: callback when successfully finished the walk
1031 * 1086 *
1032 * Return true if the parent or its subdirectories contain 1087 * The @enter() and @finish() callbacks are called with d_lock held.
1033 * a mount point
1034 */ 1088 */
1035int have_submounts(struct dentry *parent) 1089static void d_walk(struct dentry *parent, void *data,
1090 enum d_walk_ret (*enter)(void *, struct dentry *),
1091 void (*finish)(void *))
1036{ 1092{
1037 struct dentry *this_parent; 1093 struct dentry *this_parent;
1038 struct list_head *next; 1094 struct list_head *next;
1039 unsigned seq; 1095 unsigned seq = 0;
1040 int locked = 0; 1096 enum d_walk_ret ret;
1097 bool retry = true;
1041 1098
1042 seq = read_seqbegin(&rename_lock);
1043again: 1099again:
1100 read_seqbegin_or_lock(&rename_lock, &seq);
1044 this_parent = parent; 1101 this_parent = parent;
1045
1046 if (d_mountpoint(parent))
1047 goto positive;
1048 spin_lock(&this_parent->d_lock); 1102 spin_lock(&this_parent->d_lock);
1103
1104 ret = enter(data, this_parent);
1105 switch (ret) {
1106 case D_WALK_CONTINUE:
1107 break;
1108 case D_WALK_QUIT:
1109 case D_WALK_SKIP:
1110 goto out_unlock;
1111 case D_WALK_NORETRY:
1112 retry = false;
1113 break;
1114 }
1049repeat: 1115repeat:
1050 next = this_parent->d_subdirs.next; 1116 next = this_parent->d_subdirs.next;
1051resume: 1117resume:
@@ -1055,12 +1121,22 @@ resume:
1055 next = tmp->next; 1121 next = tmp->next;
1056 1122
1057 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 1123 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1058 /* Have we found a mount point ? */ 1124
1059 if (d_mountpoint(dentry)) { 1125 ret = enter(data, dentry);
1126 switch (ret) {
1127 case D_WALK_CONTINUE:
1128 break;
1129 case D_WALK_QUIT:
1060 spin_unlock(&dentry->d_lock); 1130 spin_unlock(&dentry->d_lock);
1061 spin_unlock(&this_parent->d_lock); 1131 goto out_unlock;
1062 goto positive; 1132 case D_WALK_NORETRY:
1133 retry = false;
1134 break;
1135 case D_WALK_SKIP:
1136 spin_unlock(&dentry->d_lock);
1137 continue;
1063 } 1138 }
1139
1064 if (!list_empty(&dentry->d_subdirs)) { 1140 if (!list_empty(&dentry->d_subdirs)) {
1065 spin_unlock(&this_parent->d_lock); 1141 spin_unlock(&this_parent->d_lock);
1066 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); 1142 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
@@ -1075,35 +1151,99 @@ resume:
1075 */ 1151 */
1076 if (this_parent != parent) { 1152 if (this_parent != parent) {
1077 struct dentry *child = this_parent; 1153 struct dentry *child = this_parent;
1078 this_parent = try_to_ascend(this_parent, locked, seq); 1154 this_parent = try_to_ascend(this_parent, seq);
1079 if (!this_parent) 1155 if (!this_parent)
1080 goto rename_retry; 1156 goto rename_retry;
1081 next = child->d_u.d_child.next; 1157 next = child->d_u.d_child.next;
1082 goto resume; 1158 goto resume;
1083 } 1159 }
1084 spin_unlock(&this_parent->d_lock); 1160 if (need_seqretry(&rename_lock, seq)) {
1085 if (!locked && read_seqretry(&rename_lock, seq)) 1161 spin_unlock(&this_parent->d_lock);
1086 goto rename_retry;
1087 if (locked)
1088 write_sequnlock(&rename_lock);
1089 return 0; /* No mount points found in tree */
1090positive:
1091 if (!locked && read_seqretry(&rename_lock, seq))
1092 goto rename_retry; 1162 goto rename_retry;
1093 if (locked) 1163 }
1094 write_sequnlock(&rename_lock); 1164 if (finish)
1095 return 1; 1165 finish(data);
1166
1167out_unlock:
1168 spin_unlock(&this_parent->d_lock);
1169 done_seqretry(&rename_lock, seq);
1170 return;
1096 1171
1097rename_retry: 1172rename_retry:
1098 if (locked) 1173 if (!retry)
1099 goto again; 1174 return;
1100 locked = 1; 1175 seq = 1;
1101 write_seqlock(&rename_lock);
1102 goto again; 1176 goto again;
1103} 1177}
1178
1179/*
1180 * Search for at least 1 mount point in the dentry's subdirs.
1181 * We descend to the next level whenever the d_subdirs
1182 * list is non-empty and continue searching.
1183 */
1184
1185/**
1186 * have_submounts - check for mounts over a dentry
1187 * @parent: dentry to check.
1188 *
1189 * Return true if the parent or its subdirectories contain
1190 * a mount point
1191 */
1192
1193static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
1194{
1195 int *ret = data;
1196 if (d_mountpoint(dentry)) {
1197 *ret = 1;
1198 return D_WALK_QUIT;
1199 }
1200 return D_WALK_CONTINUE;
1201}
1202
1203int have_submounts(struct dentry *parent)
1204{
1205 int ret = 0;
1206
1207 d_walk(parent, &ret, check_mount, NULL);
1208
1209 return ret;
1210}
1104EXPORT_SYMBOL(have_submounts); 1211EXPORT_SYMBOL(have_submounts);
1105 1212
1106/* 1213/*
1214 * Called by mount code to set a mountpoint and check if the mountpoint is
1215 * reachable (e.g. NFS can unhash a directory dentry and then the complete
1216 * subtree can become unreachable).
1217 *
1218 * Only one of check_submounts_and_drop() and d_set_mounted() may succeed. For
1219 * this reason take rename_lock and d_lock on dentry and ancestors.
1220 */
1221int d_set_mounted(struct dentry *dentry)
1222{
1223 struct dentry *p;
1224 int ret = -ENOENT;
1225 write_seqlock(&rename_lock);
1226 for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
1227 /* Need exclusion wrt. check_submounts_and_drop() */
1228 spin_lock(&p->d_lock);
1229 if (unlikely(d_unhashed(p))) {
1230 spin_unlock(&p->d_lock);
1231 goto out;
1232 }
1233 spin_unlock(&p->d_lock);
1234 }
1235 spin_lock(&dentry->d_lock);
1236 if (!d_unlinked(dentry)) {
1237 dentry->d_flags |= DCACHE_MOUNTED;
1238 ret = 0;
1239 }
1240 spin_unlock(&dentry->d_lock);
1241out:
1242 write_sequnlock(&rename_lock);
1243 return ret;
1244}
1245
1246/*
1107 * Search the dentry child list of the specified parent, 1247 * Search the dentry child list of the specified parent,
1108 * and move any unused dentries to the end of the unused 1248 * and move any unused dentries to the end of the unused
1109 * list for prune_dcache(). We descend to the next level 1249 * list for prune_dcache(). We descend to the next level
@@ -1117,93 +1257,46 @@ EXPORT_SYMBOL(have_submounts);
1117 * drop the lock and return early due to latency 1257 * drop the lock and return early due to latency
1118 * constraints. 1258 * constraints.
1119 */ 1259 */
1120static int select_parent(struct dentry *parent, struct list_head *dispose)
1121{
1122 struct dentry *this_parent;
1123 struct list_head *next;
1124 unsigned seq;
1125 int found = 0;
1126 int locked = 0;
1127
1128 seq = read_seqbegin(&rename_lock);
1129again:
1130 this_parent = parent;
1131 spin_lock(&this_parent->d_lock);
1132repeat:
1133 next = this_parent->d_subdirs.next;
1134resume:
1135 while (next != &this_parent->d_subdirs) {
1136 struct list_head *tmp = next;
1137 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1138 next = tmp->next;
1139 1260
1140 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 1261struct select_data {
1262 struct dentry *start;
1263 struct list_head dispose;
1264 int found;
1265};
1141 1266
1142 /* 1267static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
1143 * move only zero ref count dentries to the dispose list. 1268{
1144 * 1269 struct select_data *data = _data;
1145 * Those which are presently on the shrink list, being processed 1270 enum d_walk_ret ret = D_WALK_CONTINUE;
1146 * by shrink_dentry_list(), shouldn't be moved. Otherwise the
1147 * loop in shrink_dcache_parent() might not make any progress
1148 * and loop forever.
1149 */
1150 if (dentry->d_count) {
1151 dentry_lru_del(dentry);
1152 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1153 dentry_lru_move_list(dentry, dispose);
1154 dentry->d_flags |= DCACHE_SHRINK_LIST;
1155 found++;
1156 }
1157 /*
1158 * We can return to the caller if we have found some (this
1159 * ensures forward progress). We'll be coming back to find
1160 * the rest.
1161 */
1162 if (found && need_resched()) {
1163 spin_unlock(&dentry->d_lock);
1164 goto out;
1165 }
1166 1271
1167 /* 1272 if (data->start == dentry)
1168 * Descend a level if the d_subdirs list is non-empty. 1273 goto out;
1169 */
1170 if (!list_empty(&dentry->d_subdirs)) {
1171 spin_unlock(&this_parent->d_lock);
1172 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1173 this_parent = dentry;
1174 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1175 goto repeat;
1176 }
1177 1274
1178 spin_unlock(&dentry->d_lock);
1179 }
1180 /* 1275 /*
1181 * All done at this level ... ascend and resume the search. 1276 * move only zero ref count dentries to the dispose list.
1277 *
1278 * Those which are presently on the shrink list, being processed
1279 * by shrink_dentry_list(), shouldn't be moved. Otherwise the
1280 * loop in shrink_dcache_parent() might not make any progress
1281 * and loop forever.
1182 */ 1282 */
1183 if (this_parent != parent) { 1283 if (dentry->d_lockref.count) {
1184 struct dentry *child = this_parent; 1284 dentry_lru_del(dentry);
1185 this_parent = try_to_ascend(this_parent, locked, seq); 1285 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1186 if (!this_parent) 1286 dentry_lru_move_list(dentry, &data->dispose);
1187 goto rename_retry; 1287 dentry->d_flags |= DCACHE_SHRINK_LIST;
1188 next = child->d_u.d_child.next; 1288 data->found++;
1189 goto resume; 1289 ret = D_WALK_NORETRY;
1190 } 1290 }
1291 /*
1292 * We can return to the caller if we have found some (this
1293 * ensures forward progress). We'll be coming back to find
1294 * the rest.
1295 */
1296 if (data->found && need_resched())
1297 ret = D_WALK_QUIT;
1191out: 1298out:
1192 spin_unlock(&this_parent->d_lock); 1299 return ret;
1193 if (!locked && read_seqretry(&rename_lock, seq))
1194 goto rename_retry;
1195 if (locked)
1196 write_sequnlock(&rename_lock);
1197 return found;
1198
1199rename_retry:
1200 if (found)
1201 return found;
1202 if (locked)
1203 goto again;
1204 locked = 1;
1205 write_seqlock(&rename_lock);
1206 goto again;
1207} 1300}
1208 1301
1209/** 1302/**
@@ -1212,18 +1305,90 @@ rename_retry:
1212 * 1305 *
1213 * Prune the dcache to remove unused children of the parent dentry. 1306 * Prune the dcache to remove unused children of the parent dentry.
1214 */ 1307 */
1215void shrink_dcache_parent(struct dentry * parent) 1308void shrink_dcache_parent(struct dentry *parent)
1216{ 1309{
1217 LIST_HEAD(dispose); 1310 for (;;) {
1218 int found; 1311 struct select_data data;
1312
1313 INIT_LIST_HEAD(&data.dispose);
1314 data.start = parent;
1315 data.found = 0;
1219 1316
1220 while ((found = select_parent(parent, &dispose)) != 0) { 1317 d_walk(parent, &data, select_collect, NULL);
1221 shrink_dentry_list(&dispose); 1318 if (!data.found)
1319 break;
1320
1321 shrink_dentry_list(&data.dispose);
1222 cond_resched(); 1322 cond_resched();
1223 } 1323 }
1224} 1324}
1225EXPORT_SYMBOL(shrink_dcache_parent); 1325EXPORT_SYMBOL(shrink_dcache_parent);
1226 1326
1327static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
1328{
1329 struct select_data *data = _data;
1330
1331 if (d_mountpoint(dentry)) {
1332 data->found = -EBUSY;
1333 return D_WALK_QUIT;
1334 }
1335
1336 return select_collect(_data, dentry);
1337}
1338
1339static void check_and_drop(void *_data)
1340{
1341 struct select_data *data = _data;
1342
1343 if (d_mountpoint(data->start))
1344 data->found = -EBUSY;
1345 if (!data->found)
1346 __d_drop(data->start);
1347}
1348
1349/**
1350 * check_submounts_and_drop - prune dcache, check for submounts and drop
1351 *
1352 * All done as a single atomic operation relative to has_unlinked_ancestor().
1353 * Returns 0 if successfully unhashed @parent. If there were submounts then
1354 * return -EBUSY.
1355 *
1356 * @dentry: dentry to prune and drop
1357 */
1358int check_submounts_and_drop(struct dentry *dentry)
1359{
1360 int ret = 0;
1361
1362 /* Negative dentries can be dropped without further checks */
1363 if (!dentry->d_inode) {
1364 d_drop(dentry);
1365 goto out;
1366 }
1367
1368 for (;;) {
1369 struct select_data data;
1370
1371 INIT_LIST_HEAD(&data.dispose);
1372 data.start = dentry;
1373 data.found = 0;
1374
1375 d_walk(dentry, &data, check_and_collect, check_and_drop);
1376 ret = data.found;
1377
1378 if (!list_empty(&data.dispose))
1379 shrink_dentry_list(&data.dispose);
1380
1381 if (ret <= 0)
1382 break;
1383
1384 cond_resched();
1385 }
1386
1387out:
1388 return ret;
1389}
1390EXPORT_SYMBOL(check_submounts_and_drop);
1391
1227/** 1392/**
1228 * __d_alloc - allocate a dcache entry 1393 * __d_alloc - allocate a dcache entry
1229 * @sb: filesystem it will belong to 1394 * @sb: filesystem it will belong to
@@ -1269,7 +1434,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1269 smp_wmb(); 1434 smp_wmb();
1270 dentry->d_name.name = dname; 1435 dentry->d_name.name = dname;
1271 1436
1272 dentry->d_count = 1; 1437 dentry->d_lockref.count = 1;
1273 dentry->d_flags = 0; 1438 dentry->d_flags = 0;
1274 spin_lock_init(&dentry->d_lock); 1439 spin_lock_init(&dentry->d_lock);
1275 seqcount_init(&dentry->d_seq); 1440 seqcount_init(&dentry->d_seq);
@@ -1782,7 +1947,7 @@ static noinline enum slow_d_compare slow_dentry_cmp(
1782 * without taking d_lock and checking d_seq sequence count against @seq 1947 * without taking d_lock and checking d_seq sequence count against @seq
1783 * returned here. 1948 * returned here.
1784 * 1949 *
1785 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount 1950 * A refcount may be taken on the found dentry with the d_rcu_to_refcount
1786 * function. 1951 * function.
1787 * 1952 *
1788 * Alternatively, __d_lookup_rcu may be called again to look up the child of 1953 * Alternatively, __d_lookup_rcu may be called again to look up the child of
@@ -1970,7 +2135,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
1970 goto next; 2135 goto next;
1971 } 2136 }
1972 2137
1973 dentry->d_count++; 2138 dentry->d_lockref.count++;
1974 found = dentry; 2139 found = dentry;
1975 spin_unlock(&dentry->d_lock); 2140 spin_unlock(&dentry->d_lock);
1976 break; 2141 break;
@@ -2069,7 +2234,7 @@ again:
2069 spin_lock(&dentry->d_lock); 2234 spin_lock(&dentry->d_lock);
2070 inode = dentry->d_inode; 2235 inode = dentry->d_inode;
2071 isdir = S_ISDIR(inode->i_mode); 2236 isdir = S_ISDIR(inode->i_mode);
2072 if (dentry->d_count == 1) { 2237 if (dentry->d_lockref.count == 1) {
2073 if (!spin_trylock(&inode->i_lock)) { 2238 if (!spin_trylock(&inode->i_lock)) {
2074 spin_unlock(&dentry->d_lock); 2239 spin_unlock(&dentry->d_lock);
2075 cpu_relax(); 2240 cpu_relax();
@@ -2506,9 +2671,39 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2506 return 0; 2671 return 0;
2507} 2672}
2508 2673
2674/**
2675 * prepend_name - prepend a pathname in front of current buffer pointer
2676 * @buffer: buffer pointer
2677 * @buflen: allocated length of the buffer
2678 * @name: name string and length qstr structure
2679 *
2680 * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to
2681 * make sure that either the old or the new name pointer and length are
2682 * fetched. However, there may be a mismatch between length and pointer.
2683 * The length cannot be trusted, we need to copy it byte-by-byte until
2684 * the length is reached or a null byte is found. It also prepends "/" at
2685 * the beginning of the name. The sequence number check at the caller will
2686 * retry it again when a d_move() does happen. So any garbage in the buffer
2687 * due to mismatched pointer and length will be discarded.
2688 */
2509static int prepend_name(char **buffer, int *buflen, struct qstr *name) 2689static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2510{ 2690{
2511 return prepend(buffer, buflen, name->name, name->len); 2691 const char *dname = ACCESS_ONCE(name->name);
2692 u32 dlen = ACCESS_ONCE(name->len);
2693 char *p;
2694
2695 if (*buflen < dlen + 1)
2696 return -ENAMETOOLONG;
2697 *buflen -= dlen + 1;
2698 p = *buffer -= dlen + 1;
2699 *p++ = '/';
2700 while (dlen--) {
2701 char c = *dname++;
2702 if (!c)
2703 break;
2704 *p++ = c;
2705 }
2706 return 0;
2512} 2707}
2513 2708
2514/** 2709/**
@@ -2518,7 +2713,15 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2518 * @buffer: pointer to the end of the buffer 2713 * @buffer: pointer to the end of the buffer
2519 * @buflen: pointer to buffer length 2714 * @buflen: pointer to buffer length
2520 * 2715 *
2521 * Caller holds the rename_lock. 2716 * The function will first try to write out the pathname without taking any
2717 * lock other than the RCU read lock to make sure that dentries won't go away.
2718 * It only checks the sequence number of the global rename_lock as any change
2719 * in the dentry's d_seq will be preceded by changes in the rename_lock
2720 * sequence number. If the sequence number had been changed, it will restart
2721 * the whole pathname back-tracing sequence again by taking the rename_lock.
2722 * In this case, there is no need to take the RCU read lock as the recursive
2723 * parent pointer references will keep the dentry chain alive as long as no
2724 * rename operation is performed.
2522 */ 2725 */
2523static int prepend_path(const struct path *path, 2726static int prepend_path(const struct path *path,
2524 const struct path *root, 2727 const struct path *root,
@@ -2527,54 +2730,66 @@ static int prepend_path(const struct path *path,
2527 struct dentry *dentry = path->dentry; 2730 struct dentry *dentry = path->dentry;
2528 struct vfsmount *vfsmnt = path->mnt; 2731 struct vfsmount *vfsmnt = path->mnt;
2529 struct mount *mnt = real_mount(vfsmnt); 2732 struct mount *mnt = real_mount(vfsmnt);
2530 bool slash = false;
2531 int error = 0; 2733 int error = 0;
2734 unsigned seq = 0;
2735 char *bptr;
2736 int blen;
2532 2737
2738 rcu_read_lock();
2739restart:
2740 bptr = *buffer;
2741 blen = *buflen;
2742 read_seqbegin_or_lock(&rename_lock, &seq);
2533 while (dentry != root->dentry || vfsmnt != root->mnt) { 2743 while (dentry != root->dentry || vfsmnt != root->mnt) {
2534 struct dentry * parent; 2744 struct dentry * parent;
2535 2745
2536 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 2746 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2537 /* Global root? */ 2747 /* Global root? */
2538 if (!mnt_has_parent(mnt)) 2748 if (mnt_has_parent(mnt)) {
2539 goto global_root; 2749 dentry = mnt->mnt_mountpoint;
2540 dentry = mnt->mnt_mountpoint; 2750 mnt = mnt->mnt_parent;
2541 mnt = mnt->mnt_parent; 2751 vfsmnt = &mnt->mnt;
2542 vfsmnt = &mnt->mnt; 2752 continue;
2543 continue; 2753 }
2754 /*
2755 * Filesystems needing to implement special "root names"
2756 * should do so with ->d_dname()
2757 */
2758 if (IS_ROOT(dentry) &&
2759 (dentry->d_name.len != 1 ||
2760 dentry->d_name.name[0] != '/')) {
2761 WARN(1, "Root dentry has weird name <%.*s>\n",
2762 (int) dentry->d_name.len,
2763 dentry->d_name.name);
2764 }
2765 if (!error)
2766 error = is_mounted(vfsmnt) ? 1 : 2;
2767 break;
2544 } 2768 }
2545 parent = dentry->d_parent; 2769 parent = dentry->d_parent;
2546 prefetch(parent); 2770 prefetch(parent);
2547 spin_lock(&dentry->d_lock); 2771 error = prepend_name(&bptr, &blen, &dentry->d_name);
2548 error = prepend_name(buffer, buflen, &dentry->d_name);
2549 spin_unlock(&dentry->d_lock);
2550 if (!error)
2551 error = prepend(buffer, buflen, "/", 1);
2552 if (error) 2772 if (error)
2553 break; 2773 break;
2554 2774
2555 slash = true;
2556 dentry = parent; 2775 dentry = parent;
2557 } 2776 }
2777 if (!(seq & 1))
2778 rcu_read_unlock();
2779 if (need_seqretry(&rename_lock, seq)) {
2780 seq = 1;
2781 goto restart;
2782 }
2783 done_seqretry(&rename_lock, seq);
2558 2784
2559 if (!error && !slash) 2785 if (error >= 0 && bptr == *buffer) {
2560 error = prepend(buffer, buflen, "/", 1); 2786 if (--blen < 0)
2561 2787 error = -ENAMETOOLONG;
2562 return error; 2788 else
2563 2789 *--bptr = '/';
2564global_root: 2790 }
2565 /* 2791 *buffer = bptr;
2566 * Filesystems needing to implement special "root names" 2792 *buflen = blen;
2567 * should do so with ->d_dname()
2568 */
2569 if (IS_ROOT(dentry) &&
2570 (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2571 WARN(1, "Root dentry has weird name <%.*s>\n",
2572 (int) dentry->d_name.len, dentry->d_name.name);
2573 }
2574 if (!slash)
2575 error = prepend(buffer, buflen, "/", 1);
2576 if (!error)
2577 error = is_mounted(vfsmnt) ? 1 : 2;
2578 return error; 2793 return error;
2579} 2794}
2580 2795
@@ -2603,9 +2818,7 @@ char *__d_path(const struct path *path,
2603 2818
2604 prepend(&res, &buflen, "\0", 1); 2819 prepend(&res, &buflen, "\0", 1);
2605 br_read_lock(&vfsmount_lock); 2820 br_read_lock(&vfsmount_lock);
2606 write_seqlock(&rename_lock);
2607 error = prepend_path(path, root, &res, &buflen); 2821 error = prepend_path(path, root, &res, &buflen);
2608 write_sequnlock(&rename_lock);
2609 br_read_unlock(&vfsmount_lock); 2822 br_read_unlock(&vfsmount_lock);
2610 2823
2611 if (error < 0) 2824 if (error < 0)
@@ -2624,9 +2837,7 @@ char *d_absolute_path(const struct path *path,
2624 2837
2625 prepend(&res, &buflen, "\0", 1); 2838 prepend(&res, &buflen, "\0", 1);
2626 br_read_lock(&vfsmount_lock); 2839 br_read_lock(&vfsmount_lock);
2627 write_seqlock(&rename_lock);
2628 error = prepend_path(path, &root, &res, &buflen); 2840 error = prepend_path(path, &root, &res, &buflen);
2629 write_sequnlock(&rename_lock);
2630 br_read_unlock(&vfsmount_lock); 2841 br_read_unlock(&vfsmount_lock);
2631 2842
2632 if (error > 1) 2843 if (error > 1)
@@ -2692,9 +2903,7 @@ char *d_path(const struct path *path, char *buf, int buflen)
2692 2903
2693 get_fs_root(current->fs, &root); 2904 get_fs_root(current->fs, &root);
2694 br_read_lock(&vfsmount_lock); 2905 br_read_lock(&vfsmount_lock);
2695 write_seqlock(&rename_lock);
2696 error = path_with_deleted(path, &root, &res, &buflen); 2906 error = path_with_deleted(path, &root, &res, &buflen);
2697 write_sequnlock(&rename_lock);
2698 br_read_unlock(&vfsmount_lock); 2907 br_read_unlock(&vfsmount_lock);
2699 if (error < 0) 2908 if (error < 0)
2700 res = ERR_PTR(error); 2909 res = ERR_PTR(error);
@@ -2729,10 +2938,10 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
2729 char *end = buffer + buflen; 2938 char *end = buffer + buflen;
2730 /* these dentries are never renamed, so d_lock is not needed */ 2939 /* these dentries are never renamed, so d_lock is not needed */
2731 if (prepend(&end, &buflen, " (deleted)", 11) || 2940 if (prepend(&end, &buflen, " (deleted)", 11) ||
2732 prepend_name(&end, &buflen, &dentry->d_name) || 2941 prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
2733 prepend(&end, &buflen, "/", 1)) 2942 prepend(&end, &buflen, "/", 1))
2734 end = ERR_PTR(-ENAMETOOLONG); 2943 end = ERR_PTR(-ENAMETOOLONG);
2735 return end; 2944 return end;
2736} 2945}
2737 2946
2738/* 2947/*
@@ -2740,30 +2949,42 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
2740 */ 2949 */
2741static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) 2950static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2742{ 2951{
2743 char *end = buf + buflen; 2952 char *end, *retval;
2744 char *retval; 2953 int len, seq = 0;
2954 int error = 0;
2745 2955
2746 prepend(&end, &buflen, "\0", 1); 2956 rcu_read_lock();
2957restart:
2958 end = buf + buflen;
2959 len = buflen;
2960 prepend(&end, &len, "\0", 1);
2747 if (buflen < 1) 2961 if (buflen < 1)
2748 goto Elong; 2962 goto Elong;
2749 /* Get '/' right */ 2963 /* Get '/' right */
2750 retval = end-1; 2964 retval = end-1;
2751 *retval = '/'; 2965 *retval = '/';
2752 2966 read_seqbegin_or_lock(&rename_lock, &seq);
2753 while (!IS_ROOT(dentry)) { 2967 while (!IS_ROOT(dentry)) {
2754 struct dentry *parent = dentry->d_parent; 2968 struct dentry *parent = dentry->d_parent;
2755 int error; 2969 int error;
2756 2970
2757 prefetch(parent); 2971 prefetch(parent);
2758 spin_lock(&dentry->d_lock); 2972 error = prepend_name(&end, &len, &dentry->d_name);
2759 error = prepend_name(&end, &buflen, &dentry->d_name); 2973 if (error)
2760 spin_unlock(&dentry->d_lock); 2974 break;
2761 if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2762 goto Elong;
2763 2975
2764 retval = end; 2976 retval = end;
2765 dentry = parent; 2977 dentry = parent;
2766 } 2978 }
2979 if (!(seq & 1))
2980 rcu_read_unlock();
2981 if (need_seqretry(&rename_lock, seq)) {
2982 seq = 1;
2983 goto restart;
2984 }
2985 done_seqretry(&rename_lock, seq);
2986 if (error)
2987 goto Elong;
2767 return retval; 2988 return retval;
2768Elong: 2989Elong:
2769 return ERR_PTR(-ENAMETOOLONG); 2990 return ERR_PTR(-ENAMETOOLONG);
@@ -2771,13 +2992,7 @@ Elong:
2771 2992
/*
 * Exported entry point that forwards its arguments unchanged to
 * __dentry_path() and hands back whatever that returns (a pointer or an
 * ERR_PTR() value).  No locking is done at this level.
 */
char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
{
	char *path = __dentry_path(dentry, buf, buflen);

	return path;
}
2782EXPORT_SYMBOL(dentry_path_raw); 2997EXPORT_SYMBOL(dentry_path_raw);
2783 2998
@@ -2786,7 +3001,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2786 char *p = NULL; 3001 char *p = NULL;
2787 char *retval; 3002 char *retval;
2788 3003
2789 write_seqlock(&rename_lock);
2790 if (d_unlinked(dentry)) { 3004 if (d_unlinked(dentry)) {
2791 p = buf + buflen; 3005 p = buf + buflen;
2792 if (prepend(&p, &buflen, "//deleted", 10) != 0) 3006 if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2794,7 +3008,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2794 buflen++; 3008 buflen++;
2795 } 3009 }
2796 retval = __dentry_path(dentry, buf, buflen); 3010 retval = __dentry_path(dentry, buf, buflen);
2797 write_sequnlock(&rename_lock);
2798 if (!IS_ERR(retval) && p) 3011 if (!IS_ERR(retval) && p)
2799 *p = '/'; /* restore '/' overriden with '\0' */ 3012 *p = '/'; /* restore '/' overriden with '\0' */
2800 return retval; 3013 return retval;
@@ -2802,6 +3015,18 @@ Elong:
2802 return ERR_PTR(-ENAMETOOLONG); 3015 return ERR_PTR(-ENAMETOOLONG);
2803} 3016}
2804 3017
3018static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
3019 struct path *pwd)
3020{
3021 unsigned seq;
3022
3023 do {
3024 seq = read_seqcount_begin(&fs->seq);
3025 *root = fs->root;
3026 *pwd = fs->pwd;
3027 } while (read_seqcount_retry(&fs->seq, seq));
3028}
3029
2805/* 3030/*
2806 * NOTE! The user-level library version returns a 3031 * NOTE! The user-level library version returns a
2807 * character pointer. The kernel system call just 3032 * character pointer. The kernel system call just
@@ -2829,11 +3054,11 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2829 if (!page) 3054 if (!page)
2830 return -ENOMEM; 3055 return -ENOMEM;
2831 3056
2832 get_fs_root_and_pwd(current->fs, &root, &pwd); 3057 rcu_read_lock();
3058 get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
2833 3059
2834 error = -ENOENT; 3060 error = -ENOENT;
2835 br_read_lock(&vfsmount_lock); 3061 br_read_lock(&vfsmount_lock);
2836 write_seqlock(&rename_lock);
2837 if (!d_unlinked(pwd.dentry)) { 3062 if (!d_unlinked(pwd.dentry)) {
2838 unsigned long len; 3063 unsigned long len;
2839 char *cwd = page + PAGE_SIZE; 3064 char *cwd = page + PAGE_SIZE;
@@ -2841,7 +3066,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2841 3066
2842 prepend(&cwd, &buflen, "\0", 1); 3067 prepend(&cwd, &buflen, "\0", 1);
2843 error = prepend_path(&pwd, &root, &cwd, &buflen); 3068 error = prepend_path(&pwd, &root, &cwd, &buflen);
2844 write_sequnlock(&rename_lock);
2845 br_read_unlock(&vfsmount_lock); 3069 br_read_unlock(&vfsmount_lock);
2846 3070
2847 if (error < 0) 3071 if (error < 0)
@@ -2862,13 +3086,11 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2862 error = -EFAULT; 3086 error = -EFAULT;
2863 } 3087 }
2864 } else { 3088 } else {
2865 write_sequnlock(&rename_lock);
2866 br_read_unlock(&vfsmount_lock); 3089 br_read_unlock(&vfsmount_lock);
2867 } 3090 }
2868 3091
2869out: 3092out:
2870 path_put(&pwd); 3093 rcu_read_unlock();
2871 path_put(&root);
2872 free_page((unsigned long) page); 3094 free_page((unsigned long) page);
2873 return error; 3095 return error;
2874} 3096}
@@ -2915,68 +3137,24 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2915 return result; 3137 return result;
2916} 3138}
2917 3139
2918void d_genocide(struct dentry *root) 3140static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
2919{ 3141{
2920 struct dentry *this_parent; 3142 struct dentry *root = data;
2921 struct list_head *next; 3143 if (dentry != root) {
2922 unsigned seq; 3144 if (d_unhashed(dentry) || !dentry->d_inode)
2923 int locked = 0; 3145 return D_WALK_SKIP;
2924
2925 seq = read_seqbegin(&rename_lock);
2926again:
2927 this_parent = root;
2928 spin_lock(&this_parent->d_lock);
2929repeat:
2930 next = this_parent->d_subdirs.next;
2931resume:
2932 while (next != &this_parent->d_subdirs) {
2933 struct list_head *tmp = next;
2934 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2935 next = tmp->next;
2936 3146
2937 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2938 if (d_unhashed(dentry) || !dentry->d_inode) {
2939 spin_unlock(&dentry->d_lock);
2940 continue;
2941 }
2942 if (!list_empty(&dentry->d_subdirs)) {
2943 spin_unlock(&this_parent->d_lock);
2944 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2945 this_parent = dentry;
2946 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2947 goto repeat;
2948 }
2949 if (!(dentry->d_flags & DCACHE_GENOCIDE)) { 3147 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2950 dentry->d_flags |= DCACHE_GENOCIDE; 3148 dentry->d_flags |= DCACHE_GENOCIDE;
2951 dentry->d_count--; 3149 dentry->d_lockref.count--;
2952 }
2953 spin_unlock(&dentry->d_lock);
2954 }
2955 if (this_parent != root) {
2956 struct dentry *child = this_parent;
2957 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2958 this_parent->d_flags |= DCACHE_GENOCIDE;
2959 this_parent->d_count--;
2960 } 3150 }
2961 this_parent = try_to_ascend(this_parent, locked, seq);
2962 if (!this_parent)
2963 goto rename_retry;
2964 next = child->d_u.d_child.next;
2965 goto resume;
2966 } 3151 }
2967 spin_unlock(&this_parent->d_lock); 3152 return D_WALK_CONTINUE;
2968 if (!locked && read_seqretry(&rename_lock, seq)) 3153}
2969 goto rename_retry;
2970 if (locked)
2971 write_sequnlock(&rename_lock);
2972 return;
2973 3154
2974rename_retry: 3155void d_genocide(struct dentry *parent)
2975 if (locked) 3156{
2976 goto again; 3157 d_walk(parent, parent, d_genocide_kill, NULL);
2977 locked = 1;
2978 write_seqlock(&rename_lock);
2979 goto again;
2980} 3158}
2981 3159
2982void d_tmpfile(struct dentry *dentry, struct inode *inode) 3160void d_tmpfile(struct dentry *dentry, struct inode *inode)