about | summary | refs | log | tree | commit | diff | stats
path: root/fs/dcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dcache.c')
-rw-r--r-- fs/dcache.c 571
1 files changed, 345 insertions, 226 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 86d4db15473e..23702a9d4e6d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -67,33 +67,43 @@ struct dentry_stat_t dentry_stat = {
67 .age_limit = 45, 67 .age_limit = 45,
68}; 68};
69 69
70static void __d_free(struct dentry *dentry) 70static struct percpu_counter nr_dentry __cacheline_aligned_in_smp;
71static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
72
73#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
74int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
75 size_t *lenp, loff_t *ppos)
76{
77 dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
78 dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
79 return proc_dointvec(table, write, buffer, lenp, ppos);
80}
81#endif
82
83static void __d_free(struct rcu_head *head)
71{ 84{
85 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
86
72 WARN_ON(!list_empty(&dentry->d_alias)); 87 WARN_ON(!list_empty(&dentry->d_alias));
73 if (dname_external(dentry)) 88 if (dname_external(dentry))
74 kfree(dentry->d_name.name); 89 kfree(dentry->d_name.name);
75 kmem_cache_free(dentry_cache, dentry); 90 kmem_cache_free(dentry_cache, dentry);
76} 91}
77 92
78static void d_callback(struct rcu_head *head)
79{
80 struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
81 __d_free(dentry);
82}
83
84/* 93/*
85 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry 94 * no dcache_lock, please.
86 * inside dcache_lock.
87 */ 95 */
88static void d_free(struct dentry *dentry) 96static void d_free(struct dentry *dentry)
89{ 97{
98 percpu_counter_dec(&nr_dentry);
90 if (dentry->d_op && dentry->d_op->d_release) 99 if (dentry->d_op && dentry->d_op->d_release)
91 dentry->d_op->d_release(dentry); 100 dentry->d_op->d_release(dentry);
101
92 /* if dentry was never inserted into hash, immediate free is OK */ 102 /* if dentry was never inserted into hash, immediate free is OK */
93 if (hlist_unhashed(&dentry->d_hash)) 103 if (hlist_unhashed(&dentry->d_hash))
94 __d_free(dentry); 104 __d_free(&dentry->d_u.d_rcu);
95 else 105 else
96 call_rcu(&dentry->d_u.d_rcu, d_callback); 106 call_rcu(&dentry->d_u.d_rcu, __d_free);
97} 107}
98 108
99/* 109/*
@@ -123,37 +133,34 @@ static void dentry_iput(struct dentry * dentry)
123} 133}
124 134
125/* 135/*
126 * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. 136 * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
127 */ 137 */
128static void dentry_lru_add(struct dentry *dentry) 138static void dentry_lru_add(struct dentry *dentry)
129{ 139{
130 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 140 if (list_empty(&dentry->d_lru)) {
131 dentry->d_sb->s_nr_dentry_unused++; 141 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
132 dentry_stat.nr_unused++; 142 dentry->d_sb->s_nr_dentry_unused++;
133} 143 percpu_counter_inc(&nr_dentry_unused);
134 144 }
135static void dentry_lru_add_tail(struct dentry *dentry)
136{
137 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
138 dentry->d_sb->s_nr_dentry_unused++;
139 dentry_stat.nr_unused++;
140} 145}
141 146
142static void dentry_lru_del(struct dentry *dentry) 147static void dentry_lru_del(struct dentry *dentry)
143{ 148{
144 if (!list_empty(&dentry->d_lru)) { 149 if (!list_empty(&dentry->d_lru)) {
145 list_del(&dentry->d_lru); 150 list_del_init(&dentry->d_lru);
146 dentry->d_sb->s_nr_dentry_unused--; 151 dentry->d_sb->s_nr_dentry_unused--;
147 dentry_stat.nr_unused--; 152 percpu_counter_dec(&nr_dentry_unused);
148 } 153 }
149} 154}
150 155
151static void dentry_lru_del_init(struct dentry *dentry) 156static void dentry_lru_move_tail(struct dentry *dentry)
152{ 157{
153 if (likely(!list_empty(&dentry->d_lru))) { 158 if (list_empty(&dentry->d_lru)) {
154 list_del_init(&dentry->d_lru); 159 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
155 dentry->d_sb->s_nr_dentry_unused--; 160 dentry->d_sb->s_nr_dentry_unused++;
156 dentry_stat.nr_unused--; 161 percpu_counter_inc(&nr_dentry_unused);
162 } else {
163 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
157 } 164 }
158} 165}
159 166
@@ -172,7 +179,6 @@ static struct dentry *d_kill(struct dentry *dentry)
172 struct dentry *parent; 179 struct dentry *parent;
173 180
174 list_del(&dentry->d_u.d_child); 181 list_del(&dentry->d_u.d_child);
175 dentry_stat.nr_dentry--; /* For d_free, below */
176 /*drops the locks, at that point nobody can reach this dentry */ 182 /*drops the locks, at that point nobody can reach this dentry */
177 dentry_iput(dentry); 183 dentry_iput(dentry);
178 if (IS_ROOT(dentry)) 184 if (IS_ROOT(dentry))
@@ -237,13 +243,15 @@ repeat:
237 if (dentry->d_op->d_delete(dentry)) 243 if (dentry->d_op->d_delete(dentry))
238 goto unhash_it; 244 goto unhash_it;
239 } 245 }
246
240 /* Unreachable? Get rid of it */ 247 /* Unreachable? Get rid of it */
241 if (d_unhashed(dentry)) 248 if (d_unhashed(dentry))
242 goto kill_it; 249 goto kill_it;
243 if (list_empty(&dentry->d_lru)) { 250
244 dentry->d_flags |= DCACHE_REFERENCED; 251 /* Otherwise leave it cached and ensure it's on the LRU */
245 dentry_lru_add(dentry); 252 dentry->d_flags |= DCACHE_REFERENCED;
246 } 253 dentry_lru_add(dentry);
254
247 spin_unlock(&dentry->d_lock); 255 spin_unlock(&dentry->d_lock);
248 spin_unlock(&dcache_lock); 256 spin_unlock(&dcache_lock);
249 return; 257 return;
@@ -318,11 +326,10 @@ int d_invalidate(struct dentry * dentry)
318EXPORT_SYMBOL(d_invalidate); 326EXPORT_SYMBOL(d_invalidate);
319 327
320/* This should be called _only_ with dcache_lock held */ 328/* This should be called _only_ with dcache_lock held */
321
322static inline struct dentry * __dget_locked(struct dentry *dentry) 329static inline struct dentry * __dget_locked(struct dentry *dentry)
323{ 330{
324 atomic_inc(&dentry->d_count); 331 atomic_inc(&dentry->d_count);
325 dentry_lru_del_init(dentry); 332 dentry_lru_del(dentry);
326 return dentry; 333 return dentry;
327} 334}
328 335
@@ -441,73 +448,27 @@ static void prune_one_dentry(struct dentry * dentry)
441 448
442 if (dentry->d_op && dentry->d_op->d_delete) 449 if (dentry->d_op && dentry->d_op->d_delete)
443 dentry->d_op->d_delete(dentry); 450 dentry->d_op->d_delete(dentry);
444 dentry_lru_del_init(dentry); 451 dentry_lru_del(dentry);
445 __d_drop(dentry); 452 __d_drop(dentry);
446 dentry = d_kill(dentry); 453 dentry = d_kill(dentry);
447 spin_lock(&dcache_lock); 454 spin_lock(&dcache_lock);
448 } 455 }
449} 456}
450 457
451/* 458static void shrink_dentry_list(struct list_head *list)
452 * Shrink the dentry LRU on a given superblock.
453 * @sb : superblock to shrink dentry LRU.
454 * @count: If count is NULL, we prune all dentries on superblock.
455 * @flags: If flags is non-zero, we need to do special processing based on
456 * which flags are set. This means we don't need to maintain multiple
457 * similar copies of this loop.
458 */
459static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
460{ 459{
461 LIST_HEAD(referenced);
462 LIST_HEAD(tmp);
463 struct dentry *dentry; 460 struct dentry *dentry;
464 int cnt = 0;
465 461
466 BUG_ON(!sb); 462 while (!list_empty(list)) {
467 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); 463 dentry = list_entry(list->prev, struct dentry, d_lru);
468 spin_lock(&dcache_lock); 464 dentry_lru_del(dentry);
469 if (count != NULL)
470 /* called from prune_dcache() and shrink_dcache_parent() */
471 cnt = *count;
472restart:
473 if (count == NULL)
474 list_splice_init(&sb->s_dentry_lru, &tmp);
475 else {
476 while (!list_empty(&sb->s_dentry_lru)) {
477 dentry = list_entry(sb->s_dentry_lru.prev,
478 struct dentry, d_lru);
479 BUG_ON(dentry->d_sb != sb);
480 465
481 spin_lock(&dentry->d_lock);
482 /*
483 * If we are honouring the DCACHE_REFERENCED flag and
484 * the dentry has this flag set, don't free it. Clear
485 * the flag and put it back on the LRU.
486 */
487 if ((flags & DCACHE_REFERENCED)
488 && (dentry->d_flags & DCACHE_REFERENCED)) {
489 dentry->d_flags &= ~DCACHE_REFERENCED;
490 list_move(&dentry->d_lru, &referenced);
491 spin_unlock(&dentry->d_lock);
492 } else {
493 list_move_tail(&dentry->d_lru, &tmp);
494 spin_unlock(&dentry->d_lock);
495 cnt--;
496 if (!cnt)
497 break;
498 }
499 cond_resched_lock(&dcache_lock);
500 }
501 }
502 while (!list_empty(&tmp)) {
503 dentry = list_entry(tmp.prev, struct dentry, d_lru);
504 dentry_lru_del_init(dentry);
505 spin_lock(&dentry->d_lock);
506 /* 466 /*
507 * We found an inuse dentry which was not removed from 467 * We found an inuse dentry which was not removed from
508 * the LRU because of laziness during lookup. Do not free 468 * the LRU because of laziness during lookup. Do not free
509 * it - just keep it off the LRU list. 469 * it - just keep it off the LRU list.
510 */ 470 */
471 spin_lock(&dentry->d_lock);
511 if (atomic_read(&dentry->d_count)) { 472 if (atomic_read(&dentry->d_count)) {
512 spin_unlock(&dentry->d_lock); 473 spin_unlock(&dentry->d_lock);
513 continue; 474 continue;
@@ -516,13 +477,60 @@ restart:
516 /* dentry->d_lock was dropped in prune_one_dentry() */ 477 /* dentry->d_lock was dropped in prune_one_dentry() */
517 cond_resched_lock(&dcache_lock); 478 cond_resched_lock(&dcache_lock);
518 } 479 }
519 if (count == NULL && !list_empty(&sb->s_dentry_lru)) 480}
520 goto restart; 481
521 if (count != NULL) 482/**
522 *count = cnt; 483 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
484 * @sb: superblock to shrink dentry LRU.
485 * @count: number of entries to prune
486 * @flags: flags to control the dentry processing
487 *
488 * If flags contains DCACHE_REFERENCED, referenced dentries will not be pruned.
489 */
490static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
491{
492 /* called from prune_dcache() and shrink_dcache_parent() */
493 struct dentry *dentry;
494 LIST_HEAD(referenced);
495 LIST_HEAD(tmp);
496 int cnt = *count;
497
498 spin_lock(&dcache_lock);
499 while (!list_empty(&sb->s_dentry_lru)) {
500 dentry = list_entry(sb->s_dentry_lru.prev,
501 struct dentry, d_lru);
502 BUG_ON(dentry->d_sb != sb);
503
504 /*
505 * If we are honouring the DCACHE_REFERENCED flag and the
506 * dentry has this flag set, don't free it. Clear the flag
507 * and put it back on the LRU.
508 */
509 if (flags & DCACHE_REFERENCED) {
510 spin_lock(&dentry->d_lock);
511 if (dentry->d_flags & DCACHE_REFERENCED) {
512 dentry->d_flags &= ~DCACHE_REFERENCED;
513 list_move(&dentry->d_lru, &referenced);
514 spin_unlock(&dentry->d_lock);
515 cond_resched_lock(&dcache_lock);
516 continue;
517 }
518 spin_unlock(&dentry->d_lock);
519 }
520
521 list_move_tail(&dentry->d_lru, &tmp);
522 if (!--cnt)
523 break;
524 cond_resched_lock(&dcache_lock);
525 }
526
527 *count = cnt;
528 shrink_dentry_list(&tmp);
529
523 if (!list_empty(&referenced)) 530 if (!list_empty(&referenced))
524 list_splice(&referenced, &sb->s_dentry_lru); 531 list_splice(&referenced, &sb->s_dentry_lru);
525 spin_unlock(&dcache_lock); 532 spin_unlock(&dcache_lock);
533
526} 534}
527 535
528/** 536/**
@@ -536,9 +544,9 @@ restart:
536 */ 544 */
537static void prune_dcache(int count) 545static void prune_dcache(int count)
538{ 546{
539 struct super_block *sb, *n; 547 struct super_block *sb, *p = NULL;
540 int w_count; 548 int w_count;
541 int unused = dentry_stat.nr_unused; 549 int unused = percpu_counter_sum_positive(&nr_dentry_unused);
542 int prune_ratio; 550 int prune_ratio;
543 int pruned; 551 int pruned;
544 552
@@ -550,7 +558,7 @@ static void prune_dcache(int count)
550 else 558 else
551 prune_ratio = unused / count; 559 prune_ratio = unused / count;
552 spin_lock(&sb_lock); 560 spin_lock(&sb_lock);
553 list_for_each_entry_safe(sb, n, &super_blocks, s_list) { 561 list_for_each_entry(sb, &super_blocks, s_list) {
554 if (list_empty(&sb->s_instances)) 562 if (list_empty(&sb->s_instances))
555 continue; 563 continue;
556 if (sb->s_nr_dentry_unused == 0) 564 if (sb->s_nr_dentry_unused == 0)
@@ -590,14 +598,16 @@ static void prune_dcache(int count)
590 up_read(&sb->s_umount); 598 up_read(&sb->s_umount);
591 } 599 }
592 spin_lock(&sb_lock); 600 spin_lock(&sb_lock);
593 /* lock was dropped, must reset next */ 601 if (p)
594 list_safe_reset_next(sb, n, s_list); 602 __put_super(p);
595 count -= pruned; 603 count -= pruned;
596 __put_super(sb); 604 p = sb;
597 /* more work left to do? */ 605 /* more work left to do? */
598 if (count <= 0) 606 if (count <= 0)
599 break; 607 break;
600 } 608 }
609 if (p)
610 __put_super(p);
601 spin_unlock(&sb_lock); 611 spin_unlock(&sb_lock);
602 spin_unlock(&dcache_lock); 612 spin_unlock(&dcache_lock);
603} 613}
@@ -606,13 +616,19 @@ static void prune_dcache(int count)
606 * shrink_dcache_sb - shrink dcache for a superblock 616 * shrink_dcache_sb - shrink dcache for a superblock
607 * @sb: superblock 617 * @sb: superblock
608 * 618 *
609 * Shrink the dcache for the specified super block. This 619 * Shrink the dcache for the specified super block. This is used to free
610 * is used to free the dcache before unmounting a file 620 * the dcache before unmounting a file system.
611 * system
612 */ 621 */
613void shrink_dcache_sb(struct super_block * sb) 622void shrink_dcache_sb(struct super_block *sb)
614{ 623{
615 __shrink_dcache_sb(sb, NULL, 0); 624 LIST_HEAD(tmp);
625
626 spin_lock(&dcache_lock);
627 while (!list_empty(&sb->s_dentry_lru)) {
628 list_splice_init(&sb->s_dentry_lru, &tmp);
629 shrink_dentry_list(&tmp);
630 }
631 spin_unlock(&dcache_lock);
616} 632}
617EXPORT_SYMBOL(shrink_dcache_sb); 633EXPORT_SYMBOL(shrink_dcache_sb);
618 634
@@ -630,7 +646,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
630 646
631 /* detach this root from the system */ 647 /* detach this root from the system */
632 spin_lock(&dcache_lock); 648 spin_lock(&dcache_lock);
633 dentry_lru_del_init(dentry); 649 dentry_lru_del(dentry);
634 __d_drop(dentry); 650 __d_drop(dentry);
635 spin_unlock(&dcache_lock); 651 spin_unlock(&dcache_lock);
636 652
@@ -644,7 +660,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
644 spin_lock(&dcache_lock); 660 spin_lock(&dcache_lock);
645 list_for_each_entry(loop, &dentry->d_subdirs, 661 list_for_each_entry(loop, &dentry->d_subdirs,
646 d_u.d_child) { 662 d_u.d_child) {
647 dentry_lru_del_init(loop); 663 dentry_lru_del(loop);
648 __d_drop(loop); 664 __d_drop(loop);
649 cond_resched_lock(&dcache_lock); 665 cond_resched_lock(&dcache_lock);
650 } 666 }
@@ -701,20 +717,13 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
701 * otherwise we ascend to the parent and move to the 717 * otherwise we ascend to the parent and move to the
702 * next sibling if there is one */ 718 * next sibling if there is one */
703 if (!parent) 719 if (!parent)
704 goto out; 720 return;
705
706 dentry = parent; 721 dentry = parent;
707
708 } while (list_empty(&dentry->d_subdirs)); 722 } while (list_empty(&dentry->d_subdirs));
709 723
710 dentry = list_entry(dentry->d_subdirs.next, 724 dentry = list_entry(dentry->d_subdirs.next,
711 struct dentry, d_u.d_child); 725 struct dentry, d_u.d_child);
712 } 726 }
713out:
714 /* several dentries were freed, need to correct nr_dentry */
715 spin_lock(&dcache_lock);
716 dentry_stat.nr_dentry -= detached;
717 spin_unlock(&dcache_lock);
718} 727}
719 728
720/* 729/*
@@ -828,14 +837,15 @@ resume:
828 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 837 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
829 next = tmp->next; 838 next = tmp->next;
830 839
831 dentry_lru_del_init(dentry);
832 /* 840 /*
833 * move only zero ref count dentries to the end 841 * move only zero ref count dentries to the end
834 * of the unused list for prune_dcache 842 * of the unused list for prune_dcache
835 */ 843 */
836 if (!atomic_read(&dentry->d_count)) { 844 if (!atomic_read(&dentry->d_count)) {
837 dentry_lru_add_tail(dentry); 845 dentry_lru_move_tail(dentry);
838 found++; 846 found++;
847 } else {
848 dentry_lru_del(dentry);
839 } 849 }
840 850
841 /* 851 /*
@@ -898,12 +908,16 @@ EXPORT_SYMBOL(shrink_dcache_parent);
898 */ 908 */
899static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 909static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
900{ 910{
911 int nr_unused;
912
901 if (nr) { 913 if (nr) {
902 if (!(gfp_mask & __GFP_FS)) 914 if (!(gfp_mask & __GFP_FS))
903 return -1; 915 return -1;
904 prune_dcache(nr); 916 prune_dcache(nr);
905 } 917 }
906 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 918
919 nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
920 return (nr_unused / 100) * sysctl_vfs_cache_pressure;
907} 921}
908 922
909static struct shrinker dcache_shrinker = { 923static struct shrinker dcache_shrinker = {
@@ -970,9 +984,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
970 spin_lock(&dcache_lock); 984 spin_lock(&dcache_lock);
971 if (parent) 985 if (parent)
972 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 986 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
973 dentry_stat.nr_dentry++;
974 spin_unlock(&dcache_lock); 987 spin_unlock(&dcache_lock);
975 988
989 percpu_counter_inc(&nr_dentry);
990
976 return dentry; 991 return dentry;
977} 992}
978EXPORT_SYMBOL(d_alloc); 993EXPORT_SYMBOL(d_alloc);
@@ -1330,31 +1345,13 @@ EXPORT_SYMBOL(d_add_ci);
1330 * d_lookup - search for a dentry 1345 * d_lookup - search for a dentry
1331 * @parent: parent dentry 1346 * @parent: parent dentry
1332 * @name: qstr of name we wish to find 1347 * @name: qstr of name we wish to find
1348 * Returns: dentry, or NULL
1333 * 1349 *
1334 * Searches the children of the parent dentry for the name in question. If 1350 * d_lookup searches the children of the parent dentry for the name in
1335 * the dentry is found its reference count is incremented and the dentry 1351 * question. If the dentry is found its reference count is incremented and the
1336 * is returned. The caller must use dput to free the entry when it has 1352 * dentry is returned. The caller must use dput to free the entry when it has
1337 * finished using it. %NULL is returned on failure. 1353 * finished using it. %NULL is returned if the dentry does not exist.
1338 *
1339 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
1340 * Memory barriers are used while updating and doing lockless traversal.
1341 * To avoid races with d_move while rename is happening, d_lock is used.
1342 *
1343 * Overflows in memcmp(), while d_move, are avoided by keeping the length
1344 * and name pointer in one structure pointed by d_qstr.
1345 *
1346 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1347 * lookup is going on.
1348 *
1349 * The dentry unused LRU is not updated even if lookup finds the required dentry
1350 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1351 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1352 * acquisition.
1353 *
1354 * d_lookup() is protected against the concurrent renames in some unrelated
1355 * directory using the seqlockt_t rename_lock.
1356 */ 1354 */
1357
1358struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1355struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1359{ 1356{
1360 struct dentry * dentry = NULL; 1357 struct dentry * dentry = NULL;
@@ -1370,6 +1367,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1370} 1367}
1371EXPORT_SYMBOL(d_lookup); 1368EXPORT_SYMBOL(d_lookup);
1372 1369
1370/*
1371 * __d_lookup - search for a dentry (racy)
1372 * @parent: parent dentry
1373 * @name: qstr of name we wish to find
1374 * Returns: dentry, or NULL
1375 *
1376 * __d_lookup is like d_lookup, however it may (rarely) return a
1377 * false-negative result due to unrelated rename activity.
1378 *
1379 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1380 * however it must be used carefully, eg. with a following d_lookup in
1381 * the case of failure.
1382 *
1383 * __d_lookup callers must be commented.
1384 */
1373struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1385struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1374{ 1386{
1375 unsigned int len = name->len; 1387 unsigned int len = name->len;
@@ -1380,6 +1392,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1380 struct hlist_node *node; 1392 struct hlist_node *node;
1381 struct dentry *dentry; 1393 struct dentry *dentry;
1382 1394
1395 /*
1396 * The hash list is protected using RCU.
1397 *
1398 * Take d_lock when comparing a candidate dentry, to avoid races
1399 * with d_move().
1400 *
1401 * It is possible that concurrent renames can mess up our list
1402 * walk here and result in missing our dentry, resulting in the
1403 * false-negative result. d_lookup() protects against concurrent
1404 * renames using rename_lock seqlock.
1405 *
1406 * See Documentation/vfs/dcache-locking.txt for more details.
1407 */
1383 rcu_read_lock(); 1408 rcu_read_lock();
1384 1409
1385 hlist_for_each_entry_rcu(dentry, node, head, d_hash) { 1410 hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
@@ -1394,8 +1419,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1394 1419
1395 /* 1420 /*
1396 * Recheck the dentry after taking the lock - d_move may have 1421 * Recheck the dentry after taking the lock - d_move may have
1397 * changed things. Don't bother checking the hash because we're 1422 * changed things. Don't bother checking the hash because
1398 * about to compare the whole name anyway. 1423 * we're about to compare the whole name anyway.
1399 */ 1424 */
1400 if (dentry->d_parent != parent) 1425 if (dentry->d_parent != parent)
1401 goto next; 1426 goto next;
@@ -1466,33 +1491,26 @@ out:
1466 * This is used by ncpfs in its readdir implementation. 1491 * This is used by ncpfs in its readdir implementation.
1467 * Zero is returned if the dentry is invalid. 1492 * Zero is returned if the dentry is invalid.
1468 */ 1493 */
1469 1494int d_validate(struct dentry *dentry, struct dentry *parent)
1470int d_validate(struct dentry *dentry, struct dentry *dparent)
1471{ 1495{
1472 struct hlist_head *base; 1496 struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
1473 struct hlist_node *lhp; 1497 struct hlist_node *node;
1498 struct dentry *d;
1474 1499
1475 /* Check whether the ptr might be valid at all.. */ 1500 /* Check whether the ptr might be valid at all.. */
1476 if (!kmem_ptr_validate(dentry_cache, dentry)) 1501 if (!kmem_ptr_validate(dentry_cache, dentry))
1477 goto out; 1502 return 0;
1478 1503 if (dentry->d_parent != parent)
1479 if (dentry->d_parent != dparent) 1504 return 0;
1480 goto out;
1481 1505
1482 spin_lock(&dcache_lock); 1506 rcu_read_lock();
1483 base = d_hash(dparent, dentry->d_name.hash); 1507 hlist_for_each_entry_rcu(d, node, head, d_hash) {
1484 hlist_for_each(lhp,base) { 1508 if (d == dentry) {
1485 /* hlist_for_each_entry_rcu() not required for d_hash list 1509 dget(dentry);
1486 * as it is parsed under dcache_lock
1487 */
1488 if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
1489 __dget_locked(dentry);
1490 spin_unlock(&dcache_lock);
1491 return 1; 1510 return 1;
1492 } 1511 }
1493 } 1512 }
1494 spin_unlock(&dcache_lock); 1513 rcu_read_unlock();
1495out:
1496 return 0; 1514 return 0;
1497} 1515}
1498EXPORT_SYMBOL(d_validate); 1516EXPORT_SYMBOL(d_validate);
@@ -1903,48 +1921,30 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1903} 1921}
1904 1922
1905/** 1923/**
1906 * __d_path - return the path of a dentry 1924 * Prepend path string to a buffer
1925 *
1907 * @path: the dentry/vfsmount to report 1926 * @path: the dentry/vfsmount to report
1908 * @root: root vfsmnt/dentry (may be modified by this function) 1927 * @root: root vfsmnt/dentry (may be modified by this function)
1909 * @buffer: buffer to return value in 1928 * @buffer: pointer to the end of the buffer
1910 * @buflen: buffer length 1929 * @buflen: pointer to buffer length
1911 * 1930 *
1912 * Convert a dentry into an ASCII path name. If the entry has been deleted 1931 * Caller holds the dcache_lock.
1913 * the string " (deleted)" is appended. Note that this is ambiguous.
1914 *
1915 * Returns a pointer into the buffer or an error code if the
1916 * path was too long.
1917 *
1918 * "buflen" should be positive. Caller holds the dcache_lock.
1919 * 1932 *
1920 * If path is not reachable from the supplied root, then the value of 1933 * If path is not reachable from the supplied root, then the value of
1921 * root is changed (without modifying refcounts). 1934 * root is changed (without modifying refcounts).
1922 */ 1935 */
1923char *__d_path(const struct path *path, struct path *root, 1936static int prepend_path(const struct path *path, struct path *root,
1924 char *buffer, int buflen) 1937 char **buffer, int *buflen)
1925{ 1938{
1926 struct dentry *dentry = path->dentry; 1939 struct dentry *dentry = path->dentry;
1927 struct vfsmount *vfsmnt = path->mnt; 1940 struct vfsmount *vfsmnt = path->mnt;
1928 char *end = buffer + buflen; 1941 bool slash = false;
1929 char *retval; 1942 int error = 0;
1930
1931 spin_lock(&vfsmount_lock);
1932 prepend(&end, &buflen, "\0", 1);
1933 if (d_unlinked(dentry) &&
1934 (prepend(&end, &buflen, " (deleted)", 10) != 0))
1935 goto Elong;
1936
1937 if (buflen < 1)
1938 goto Elong;
1939 /* Get '/' right */
1940 retval = end-1;
1941 *retval = '/';
1942 1943
1943 for (;;) { 1944 br_read_lock(vfsmount_lock);
1945 while (dentry != root->dentry || vfsmnt != root->mnt) {
1944 struct dentry * parent; 1946 struct dentry * parent;
1945 1947
1946 if (dentry == root->dentry && vfsmnt == root->mnt)
1947 break;
1948 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 1948 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
1949 /* Global root? */ 1949 /* Global root? */
1950 if (vfsmnt->mnt_parent == vfsmnt) { 1950 if (vfsmnt->mnt_parent == vfsmnt) {
@@ -1956,28 +1956,90 @@ char *__d_path(const struct path *path, struct path *root,
1956 } 1956 }
1957 parent = dentry->d_parent; 1957 parent = dentry->d_parent;
1958 prefetch(parent); 1958 prefetch(parent);
1959 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || 1959 error = prepend_name(buffer, buflen, &dentry->d_name);
1960 (prepend(&end, &buflen, "/", 1) != 0)) 1960 if (!error)
1961 goto Elong; 1961 error = prepend(buffer, buflen, "/", 1);
1962 retval = end; 1962 if (error)
1963 break;
1964
1965 slash = true;
1963 dentry = parent; 1966 dentry = parent;
1964 } 1967 }
1965 1968
1966out: 1969out:
1967 spin_unlock(&vfsmount_lock); 1970 if (!error && !slash)
1968 return retval; 1971 error = prepend(buffer, buflen, "/", 1);
1972
1973 br_read_unlock(vfsmount_lock);
1974 return error;
1969 1975
1970global_root: 1976global_root:
1971 retval += 1; /* hit the slash */ 1977 /*
1972 if (prepend_name(&retval, &buflen, &dentry->d_name) != 0) 1978 * Filesystems needing to implement special "root names"
1973 goto Elong; 1979 * should do so with ->d_dname()
1980 */
1981 if (IS_ROOT(dentry) &&
1982 (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
1983 WARN(1, "Root dentry has weird name <%.*s>\n",
1984 (int) dentry->d_name.len, dentry->d_name.name);
1985 }
1974 root->mnt = vfsmnt; 1986 root->mnt = vfsmnt;
1975 root->dentry = dentry; 1987 root->dentry = dentry;
1976 goto out; 1988 goto out;
1989}
1977 1990
1978Elong: 1991/**
1979 retval = ERR_PTR(-ENAMETOOLONG); 1992 * __d_path - return the path of a dentry
1980 goto out; 1993 * @path: the dentry/vfsmount to report
1994 * @root: root vfsmnt/dentry (may be modified by this function)
1995 * @buf: buffer to return value in
1996 * @buflen: buffer length
1997 *
1998 * Convert a dentry into an ASCII path name.
1999 *
2000 * Returns a pointer into the buffer or an error code if the
2001 * path was too long.
2002 *
2003 * "buflen" should be positive.
2004 *
2005 * If path is not reachable from the supplied root, then the value of
2006 * root is changed (without modifying refcounts).
2007 */
2008char *__d_path(const struct path *path, struct path *root,
2009 char *buf, int buflen)
2010{
2011 char *res = buf + buflen;
2012 int error;
2013
2014 prepend(&res, &buflen, "\0", 1);
2015 spin_lock(&dcache_lock);
2016 error = prepend_path(path, root, &res, &buflen);
2017 spin_unlock(&dcache_lock);
2018
2019 if (error)
2020 return ERR_PTR(error);
2021 return res;
2022}
2023
2024/*
2025 * same as __d_path but appends "(deleted)" for unlinked files.
2026 */
2027static int path_with_deleted(const struct path *path, struct path *root,
2028 char **buf, int *buflen)
2029{
2030 prepend(buf, buflen, "\0", 1);
2031 if (d_unlinked(path->dentry)) {
2032 int error = prepend(buf, buflen, " (deleted)", 10);
2033 if (error)
2034 return error;
2035 }
2036
2037 return prepend_path(path, root, buf, buflen);
2038}
2039
2040static int prepend_unreachable(char **buffer, int *buflen)
2041{
2042 return prepend(buffer, buflen, "(unreachable)", 13);
1981} 2043}
1982 2044
1983/** 2045/**
@@ -1998,9 +2060,10 @@ Elong:
1998 */ 2060 */
1999char *d_path(const struct path *path, char *buf, int buflen) 2061char *d_path(const struct path *path, char *buf, int buflen)
2000{ 2062{
2001 char *res; 2063 char *res = buf + buflen;
2002 struct path root; 2064 struct path root;
2003 struct path tmp; 2065 struct path tmp;
2066 int error;
2004 2067
2005 /* 2068 /*
2006 * We have various synthetic filesystems that never get mounted. On 2069 * We have various synthetic filesystems that never get mounted. On
@@ -2012,19 +2075,51 @@ char *d_path(const struct path *path, char *buf, int buflen)
2012 if (path->dentry->d_op && path->dentry->d_op->d_dname) 2075 if (path->dentry->d_op && path->dentry->d_op->d_dname)
2013 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2076 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2014 2077
2015 read_lock(&current->fs->lock); 2078 get_fs_root(current->fs, &root);
2016 root = current->fs->root;
2017 path_get(&root);
2018 read_unlock(&current->fs->lock);
2019 spin_lock(&dcache_lock); 2079 spin_lock(&dcache_lock);
2020 tmp = root; 2080 tmp = root;
2021 res = __d_path(path, &tmp, buf, buflen); 2081 error = path_with_deleted(path, &tmp, &res, &buflen);
2082 if (error)
2083 res = ERR_PTR(error);
2022 spin_unlock(&dcache_lock); 2084 spin_unlock(&dcache_lock);
2023 path_put(&root); 2085 path_put(&root);
2024 return res; 2086 return res;
2025} 2087}
2026EXPORT_SYMBOL(d_path); 2088EXPORT_SYMBOL(d_path);
2027 2089
2090/**
2091 * d_path_with_unreachable - return the path of a dentry
2092 * @path: path to report
2093 * @buf: buffer to return value in
2094 * @buflen: buffer length
2095 *
2096 * The difference from d_path() is that this prepends "(unreachable)"
2097 * to paths which are unreachable from the current process' root.
2098 */
2099char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2100{
2101 char *res = buf + buflen;
2102 struct path root;
2103 struct path tmp;
2104 int error;
2105
2106 if (path->dentry->d_op && path->dentry->d_op->d_dname)
2107 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2108
2109 get_fs_root(current->fs, &root);
2110 spin_lock(&dcache_lock);
2111 tmp = root;
2112 error = path_with_deleted(path, &tmp, &res, &buflen);
2113 if (!error && !path_equal(&tmp, &root))
2114 error = prepend_unreachable(&res, &buflen);
2115 spin_unlock(&dcache_lock);
2116 path_put(&root);
2117 if (error)
2118 res = ERR_PTR(error);
2119
2120 return res;
2121}
2122
2028/* 2123/*
2029 * Helper function for dentry_operations.d_dname() members 2124 * Helper function for dentry_operations.d_dname() members
2030 */ 2125 */
@@ -2049,16 +2144,12 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2049/* 2144/*
2050 * Write full pathname from the root of the filesystem into the buffer. 2145 * Write full pathname from the root of the filesystem into the buffer.
2051 */ 2146 */
2052char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2147char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2053{ 2148{
2054 char *end = buf + buflen; 2149 char *end = buf + buflen;
2055 char *retval; 2150 char *retval;
2056 2151
2057 spin_lock(&dcache_lock);
2058 prepend(&end, &buflen, "\0", 1); 2152 prepend(&end, &buflen, "\0", 1);
2059 if (d_unlinked(dentry) &&
2060 (prepend(&end, &buflen, "//deleted", 9) != 0))
2061 goto Elong;
2062 if (buflen < 1) 2153 if (buflen < 1)
2063 goto Elong; 2154 goto Elong;
2064 /* Get '/' right */ 2155 /* Get '/' right */
@@ -2076,7 +2167,28 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2076 retval = end; 2167 retval = end;
2077 dentry = parent; 2168 dentry = parent;
2078 } 2169 }
2170 return retval;
2171Elong:
2172 return ERR_PTR(-ENAMETOOLONG);
2173}
2174EXPORT_SYMBOL(__dentry_path);
2175
2176char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2177{
2178 char *p = NULL;
2179 char *retval;
2180
2181 spin_lock(&dcache_lock);
2182 if (d_unlinked(dentry)) {
2183 p = buf + buflen;
2184 if (prepend(&p, &buflen, "//deleted", 10) != 0)
2185 goto Elong;
2186 buflen++;
2187 }
2188 retval = __dentry_path(dentry, buf, buflen);
2079 spin_unlock(&dcache_lock); 2189 spin_unlock(&dcache_lock);
2190 if (!IS_ERR(retval) && p)
2191 *p = '/'; /* restore '/' overriden with '\0' */
2080 return retval; 2192 return retval;
2081Elong: 2193Elong:
2082 spin_unlock(&dcache_lock); 2194 spin_unlock(&dcache_lock);
@@ -2110,27 +2222,30 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2110 if (!page) 2222 if (!page)
2111 return -ENOMEM; 2223 return -ENOMEM;
2112 2224
2113 read_lock(&current->fs->lock); 2225 get_fs_root_and_pwd(current->fs, &root, &pwd);
2114 pwd = current->fs->pwd;
2115 path_get(&pwd);
2116 root = current->fs->root;
2117 path_get(&root);
2118 read_unlock(&current->fs->lock);
2119 2226
2120 error = -ENOENT; 2227 error = -ENOENT;
2121 spin_lock(&dcache_lock); 2228 spin_lock(&dcache_lock);
2122 if (!d_unlinked(pwd.dentry)) { 2229 if (!d_unlinked(pwd.dentry)) {
2123 unsigned long len; 2230 unsigned long len;
2124 struct path tmp = root; 2231 struct path tmp = root;
2125 char * cwd; 2232 char *cwd = page + PAGE_SIZE;
2233 int buflen = PAGE_SIZE;
2126 2234
2127 cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); 2235 prepend(&cwd, &buflen, "\0", 1);
2236 error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2128 spin_unlock(&dcache_lock); 2237 spin_unlock(&dcache_lock);
2129 2238
2130 error = PTR_ERR(cwd); 2239 if (error)
2131 if (IS_ERR(cwd))
2132 goto out; 2240 goto out;
2133 2241
2242 /* Unreachable from current root */
2243 if (!path_equal(&tmp, &root)) {
2244 error = prepend_unreachable(&cwd, &buflen);
2245 if (error)
2246 goto out;
2247 }
2248
2134 error = -ERANGE; 2249 error = -ERANGE;
2135 len = PAGE_SIZE + page - cwd; 2250 len = PAGE_SIZE + page - cwd;
2136 if (len <= size) { 2251 if (len <= size) {
@@ -2195,11 +2310,12 @@ int path_is_under(struct path *path1, struct path *path2)
2195 struct vfsmount *mnt = path1->mnt; 2310 struct vfsmount *mnt = path1->mnt;
2196 struct dentry *dentry = path1->dentry; 2311 struct dentry *dentry = path1->dentry;
2197 int res; 2312 int res;
2198 spin_lock(&vfsmount_lock); 2313
2314 br_read_lock(vfsmount_lock);
2199 if (mnt != path2->mnt) { 2315 if (mnt != path2->mnt) {
2200 for (;;) { 2316 for (;;) {
2201 if (mnt->mnt_parent == mnt) { 2317 if (mnt->mnt_parent == mnt) {
2202 spin_unlock(&vfsmount_lock); 2318 br_read_unlock(vfsmount_lock);
2203 return 0; 2319 return 0;
2204 } 2320 }
2205 if (mnt->mnt_parent == path2->mnt) 2321 if (mnt->mnt_parent == path2->mnt)
@@ -2209,7 +2325,7 @@ int path_is_under(struct path *path1, struct path *path2)
2209 dentry = mnt->mnt_mountpoint; 2325 dentry = mnt->mnt_mountpoint;
2210 } 2326 }
2211 res = is_subdir(dentry, path2->dentry); 2327 res = is_subdir(dentry, path2->dentry);
2212 spin_unlock(&vfsmount_lock); 2328 br_read_unlock(vfsmount_lock);
2213 return res; 2329 return res;
2214} 2330}
2215EXPORT_SYMBOL(path_is_under); 2331EXPORT_SYMBOL(path_is_under);
@@ -2311,6 +2427,9 @@ static void __init dcache_init(void)
2311{ 2427{
2312 int loop; 2428 int loop;
2313 2429
2430 percpu_counter_init(&nr_dentry, 0);
2431 percpu_counter_init(&nr_dentry_unused, 0);
2432
2314 /* 2433 /*
2315 * A constructor could be added for stable state like the lists, 2434 * A constructor could be added for stable state like the lists,
2316 * but it is probably not worth it because of the cache nature 2435 * but it is probably not worth it because of the cache nature