aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.com>2017-12-20 17:45:40 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2017-12-25 20:22:07 -0500
commitf1ee616214cb22410e939d963bbb2349c2570f02 (patch)
tree3c7c0aab550ed16f00fbe76595e42de6f461b52a
parent00b0c9b82663ac42e5a09f58ce960f81f29d64ee (diff)
VFS: don't keep disconnected dentries on s_anon
The original purpose of the per-superblock s_anon list was to keep disconnected dentries in the cache between consecutive requests to the NFS server. Dentries can be disconnected if a client holds a file open and repeatedly performs IO on it, and if the server drops the dentry, whether due to memory pressure, server restart, or "echo 3 > /proc/sys/vm/drop_caches". This purpose was thwarted by commit 75a6f82a0d10 ("freeing unlinked file indefinitely delayed") which caused disconnected dentries to be freed as soon as their refcount reached zero. This means that, when a dentry being used by nfsd gets disconnected, a new one needs to be allocated for every request (unless requests overlap). As the dentry has no name, no parent, and no children, there is little of value to cache. As small memory allocations are typically fast (from per-cpu free lists) this likely has little cost. This means that the original purpose of s_anon is no longer relevant: there is no longer any need to keep disconnected dentries on a list so they appear to be hashed. However, s_anon now has a new use. When you mount an NFS filesystem, the dentry stored in s_root is just a placebo. The "real" root dentry is allocated using d_obtain_root() and so it is kept on the s_anon list. I don't know the reason for this, but suspect it is related to NFSv4, where a mount of "server:/some/path" requires NFS to look up the root filehandle on the server, then walk down "/some" and "/path" to get the filehandle to mount. Whatever the reason, NFS depends on the s_anon list and on shrink_dcache_for_umount() pruning all dentries on this list. So we cannot simply remove s_anon. We could just leave the code unchanged, but apart from that being potentially confusing, the (unfair) bit-spin-lock which protects s_anon can become a bottleneck when lots of disconnected dentries are being created. So this patch renames s_anon to s_roots, and stops storing disconnected dentries on the list.
Only dentries obtained with d_obtain_root() are now stored on this list. There are many fewer of these (only NFS and NILFS2 use the call, and only during filesystem mount) so contention on the bit-lock will not be a problem. Possibly an alternate solution should be found for NFS and NILFS2, but that would require understanding their needs first. Signed-off-by: NeilBrown <neilb@suse.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--Documentation/filesystems/nfs/Exporting27
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h10
-rw-r--r--fs/dcache.c22
-rw-r--r--fs/super.c2
-rw-r--r--include/linux/fs.h2
5 files changed, 34 insertions, 29 deletions
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting
index 520a4becb75c..63889149f532 100644
--- a/Documentation/filesystems/nfs/Exporting
+++ b/Documentation/filesystems/nfs/Exporting
@@ -56,13 +56,25 @@ a/ A dentry flag DCACHE_DISCONNECTED which is set on
56 any dentry that might not be part of the proper prefix. 56 any dentry that might not be part of the proper prefix.
57 This is set when anonymous dentries are created, and cleared when a 57 This is set when anonymous dentries are created, and cleared when a
58 dentry is noticed to be a child of a dentry which is in the proper 58 dentry is noticed to be a child of a dentry which is in the proper
59 prefix. 59 prefix. If the refcount on a dentry with this flag set
60 60 becomes zero, the dentry is immediately discarded, rather than being
61b/ A per-superblock list "s_anon" of dentries which are the roots of 61 kept in the dcache. If a dentry that is not already in the dcache
62 subtrees that are not in the proper prefix. These dentries, as 62 is repeatedly accessed by filehandle (as NFSD might do), a new dentry
63 well as the proper prefix, need to be released at unmount time. As 63 will be allocated for each access, and discarded at the end of
64 these dentries will not be hashed, they are linked together on the 64 the access.
65 d_hash list_head. 65
66 Note that such a dentry can acquire children, name, ancestors, etc.
67 without losing DCACHE_DISCONNECTED - that flag is only cleared when
68 subtree is successfully reconnected to root. Until then dentries
69 in such subtree are retained only as long as there are references;
70 refcount reaching zero means immediate eviction, same as for unhashed
71 dentries. That guarantees that we won't need to hunt them down upon
72 umount.
73
74b/ A primitive for creation of secondary roots - d_obtain_root(inode).
75 Those do _not_ bear DCACHE_DISCONNECTED. They are placed on the
76 per-superblock list (->s_roots), so they can be located at umount
77 time for eviction purposes.
66 78
67c/ Helper routines to allocate anonymous dentries, and to help attach 79c/ Helper routines to allocate anonymous dentries, and to help attach
68 loose directory dentries at lookup time. They are: 80 loose directory dentries at lookup time. They are:
@@ -77,7 +89,6 @@ c/ Helper routines to allocate anonymous dentries, and to help attach
77 (such as an anonymous one created by d_obtain_alias), if appropriate. 89 (such as an anonymous one created by d_obtain_alias), if appropriate.
78 It returns NULL when the passed-in dentry is used, following the calling 90 It returns NULL when the passed-in dentry is used, following the calling
79 convention of ->lookup. 91 convention of ->lookup.
80
81 92
82Filesystem Issues 93Filesystem Issues
83----------------- 94-----------------
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index b133fd00c08c..0d62fcf016dc 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -1296,15 +1296,7 @@ static inline void d_lustre_invalidate(struct dentry *dentry, int nested)
1296 spin_lock_nested(&dentry->d_lock, 1296 spin_lock_nested(&dentry->d_lock,
1297 nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL); 1297 nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL);
1298 ll_d2d(dentry)->lld_invalid = 1; 1298 ll_d2d(dentry)->lld_invalid = 1;
1299 /* 1299 if (d_count(dentry) == 0)
1300 * We should be careful about dentries created by d_obtain_alias().
1301 * These dentries are not put in the dentry tree, instead they are
1302 * linked to sb->s_anon through dentry->d_hash.
1303 * shrink_dcache_for_umount() shrinks the tree and sb->s_anon list.
1304 * If we unhashed such a dentry, unmount would not be able to find
1305 * it and busy inodes would be reported.
1306 */
1307 if (d_count(dentry) == 0 && !(dentry->d_flags & DCACHE_DISCONNECTED))
1308 __d_drop(dentry); 1300 __d_drop(dentry);
1309 spin_unlock(&dentry->d_lock); 1301 spin_unlock(&dentry->d_lock);
1310} 1302}
diff --git a/fs/dcache.c b/fs/dcache.c
index b99a39206930..17e6b84b9656 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -48,8 +48,8 @@
48 * - i_dentry, d_u.d_alias, d_inode of aliases 48 * - i_dentry, d_u.d_alias, d_inode of aliases
49 * dcache_hash_bucket lock protects: 49 * dcache_hash_bucket lock protects:
50 * - the dcache hash table 50 * - the dcache hash table
51 * s_anon bl list spinlock protects: 51 * s_roots bl list spinlock protects:
52 * - the s_anon list (see __d_drop) 52 * - the s_roots list (see __d_drop)
53 * dentry->d_sb->s_dentry_lru_lock protects: 53 * dentry->d_sb->s_dentry_lru_lock protects:
54 * - the dcache lru lists and counters 54 * - the dcache lru lists and counters
55 * d_lock protects: 55 * d_lock protects:
@@ -67,7 +67,7 @@
67 * dentry->d_lock 67 * dentry->d_lock
68 * dentry->d_sb->s_dentry_lru_lock 68 * dentry->d_sb->s_dentry_lru_lock
69 * dcache_hash_bucket lock 69 * dcache_hash_bucket lock
70 * s_anon lock 70 * s_roots lock
71 * 71 *
72 * If there is an ancestor relationship: 72 * If there is an ancestor relationship:
73 * dentry->d_parent->...->d_parent->d_lock 73 * dentry->d_parent->...->d_parent->d_lock
@@ -476,10 +476,10 @@ void __d_drop(struct dentry *dentry)
476 /* 476 /*
477 * Hashed dentries are normally on the dentry hashtable, 477 * Hashed dentries are normally on the dentry hashtable,
478 * with the exception of those newly allocated by 478 * with the exception of those newly allocated by
479 * d_obtain_alias, which are always IS_ROOT: 479 * d_obtain_root, which are always IS_ROOT:
480 */ 480 */
481 if (unlikely(IS_ROOT(dentry))) 481 if (unlikely(IS_ROOT(dentry)))
482 b = &dentry->d_sb->s_anon; 482 b = &dentry->d_sb->s_roots;
483 else 483 else
484 b = d_hash(dentry->d_name.hash); 484 b = d_hash(dentry->d_name.hash);
485 485
@@ -1499,8 +1499,8 @@ void shrink_dcache_for_umount(struct super_block *sb)
1499 sb->s_root = NULL; 1499 sb->s_root = NULL;
1500 do_one_tree(dentry); 1500 do_one_tree(dentry);
1501 1501
1502 while (!hlist_bl_empty(&sb->s_anon)) { 1502 while (!hlist_bl_empty(&sb->s_roots)) {
1503 dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash)); 1503 dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash));
1504 do_one_tree(dentry); 1504 do_one_tree(dentry);
1505 } 1505 }
1506} 1506}
@@ -1964,9 +1964,11 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
1964 spin_lock(&tmp->d_lock); 1964 spin_lock(&tmp->d_lock);
1965 __d_set_inode_and_type(tmp, inode, add_flags); 1965 __d_set_inode_and_type(tmp, inode, add_flags);
1966 hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); 1966 hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
1967 hlist_bl_lock(&tmp->d_sb->s_anon); 1967 if (!disconnected) {
1968 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 1968 hlist_bl_lock(&tmp->d_sb->s_roots);
1969 hlist_bl_unlock(&tmp->d_sb->s_anon); 1969 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_roots);
1970 hlist_bl_unlock(&tmp->d_sb->s_roots);
1971 }
1970 spin_unlock(&tmp->d_lock); 1972 spin_unlock(&tmp->d_lock);
1971 spin_unlock(&inode->i_lock); 1973 spin_unlock(&inode->i_lock);
1972 1974
diff --git a/fs/super.c b/fs/super.c
index d4e33e8f1e6f..9ea66601d664 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -207,7 +207,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
207 if (s->s_user_ns != &init_user_ns) 207 if (s->s_user_ns != &init_user_ns)
208 s->s_iflags |= SB_I_NODEV; 208 s->s_iflags |= SB_I_NODEV;
209 INIT_HLIST_NODE(&s->s_instances); 209 INIT_HLIST_NODE(&s->s_instances);
210 INIT_HLIST_BL_HEAD(&s->s_anon); 210 INIT_HLIST_BL_HEAD(&s->s_roots);
211 mutex_init(&s->s_sync_lock); 211 mutex_init(&s->s_sync_lock);
212 INIT_LIST_HEAD(&s->s_inodes); 212 INIT_LIST_HEAD(&s->s_inodes);
213 spin_lock_init(&s->s_inode_list_lock); 213 spin_lock_init(&s->s_inode_list_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2995a271ec46..6276f8315e5b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1359,7 +1359,7 @@ struct super_block {
1359 1359
1360 const struct fscrypt_operations *s_cop; 1360 const struct fscrypt_operations *s_cop;
1361 1361
1362 struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ 1362 struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
1363 struct list_head s_mounts; /* list of mounts; _not_ for fs use */ 1363 struct list_head s_mounts; /* list of mounts; _not_ for fs use */
1364 struct block_device *s_bdev; 1364 struct block_device *s_bdev;
1365 struct backing_dev_info *s_bdi; 1365 struct backing_dev_info *s_bdi;