summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2019-03-15 22:23:19 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2019-04-09 19:18:04 -0400
commit5467a68cbf6884c9a9d91e2a89140afb1839c835 (patch)
tree06501c271a2d4140c50b3643a64e30086170f134
parent9419a3191dcb27f24478d288abaab697228d28e6 (diff)
dcache: sort the freeing-without-RCU-delay mess for good.
For lockless accesses to dentries we don't have pinned we rely (among other things) upon having an RCU delay between dropping the last reference and actually freeing the memory. On the other hand, for things like pipes and sockets we neither do that kind of lockless access, nor want to deal with the overhead of an RCU delay every time a socket gets closed. So the delay was made optional - setting DCACHE_RCUACCESS in ->d_flags made sure it would happen. We tried to avoid setting it unless we knew we needed it. Unfortunately, that had led to a recurring class of bugs, in which we missed the need to set it. We only really need to skip the delay for dentries that are created by d_alloc_pseudo(), so let's not bother with trying to be smart - just make having an RCU delay the default. The ones that do *not* get it set the replacement flag (DCACHE_NORCU) and we'd better use that sparingly. d_alloc_pseudo() is the only such user right now. FWIW, the race that finally prompted that switch had been between __lock_parent() of an immediate subdirectory of what's currently the root of a disconnected tree (e.g. from open-by-handle in progress) and d_splice_alias() elsewhere picking another alias for the same inode, either on an outright corrupted fs image, or (in case of open-by-handle on NFS) that subdirectory having been just moved on the server. It's not easy to hit, so the sky is not falling, but that's not the first race on similar missed cases and the logic for setting DCACHE_RCUACCESS has gotten ridiculously convoluted. Cc: stable@vger.kernel.org Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--Documentation/filesystems/porting5
-rw-r--r--fs/dcache.c24
-rw-r--r--fs/nsfs.c3
-rw-r--r--include/linux/dcache.h2
4 files changed, 20 insertions, 14 deletions
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index cf43bc4dbf31..a60fa516d4cb 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -638,3 +638,8 @@ in your dentry operations instead.
638 inode to d_splice_alias() will also do the right thing (equivalent of 638 inode to d_splice_alias() will also do the right thing (equivalent of
639 d_add(dentry, NULL); return NULL;), so that kind of special cases 639 d_add(dentry, NULL); return NULL;), so that kind of special cases
640 also doesn't need a separate treatment. 640 also doesn't need a separate treatment.
641--
642[mandatory]
643 DCACHE_RCUACCESS is gone; having an RCU delay on dentry freeing is the
644 default. DCACHE_NORCU opts out, and only d_alloc_pseudo() has any
645 business doing so.
diff --git a/fs/dcache.c b/fs/dcache.c
index aac41adf4743..c663c602f9ef 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -344,7 +344,7 @@ static void dentry_free(struct dentry *dentry)
344 } 344 }
345 } 345 }
346 /* if dentry was never visible to RCU, immediate free is OK */ 346 /* if dentry was never visible to RCU, immediate free is OK */
347 if (!(dentry->d_flags & DCACHE_RCUACCESS)) 347 if (dentry->d_flags & DCACHE_NORCU)
348 __d_free(&dentry->d_u.d_rcu); 348 __d_free(&dentry->d_u.d_rcu);
349 else 349 else
350 call_rcu(&dentry->d_u.d_rcu, __d_free); 350 call_rcu(&dentry->d_u.d_rcu, __d_free);
@@ -1701,7 +1701,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1701 struct dentry *dentry = __d_alloc(parent->d_sb, name); 1701 struct dentry *dentry = __d_alloc(parent->d_sb, name);
1702 if (!dentry) 1702 if (!dentry)
1703 return NULL; 1703 return NULL;
1704 dentry->d_flags |= DCACHE_RCUACCESS;
1705 spin_lock(&parent->d_lock); 1704 spin_lock(&parent->d_lock);
1706 /* 1705 /*
1707 * don't need child lock because it is not subject 1706 * don't need child lock because it is not subject
@@ -1726,7 +1725,7 @@ struct dentry *d_alloc_cursor(struct dentry * parent)
1726{ 1725{
1727 struct dentry *dentry = d_alloc_anon(parent->d_sb); 1726 struct dentry *dentry = d_alloc_anon(parent->d_sb);
1728 if (dentry) { 1727 if (dentry) {
1729 dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; 1728 dentry->d_flags |= DCACHE_DENTRY_CURSOR;
1730 dentry->d_parent = dget(parent); 1729 dentry->d_parent = dget(parent);
1731 } 1730 }
1732 return dentry; 1731 return dentry;
@@ -1739,10 +1738,17 @@ struct dentry *d_alloc_cursor(struct dentry * parent)
1739 * 1738 *
1740 * For a filesystem that just pins its dentries in memory and never 1739 * For a filesystem that just pins its dentries in memory and never
1741 * performs lookups at all, return an unhashed IS_ROOT dentry. 1740 * performs lookups at all, return an unhashed IS_ROOT dentry.
1741 * This is used for pipes, sockets et al. - the stuff that should
1742 * never be anyone's children or parents. Unlike all other
1743 * dentries, these will not have RCU delay between dropping the
1744 * last reference and freeing them.
1742 */ 1745 */
1743struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) 1746struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1744{ 1747{
1745 return __d_alloc(sb, name); 1748 struct dentry *dentry = __d_alloc(sb, name);
1749 if (likely(dentry))
1750 dentry->d_flags |= DCACHE_NORCU;
1751 return dentry;
1746} 1752}
1747EXPORT_SYMBOL(d_alloc_pseudo); 1753EXPORT_SYMBOL(d_alloc_pseudo);
1748 1754
@@ -1911,12 +1917,10 @@ struct dentry *d_make_root(struct inode *root_inode)
1911 1917
1912 if (root_inode) { 1918 if (root_inode) {
1913 res = d_alloc_anon(root_inode->i_sb); 1919 res = d_alloc_anon(root_inode->i_sb);
1914 if (res) { 1920 if (res)
1915 res->d_flags |= DCACHE_RCUACCESS;
1916 d_instantiate(res, root_inode); 1921 d_instantiate(res, root_inode);
1917 } else { 1922 else
1918 iput(root_inode); 1923 iput(root_inode);
1919 }
1920 } 1924 }
1921 return res; 1925 return res;
1922} 1926}
@@ -2781,9 +2785,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
2781 copy_name(dentry, target); 2785 copy_name(dentry, target);
2782 target->d_hash.pprev = NULL; 2786 target->d_hash.pprev = NULL;
2783 dentry->d_parent->d_lockref.count++; 2787 dentry->d_parent->d_lockref.count++;
2784 if (dentry == old_parent) 2788 if (dentry != old_parent) /* wasn't IS_ROOT */
2785 dentry->d_flags |= DCACHE_RCUACCESS;
2786 else
2787 WARN_ON(!--old_parent->d_lockref.count); 2789 WARN_ON(!--old_parent->d_lockref.count);
2788 } else { 2790 } else {
2789 target->d_parent = old_parent; 2791 target->d_parent = old_parent;
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 60702d677bd4..30d150a4f0c6 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -85,13 +85,12 @@ slow:
85 inode->i_fop = &ns_file_operations; 85 inode->i_fop = &ns_file_operations;
86 inode->i_private = ns; 86 inode->i_private = ns;
87 87
88 dentry = d_alloc_pseudo(mnt->mnt_sb, &empty_name); 88 dentry = d_alloc_anon(mnt->mnt_sb);
89 if (!dentry) { 89 if (!dentry) {
90 iput(inode); 90 iput(inode);
91 return ERR_PTR(-ENOMEM); 91 return ERR_PTR(-ENOMEM);
92 } 92 }
93 d_instantiate(dentry, inode); 93 d_instantiate(dentry, inode);
94 dentry->d_flags |= DCACHE_RCUACCESS;
95 dentry->d_fsdata = (void *)ns->ops; 94 dentry->d_fsdata = (void *)ns->ops;
96 d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); 95 d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
97 if (d) { 96 if (d) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 60996e64c579..6e1e8e6602c6 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -176,7 +176,6 @@ struct dentry_operations {
176 * typically using d_splice_alias. */ 176 * typically using d_splice_alias. */
177 177
178#define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */ 178#define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */
179#define DCACHE_RCUACCESS 0x00000080 /* Entry has ever been RCU-visible */
180 179
181#define DCACHE_CANT_MOUNT 0x00000100 180#define DCACHE_CANT_MOUNT 0x00000100
182#define DCACHE_GENOCIDE 0x00000200 181#define DCACHE_GENOCIDE 0x00000200
@@ -217,6 +216,7 @@ struct dentry_operations {
217 216
218#define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ 217#define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */
219#define DCACHE_DENTRY_CURSOR 0x20000000 218#define DCACHE_DENTRY_CURSOR 0x20000000
219#define DCACHE_NORCU 0x40000000 /* No RCU delay for freeing */
220 220
221extern seqlock_t rename_lock; 221extern seqlock_t rename_lock;
222 222