aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Layton <jlayton@redhat.com>2013-02-20 11:19:05 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2013-02-26 02:46:09 -0500
commitecf3d1f1aa74da0d632b651a2e05a911f60e92c0 (patch)
tree62a2e0a46bfd993a24a1154ec1331c57bbd50482
parent4f4a4faddea0fe45bf508e723c3a810c5190ed62 (diff)
vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op
The following set of operations on an NFS client and server will cause a spurious ESTALE error:

    server# mkdir a
    client# cd a
    server# mv a a.bak
    client# sleep 30  # (or whatever the dir attrcache timeout is)
    client# stat .
    stat: cannot stat `.': Stale NFS file handle

Obviously, we should not be getting an ESTALE error back there since the inode still exists on the server. The problem is that the lookup code will call d_revalidate on the dentry that "." refers to, because NFS has FS_REVAL_DOT set. nfs_lookup_revalidate will see that the parent directory has changed and will try to reverify the dentry by redoing a LOOKUP. That of course fails, so the lookup code returns ESTALE.

The problem here is that d_revalidate is really a bad fit for this case. What we really want to know at this point is whether the inode is still good or not, but we don't really care what name it goes by or whether the dcache is still valid.

Add a new d_op->d_weak_revalidate operation and have complete_walk call that instead of d_revalidate. The intent there is to allow for a "weaker" d_revalidate that just checks to see whether the inode is still good. This also gives us an opportunity to kill off the FS_REVAL_DOT special casing.

[AV: changed method name, added note in porting, fixed confusion re having it possibly called from RCU mode (it won't be)]

Cc: NeilBrown <neilb@suse.de>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--Documentation/filesystems/Locking2
-rw-r--r--Documentation/filesystems/porting4
-rw-r--r--Documentation/filesystems/vfs.txt24
-rw-r--r--fs/9p/vfs_dentry.c1
-rw-r--r--fs/9p/vfs_super.c2
-rw-r--r--fs/dcache.c3
-rw-r--r--fs/namei.c8
-rw-r--r--fs/nfs/dir.c40
-rw-r--r--fs/nfs/nfs4super.c6
-rw-r--r--fs/nfs/super.c6
-rw-r--r--include/linux/dcache.h3
-rw-r--r--include/linux/fs.h1
12 files changed, 84 insertions, 16 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index f48e0c6b4c42..0706d32a61e6 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -10,6 +10,7 @@ be able to use diff(1).
10--------------------------- dentry_operations -------------------------- 10--------------------------- dentry_operations --------------------------
11prototypes: 11prototypes:
12 int (*d_revalidate)(struct dentry *, unsigned int); 12 int (*d_revalidate)(struct dentry *, unsigned int);
13 int (*d_weak_revalidate)(struct dentry *, unsigned int);
13 int (*d_hash)(const struct dentry *, const struct inode *, 14 int (*d_hash)(const struct dentry *, const struct inode *,
14 struct qstr *); 15 struct qstr *);
15 int (*d_compare)(const struct dentry *, const struct inode *, 16 int (*d_compare)(const struct dentry *, const struct inode *,
@@ -25,6 +26,7 @@ prototypes:
25locking rules: 26locking rules:
26 rename_lock ->d_lock may block rcu-walk 27 rename_lock ->d_lock may block rcu-walk
27d_revalidate: no no yes (ref-walk) maybe 28d_revalidate: no no yes (ref-walk) maybe
29d_weak_revalidate:no no yes no
28d_hash no no no maybe 30d_hash no no no maybe
29d_compare: yes no no maybe 31d_compare: yes no no maybe
30d_delete: no yes no no 32d_delete: no yes no no
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 0472c31c163b..4db22f6491e0 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -441,3 +441,7 @@ d_make_root() drops the reference to inode if dentry allocation fails.
441two, it gets "is it an O_EXCL or equivalent?" boolean argument. Note that 441two, it gets "is it an O_EXCL or equivalent?" boolean argument. Note that
442local filesystems can ignore tha argument - they are guaranteed that the 442local filesystems can ignore tha argument - they are guaranteed that the
443object doesn't exist. It's remote/distributed ones that might care... 443object doesn't exist. It's remote/distributed ones that might care...
444--
445[mandatory]
446 FS_REVAL_DOT is gone; if you used to have it, add ->d_weak_revalidate()
447in your dentry operations instead.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index e3869098163e..bc4b06b3160a 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -900,6 +900,7 @@ defined:
900 900
901struct dentry_operations { 901struct dentry_operations {
902 int (*d_revalidate)(struct dentry *, unsigned int); 902 int (*d_revalidate)(struct dentry *, unsigned int);
903 int (*d_weak_revalidate)(struct dentry *, unsigned int);
903 int (*d_hash)(const struct dentry *, const struct inode *, 904 int (*d_hash)(const struct dentry *, const struct inode *,
904 struct qstr *); 905 struct qstr *);
905 int (*d_compare)(const struct dentry *, const struct inode *, 906 int (*d_compare)(const struct dentry *, const struct inode *,
@@ -915,8 +916,13 @@ struct dentry_operations {
915 916
916 d_revalidate: called when the VFS needs to revalidate a dentry. This 917 d_revalidate: called when the VFS needs to revalidate a dentry. This
917 is called whenever a name look-up finds a dentry in the 918 is called whenever a name look-up finds a dentry in the
918 dcache. Most filesystems leave this as NULL, because all their 919 dcache. Most local filesystems leave this as NULL, because all their
919 dentries in the dcache are valid 920 dentries in the dcache are valid. Network filesystems are different
921 since things can change on the server without the client necessarily
922 being aware of it.
923
924 This function should return a positive value if the dentry is still
925 valid, and zero or a negative error code if it isn't.
920 926
921 d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU). 927 d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU).
922 If in rcu-walk mode, the filesystem must revalidate the dentry without 928 If in rcu-walk mode, the filesystem must revalidate the dentry without
@@ -927,6 +933,20 @@ struct dentry_operations {
927 If a situation is encountered that rcu-walk cannot handle, return 933 If a situation is encountered that rcu-walk cannot handle, return
928 -ECHILD and it will be called again in ref-walk mode. 934 -ECHILD and it will be called again in ref-walk mode.
929 935
936 d_weak_revalidate: called when the VFS needs to revalidate a "jumped" dentry.
937 This is called when a path-walk ends at a dentry that was not acquired by
938 doing a lookup in the parent directory. This includes "/", "." and "..",
939 as well as procfs-style symlinks and mountpoint traversal.
940
941 In this case, we are less concerned with whether the dentry is still
942 fully correct, but rather that the inode is still valid. As with
943 d_revalidate, most local filesystems will set this to NULL since their
944 dcache entries are always valid.
945
946 This function has the same return code semantics as d_revalidate.
947
948 d_weak_revalidate is only called after leaving rcu-walk mode.
949
930 d_hash: called when the VFS adds a dentry to the hash table. The first 950 d_hash: called when the VFS adds a dentry to the hash table. The first
931 dentry passed to d_hash is the parent directory that the name is 951 dentry passed to d_hash is the parent directory that the name is
932 to be hashed into. The inode is the dentry's inode. 952 to be hashed into. The inode is the dentry's inode.
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 64600b5d0522..9ad68628522c 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -137,6 +137,7 @@ out_valid:
137 137
138const struct dentry_operations v9fs_cached_dentry_operations = { 138const struct dentry_operations v9fs_cached_dentry_operations = {
139 .d_revalidate = v9fs_lookup_revalidate, 139 .d_revalidate = v9fs_lookup_revalidate,
140 .d_weak_revalidate = v9fs_lookup_revalidate,
140 .d_delete = v9fs_cached_dentry_delete, 141 .d_delete = v9fs_cached_dentry_delete,
141 .d_release = v9fs_dentry_release, 142 .d_release = v9fs_dentry_release,
142}; 143};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 137d50396898..91dad63e5a2d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -363,5 +363,5 @@ struct file_system_type v9fs_fs_type = {
363 .mount = v9fs_mount, 363 .mount = v9fs_mount,
364 .kill_sb = v9fs_kill_super, 364 .kill_sb = v9fs_kill_super,
365 .owner = THIS_MODULE, 365 .owner = THIS_MODULE,
366 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT, 366 .fs_flags = FS_RENAME_DOES_D_MOVE,
367}; 367};
diff --git a/fs/dcache.c b/fs/dcache.c
index ebab049826c0..68220dd0c135 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1358,6 +1358,7 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1358 WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | 1358 WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH |
1359 DCACHE_OP_COMPARE | 1359 DCACHE_OP_COMPARE |
1360 DCACHE_OP_REVALIDATE | 1360 DCACHE_OP_REVALIDATE |
1361 DCACHE_OP_WEAK_REVALIDATE |
1361 DCACHE_OP_DELETE )); 1362 DCACHE_OP_DELETE ));
1362 dentry->d_op = op; 1363 dentry->d_op = op;
1363 if (!op) 1364 if (!op)
@@ -1368,6 +1369,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1368 dentry->d_flags |= DCACHE_OP_COMPARE; 1369 dentry->d_flags |= DCACHE_OP_COMPARE;
1369 if (op->d_revalidate) 1370 if (op->d_revalidate)
1370 dentry->d_flags |= DCACHE_OP_REVALIDATE; 1371 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1372 if (op->d_weak_revalidate)
1373 dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1371 if (op->d_delete) 1374 if (op->d_delete)
1372 dentry->d_flags |= DCACHE_OP_DELETE; 1375 dentry->d_flags |= DCACHE_OP_DELETE;
1373 if (op->d_prune) 1376 if (op->d_prune)
diff --git a/fs/namei.c b/fs/namei.c
index 052c095c2808..dc984fee5532 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -600,14 +600,10 @@ static int complete_walk(struct nameidata *nd)
600 if (likely(!(nd->flags & LOOKUP_JUMPED))) 600 if (likely(!(nd->flags & LOOKUP_JUMPED)))
601 return 0; 601 return 0;
602 602
603 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 603 if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
604 return 0; 604 return 0;
605 605
606 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) 606 status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
607 return 0;
608
609 /* Note: we do not d_invalidate() */
610 status = d_revalidate(dentry, nd->flags);
611 if (status > 0) 607 if (status > 0)
612 return 0; 608 return 0;
613 609
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a8bd28cde7e2..f23f455be42b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1136,6 +1136,45 @@ out_error:
1136} 1136}
1137 1137
1138/* 1138/*
1139 * A weaker form of d_revalidate for revalidating just the dentry->d_inode
1140 * when we don't really care about the dentry name. This is called when a
1141 * pathwalk ends on a dentry that was not found via a normal lookup in the
1142 * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
1143 *
1144 * In this situation, we just want to verify that the inode itself is OK
1145 * since the dentry might have changed on the server.
1146 */
1147static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1148{
1149 int error;
1150 struct inode *inode = dentry->d_inode;
1151
1152 /*
1153 * I believe we can only get a negative dentry here in the case of a
1154 * procfs-style symlink. Just assume it's correct for now, but we may
1155 * eventually need to do something more here.
1156 */
1157 if (!inode) {
1158 dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n",
1159 __func__, dentry->d_parent->d_name.name,
1160 dentry->d_name.name);
1161 return 1;
1162 }
1163
1164 if (is_bad_inode(inode)) {
1165 dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
1166 __func__, dentry->d_parent->d_name.name,
1167 dentry->d_name.name);
1168 return 0;
1169 }
1170
1171 error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
1172 dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
1173 __func__, inode->i_ino, error ? "invalid" : "valid");
1174 return !error;
1175}
1176
1177/*
1139 * This is called from dput() when d_count is going to 0. 1178 * This is called from dput() when d_count is going to 0.
1140 */ 1179 */
1141static int nfs_dentry_delete(const struct dentry *dentry) 1180static int nfs_dentry_delete(const struct dentry *dentry)
@@ -1202,6 +1241,7 @@ static void nfs_d_release(struct dentry *dentry)
1202 1241
1203const struct dentry_operations nfs_dentry_operations = { 1242const struct dentry_operations nfs_dentry_operations = {
1204 .d_revalidate = nfs_lookup_revalidate, 1243 .d_revalidate = nfs_lookup_revalidate,
1244 .d_weak_revalidate = nfs_weak_revalidate,
1205 .d_delete = nfs_dentry_delete, 1245 .d_delete = nfs_dentry_delete,
1206 .d_iput = nfs_dentry_iput, 1246 .d_iput = nfs_dentry_iput,
1207 .d_automount = nfs_d_automount, 1247 .d_automount = nfs_d_automount,
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 84d2e9e2f313..569b166cc050 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -28,7 +28,7 @@ static struct file_system_type nfs4_remote_fs_type = {
28 .name = "nfs4", 28 .name = "nfs4",
29 .mount = nfs4_remote_mount, 29 .mount = nfs4_remote_mount,
30 .kill_sb = nfs_kill_super, 30 .kill_sb = nfs_kill_super,
31 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 31 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
32}; 32};
33 33
34static struct file_system_type nfs4_remote_referral_fs_type = { 34static struct file_system_type nfs4_remote_referral_fs_type = {
@@ -36,7 +36,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
36 .name = "nfs4", 36 .name = "nfs4",
37 .mount = nfs4_remote_referral_mount, 37 .mount = nfs4_remote_referral_mount,
38 .kill_sb = nfs_kill_super, 38 .kill_sb = nfs_kill_super,
39 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 39 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
40}; 40};
41 41
42struct file_system_type nfs4_referral_fs_type = { 42struct file_system_type nfs4_referral_fs_type = {
@@ -44,7 +44,7 @@ struct file_system_type nfs4_referral_fs_type = {
44 .name = "nfs4", 44 .name = "nfs4",
45 .mount = nfs4_referral_mount, 45 .mount = nfs4_referral_mount,
46 .kill_sb = nfs_kill_super, 46 .kill_sb = nfs_kill_super,
47 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 47 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
48}; 48};
49 49
50static const struct super_operations nfs4_sops = { 50static const struct super_operations nfs4_sops = {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2e7e8c878e5d..92acc26f9c5f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -292,7 +292,7 @@ struct file_system_type nfs_fs_type = {
292 .name = "nfs", 292 .name = "nfs",
293 .mount = nfs_fs_mount, 293 .mount = nfs_fs_mount,
294 .kill_sb = nfs_kill_super, 294 .kill_sb = nfs_kill_super,
295 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 295 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
296}; 296};
297EXPORT_SYMBOL_GPL(nfs_fs_type); 297EXPORT_SYMBOL_GPL(nfs_fs_type);
298 298
@@ -301,7 +301,7 @@ struct file_system_type nfs_xdev_fs_type = {
301 .name = "nfs", 301 .name = "nfs",
302 .mount = nfs_xdev_mount, 302 .mount = nfs_xdev_mount,
303 .kill_sb = nfs_kill_super, 303 .kill_sb = nfs_kill_super,
304 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 304 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
305}; 305};
306 306
307const struct super_operations nfs_sops = { 307const struct super_operations nfs_sops = {
@@ -331,7 +331,7 @@ struct file_system_type nfs4_fs_type = {
331 .name = "nfs4", 331 .name = "nfs4",
332 .mount = nfs_fs_mount, 332 .mount = nfs_fs_mount,
333 .kill_sb = nfs_kill_super, 333 .kill_sb = nfs_kill_super,
334 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 334 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
335}; 335};
336EXPORT_SYMBOL_GPL(nfs4_fs_type); 336EXPORT_SYMBOL_GPL(nfs4_fs_type);
337 337
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 03d169288423..1a6bb81f0fe5 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -145,6 +145,7 @@ enum dentry_d_lock_class
145 145
146struct dentry_operations { 146struct dentry_operations {
147 int (*d_revalidate)(struct dentry *, unsigned int); 147 int (*d_revalidate)(struct dentry *, unsigned int);
148 int (*d_weak_revalidate)(struct dentry *, unsigned int);
148 int (*d_hash)(const struct dentry *, const struct inode *, 149 int (*d_hash)(const struct dentry *, const struct inode *,
149 struct qstr *); 150 struct qstr *);
150 int (*d_compare)(const struct dentry *, const struct inode *, 151 int (*d_compare)(const struct dentry *, const struct inode *,
@@ -192,6 +193,8 @@ struct dentry_operations {
192#define DCACHE_GENOCIDE 0x0200 193#define DCACHE_GENOCIDE 0x0200
193#define DCACHE_SHRINK_LIST 0x0400 194#define DCACHE_SHRINK_LIST 0x0400
194 195
196#define DCACHE_OP_WEAK_REVALIDATE 0x0800
197
195#define DCACHE_NFSFS_RENAMED 0x1000 198#define DCACHE_NFSFS_RENAMED 0x1000
196 /* this dentry has been "silly renamed" and has to be deleted on the last 199 /* this dentry has been "silly renamed" and has to be deleted on the last
197 * dput() */ 200 * dput() */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7f471520b88b..da94011ae83c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1807,7 +1807,6 @@ struct file_system_type {
1807#define FS_HAS_SUBTYPE 4 1807#define FS_HAS_SUBTYPE 4
1808#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ 1808#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
1809#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ 1809#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
1810#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
1811#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ 1810#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
1812 struct dentry *(*mount) (struct file_system_type *, int, 1811 struct dentry *(*mount) (struct file_system_type *, int,
1813 const char *, void *); 1812 const char *, void *);