aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-07 11:56:33 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-07 11:56:33 -0500
commitb4a45f5fe8078bfc10837dbd5b98735058bc4698 (patch)
treedf6f13a27610a3ec7eb4a661448cd779a8f84c79
parent01539ba2a706ab7d35fc0667dff919ade7f87d63 (diff)
parentb3e19d924b6eaf2ca7d22cba99a517c5171007b6 (diff)
Merge branch 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin
* 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin: (57 commits) fs: scale mntget/mntput fs: rename vfsmount counter helpers fs: implement faster dentry memcmp fs: prefetch inode data in dcache lookup fs: improve scalability of pseudo filesystems fs: dcache per-inode inode alias locking fs: dcache per-bucket dcache hash locking bit_spinlock: add required includes kernel: add bl_list xfs: provide simple rcu-walk ACL implementation btrfs: provide simple rcu-walk ACL implementation ext2,3,4: provide simple rcu-walk ACL implementation fs: provide simple rcu-walk generic_check_acl implementation fs: provide rcu-walk aware permission i_ops fs: rcu-walk aware d_revalidate method fs: cache optimise dentry and inode for rcu-walk fs: dcache reduce branches in lookup path fs: dcache remove d_mounted fs: fs_struct use seqlock fs: rcu-walk for path lookup ...
-rw-r--r--Documentation/filesystems/Locking29
-rw-r--r--Documentation/filesystems/dentry-locking.txt174
-rw-r--r--Documentation/filesystems/path-lookup.txt382
-rw-r--r--Documentation/filesystems/porting69
-rw-r--r--Documentation/filesystems/vfs.txt74
-rw-r--r--arch/ia64/kernel/perfmon.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c18
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c5
-rw-r--r--drivers/mtd/mtdchar.c2
-rw-r--r--drivers/staging/autofs/root.c7
-rw-r--r--drivers/staging/pohmelfs/inode.c9
-rw-r--r--drivers/staging/pohmelfs/path_entry.c17
-rw-r--r--drivers/staging/smbfs/cache.c16
-rw-r--r--drivers/staging/smbfs/dir.c50
-rw-r--r--drivers/staging/smbfs/file.c5
-rw-r--r--drivers/staging/smbfs/inode.c9
-rw-r--r--drivers/usb/core/inode.c12
-rw-r--r--fs/9p/acl.c5
-rw-r--r--fs/9p/acl.h2
-rw-r--r--fs/9p/vfs_dentry.c4
-rw-r--r--fs/9p/vfs_inode.c39
-rw-r--r--fs/adfs/dir.c13
-rw-r--r--fs/adfs/super.c11
-rw-r--r--fs/affs/amigaffs.c4
-rw-r--r--fs/affs/namei.c68
-rw-r--r--fs/affs/super.c11
-rw-r--r--fs/afs/dir.c10
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/security.c7
-rw-r--r--fs/afs/super.c10
-rw-r--r--fs/anon_inodes.c6
-rw-r--r--fs/autofs4/autofs_i.h21
-rw-r--r--fs/autofs4/expire.c141
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c91
-rw-r--r--fs/autofs4/waitq.c23
-rw-r--r--fs/bad_inode.c5
-rw-r--r--fs/befs/linuxvfs.c10
-rw-r--r--fs/bfs/inode.c9
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/btrfs/acl.c21
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/export.c4
-rw-r--r--fs/btrfs/inode.c17
-rw-r--r--fs/ceph/dir.c28
-rw-r--r--fs/ceph/inode.c38
-rw-r--r--fs/ceph/mds_client.c2
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/cifs/cifsfs.c16
-rw-r--r--fs/cifs/dir.c77
-rw-r--r--fs/cifs/inode.c14
-rw-r--r--fs/cifs/link.c4
-rw-r--r--fs/cifs/readdir.c6
-rw-r--r--fs/coda/cache.c4
-rw-r--r--fs/coda/dir.c20
-rw-r--r--fs/coda/inode.c9
-rw-r--r--fs/coda/pioctl.c6
-rw-r--r--fs/configfs/configfs_internal.h4
-rw-r--r--fs/configfs/dir.c24
-rw-r--r--fs/configfs/inode.c8
-rw-r--r--fs/dcache.c1375
-rw-r--r--fs/ecryptfs/dentry.c9
-rw-r--r--fs/ecryptfs/inode.c12
-rw-r--r--fs/ecryptfs/main.c4
-rw-r--r--fs/ecryptfs/super.c12
-rw-r--r--fs/efs/super.c9
-rw-r--r--fs/exofs/super.c9
-rw-r--r--fs/exportfs/expfs.c14
-rw-r--r--fs/ext2/acl.c11
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/super.c9
-rw-r--r--fs/ext3/acl.c11
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/super.c9
-rw-r--r--fs/ext4/acl.c11
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fat/namei_msdos.c23
-rw-r--r--fs/fat/namei_vfat.c55
-rw-r--r--fs/filesystems.c3
-rw-r--r--fs/freevxfs/vxfs_inode.c9
-rw-r--r--fs/fs_struct.c36
-rw-r--r--fs/fuse/dir.c18
-rw-r--r--fs/fuse/inode.c13
-rw-r--r--fs/generic_acl.c20
-rw-r--r--fs/gfs2/acl.c5
-rw-r--r--fs/gfs2/acl.h2
-rw-r--r--fs/gfs2/dentry.c22
-rw-r--r--fs/gfs2/export.c4
-rw-r--r--fs/gfs2/file.c2
-rw-r--r--fs/gfs2/inode.c4
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/ops_inode.c20
-rw-r--r--fs/gfs2/super.c9
-rw-r--r--fs/hfs/dir.c2
-rw-r--r--fs/hfs/hfs_fs.h8
-rw-r--r--fs/hfs/string.c17
-rw-r--r--fs/hfs/super.c11
-rw-r--r--fs/hfs/sysdep.c7
-rw-r--r--fs/hfsplus/dir.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h8
-rw-r--r--fs/hfsplus/super.c12
-rw-r--r--fs/hfsplus/unicode.c18
-rw-r--r--fs/hostfs/hostfs_kern.c44
-rw-r--r--fs/hpfs/dentry.c27
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c9
-rw-r--r--fs/hppfs/hppfs.c9
-rw-r--r--fs/hugetlbfs/inode.c9
-rw-r--r--fs/inode.c50
-rw-r--r--fs/internal.h1
-rw-r--r--fs/isofs/inode.c131
-rw-r--r--fs/isofs/namei.c5
-rw-r--r--fs/jffs2/acl.c5
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/super.c9
-rw-r--r--fs/jfs/acl.c8
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/namei.c63
-rw-r--r--fs/jfs/super.c12
-rw-r--r--fs/libfs.c63
-rw-r--r--fs/locks.c2
-rw-r--r--fs/logfs/dir.c6
-rw-r--r--fs/logfs/inode.c9
-rw-r--r--fs/minix/inode.c9
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/namei.c857
-rw-r--r--fs/namespace.c291
-rw-r--r--fs/ncpfs/dir.c88
-rw-r--r--fs/ncpfs/inode.c12
-rw-r--r--fs/ncpfs/ncplib_kernel.h16
-rw-r--r--fs/nfs/dir.c32
-rw-r--r--fs/nfs/getroot.c10
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/namespace.c17
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nilfs2/inode.c10
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/super.c12
-rw-r--r--fs/notify/fsnotify.c8
-rw-r--r--fs/ntfs/inode.c9
-rw-r--r--fs/ocfs2/acl.c8
-rw-r--r--fs/ocfs2/acl.h2
-rw-r--r--fs/ocfs2/dcache.c20
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c9
-rw-r--r--fs/ocfs2/export.c4
-rw-r--r--fs/ocfs2/file.c7
-rw-r--r--fs/ocfs2/file.h2
-rw-r--r--fs/ocfs2/namei.c10
-rw-r--r--fs/ocfs2/super.c9
-rw-r--r--fs/openpromfs/inode.c9
-rw-r--r--fs/pipe.c12
-rw-r--r--fs/pnode.c4
-rw-r--r--fs/proc/base.c53
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/inode.c9
-rw-r--r--fs/proc/proc_sysctl.c31
-rw-r--r--fs/qnx4/inode.c9
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/reiserfs/xattr.c18
-rw-r--r--fs/romfs/super.c9
-rw-r--r--fs/squashfs/super.c9
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysfs/dir.c10
-rw-r--r--fs/sysfs/inode.c11
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/sysv/inode.c9
-rw-r--r--fs/sysv/namei.c5
-rw-r--r--fs/sysv/super.c2
-rw-r--r--fs/ubifs/super.c10
-rw-r--r--fs/udf/super.c9
-rw-r--r--fs/ufs/super.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c11
-rw-r--r--fs/xfs/xfs_acl.h2
-rw-r--r--fs/xfs/xfs_iget.c13
-rw-r--r--include/linux/bit_spinlock.h4
-rw-r--r--include/linux/coda_linux.h2
-rw-r--r--include/linux/dcache.h243
-rw-r--r--include/linux/fs.h63
-rw-r--r--include/linux/fs_struct.h3
-rw-r--r--include/linux/fsnotify.h2
-rw-r--r--include/linux/fsnotify_backend.h11
-rw-r--r--include/linux/generic_acl.h2
-rw-r--r--include/linux/list_bl.h144
-rw-r--r--include/linux/mount.h53
-rw-r--r--include/linux/namei.h16
-rw-r--r--include/linux/ncp_fs.h4
-rw-r--r--include/linux/nfs_fs.h2
-rw-r--r--include/linux/path.h2
-rw-r--r--include/linux/posix_acl.h19
-rw-r--r--include/linux/rculist_bl.h127
-rw-r--r--include/linux/reiserfs_xattr.h2
-rw-r--r--include/linux/security.h8
-rw-r--r--include/linux/seqlock.h80
-rw-r--r--include/linux/slab.h2
-rw-r--r--ipc/mqueue.c9
-rw-r--r--kernel/cgroup.c54
-rw-r--r--mm/filemap.c3
-rw-r--r--mm/shmem.c9
-rw-r--r--mm/slab.c32
-rw-r--r--mm/slob.c5
-rw-r--r--mm/slub.c40
-rw-r--r--mm/util.c21
-rw-r--r--net/socket.c24
-rw-r--r--net/sunrpc/rpc_pipe.c14
-rw-r--r--security/security.c9
-rw-r--r--security/selinux/selinuxfs.c16
-rw-r--r--security/tomoyo/realpath.c1
212 files changed, 4774 insertions, 2014 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 33fa3e5d38fd..977d8919cc69 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -9,22 +9,25 @@ be able to use diff(1).
9 9
10--------------------------- dentry_operations -------------------------- 10--------------------------- dentry_operations --------------------------
11prototypes: 11prototypes:
12 int (*d_revalidate)(struct dentry *, int); 12 int (*d_revalidate)(struct dentry *, struct nameidata *);
13 int (*d_hash) (struct dentry *, struct qstr *); 13 int (*d_hash)(const struct dentry *, const struct inode *,
14 int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); 14 struct qstr *);
15 int (*d_compare)(const struct dentry *, const struct inode *,
16 const struct dentry *, const struct inode *,
17 unsigned int, const char *, const struct qstr *);
15 int (*d_delete)(struct dentry *); 18 int (*d_delete)(struct dentry *);
16 void (*d_release)(struct dentry *); 19 void (*d_release)(struct dentry *);
17 void (*d_iput)(struct dentry *, struct inode *); 20 void (*d_iput)(struct dentry *, struct inode *);
18 char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen); 21 char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
19 22
20locking rules: 23locking rules:
21 dcache_lock rename_lock ->d_lock may block 24 rename_lock ->d_lock may block rcu-walk
22d_revalidate: no no no yes 25d_revalidate: no no yes (ref-walk) maybe
23d_hash no no no yes 26d_hash no no no maybe
24d_compare: no yes no no 27d_compare: yes no no maybe
25d_delete: yes no yes no 28d_delete: no yes no no
26d_release: no no no yes 29d_release: no no yes no
27d_iput: no no no yes 30d_iput: no no yes no
28d_dname: no no no no 31d_dname: no no no no
29 32
30--------------------------- inode_operations --------------------------- 33--------------------------- inode_operations ---------------------------
@@ -44,8 +47,8 @@ ata *);
44 void * (*follow_link) (struct dentry *, struct nameidata *); 47 void * (*follow_link) (struct dentry *, struct nameidata *);
45 void (*put_link) (struct dentry *, struct nameidata *, void *); 48 void (*put_link) (struct dentry *, struct nameidata *, void *);
46 void (*truncate) (struct inode *); 49 void (*truncate) (struct inode *);
47 int (*permission) (struct inode *, int, struct nameidata *); 50 int (*permission) (struct inode *, int, unsigned int);
48 int (*check_acl)(struct inode *, int); 51 int (*check_acl)(struct inode *, int, unsigned int);
49 int (*setattr) (struct dentry *, struct iattr *); 52 int (*setattr) (struct dentry *, struct iattr *);
50 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); 53 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
51 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 54 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -73,7 +76,7 @@ follow_link: no
73put_link: no 76put_link: no
74truncate: yes (see below) 77truncate: yes (see below)
75setattr: yes 78setattr: yes
76permission: no 79permission: no (may not block if called in rcu-walk mode)
77check_acl: no 80check_acl: no
78getattr: no 81getattr: no
79setxattr: yes 82setxattr: yes
diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt
deleted file mode 100644
index 79334ed5daa7..000000000000
--- a/Documentation/filesystems/dentry-locking.txt
+++ /dev/null
@@ -1,174 +0,0 @@
1RCU-based dcache locking model
2==============================
3
4On many workloads, the most common operation on dcache is to look up a
5dentry, given a parent dentry and the name of the child. Typically,
6for every open(), stat() etc., the dentry corresponding to the
7pathname will be looked up by walking the tree starting with the first
8component of the pathname and using that dentry along with the next
9component to look up the next level and so on. Since it is a frequent
10operation for workloads like multiuser environments and web servers,
11it is important to optimize this path.
12
13Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus in
14every component during path look-up. Since 2.5.10 onwards, fast-walk
15algorithm changed this by holding the dcache_lock at the beginning and
16walking as many cached path component dentries as possible. This
17significantly decreases the number of acquisition of
18dcache_lock. However it also increases the lock hold time
19significantly and affects performance in large SMP machines. Since
202.5.62 kernel, dcache has been using a new locking model that uses RCU
21to make dcache look-up lock-free.
22
23The current dcache locking model is not very different from the
24existing dcache locking model. Prior to 2.5.62 kernel, dcache_lock
25protected the hash chain, d_child, d_alias, d_lru lists as well as
26d_inode and several other things like mount look-up. RCU-based changes
27affect only the way the hash chain is protected. For everything else
28the dcache_lock must be taken for both traversing as well as
29updating. The hash chain updates too take the dcache_lock. The
30significant change is the way d_lookup traverses the hash chain, it
31doesn't acquire the dcache_lock for this and rely on RCU to ensure
32that the dentry has not been *freed*.
33
34
35Dcache locking details
36======================
37
38For many multi-user workloads, open() and stat() on files are very
39frequently occurring operations. Both involve walking of path names to
40find the dentry corresponding to the concerned file. In 2.4 kernel,
41dcache_lock was held during look-up of each path component. Contention
42and cache-line bouncing of this global lock caused significant
43scalability problems. With the introduction of RCU in Linux kernel,
44this was worked around by making the look-up of path components during
45path walking lock-free.
46
47
48Safe lock-free look-up of dcache hash table
49===========================================
50
51Dcache is a complex data structure with the hash table entries also
52linked together in other lists. In 2.4 kernel, dcache_lock protected
53all the lists. We applied RCU only on hash chain walking. The rest of
54the lists are still protected by dcache_lock. Some of the important
55changes are :
56
571. The deletion from hash chain is done using hlist_del_rcu() macro
58 which doesn't initialize next pointer of the deleted dentry and
59 this allows us to walk safely lock-free while a deletion is
60 happening.
61
622. Insertion of a dentry into the hash table is done using
63 hlist_add_head_rcu() which take care of ordering the writes - the
64 writes to the dentry must be visible before the dentry is
65 inserted. This works in conjunction with hlist_for_each_rcu(),
66 which has since been replaced by hlist_for_each_entry_rcu(), while
67 walking the hash chain. The only requirement is that all
68 initialization to the dentry must be done before
69 hlist_add_head_rcu() since we don't have dcache_lock protection
70 while traversing the hash chain. This isn't different from the
71 existing code.
72
733. The dentry looked up without holding dcache_lock cannot be
74 returned for walking if it is unhashed. It then may have a NULL
75 d_inode or other bogosity since RCU doesn't protect the other
76 fields in the dentry. We therefore use a flag DCACHE_UNHASHED to
77 indicate unhashed dentries and use this in conjunction with a
78 per-dentry lock (d_lock). Once looked up without the dcache_lock,
79 we acquire the per-dentry lock (d_lock) and check if the dentry is
80 unhashed. If so, the look-up is failed. If not, the reference count
81 of the dentry is increased and the dentry is returned.
82
834. Once a dentry is looked up, it must be ensured during the path walk
84 for that component it doesn't go away. In pre-2.5.10 code, this was
85 done holding a reference to the dentry. dcache_rcu does the same.
86 In some sense, dcache_rcu path walking looks like the pre-2.5.10
87 version.
88
895. All dentry hash chain updates must take the dcache_lock as well as
90 the per-dentry lock in that order. dput() does this to ensure that
91 a dentry that has just been looked up in another CPU doesn't get
92 deleted before dget() can be done on it.
93
946. There are several ways to do reference counting of RCU protected
95 objects. One such example is in ipv4 route cache where deferred
96 freeing (using call_rcu()) is done as soon as the reference count
97 goes to zero. This cannot be done in the case of dentries because
98 tearing down of dentries require blocking (dentry_iput()) which
99 isn't supported from RCU callbacks. Instead, tearing down of
100 dentries happen synchronously in dput(), but actual freeing happens
101 later when RCU grace period is over. This allows safe lock-free
102 walking of the hash chains, but a matched dentry may have been
103 partially torn down. The checking of DCACHE_UNHASHED flag with
104 d_lock held detects such dentries and prevents them from being
105 returned from look-up.
106
107
108Maintaining POSIX rename semantics
109==================================
110
111Since look-up of dentries is lock-free, it can race against a
112concurrent rename operation. For example, during rename of file A to
113B, look-up of either A or B must succeed. So, if look-up of B happens
114after A has been removed from the hash chain but not added to the new
115hash chain, it may fail. Also, a comparison while the name is being
116written concurrently by a rename may result in false positive matches
117violating rename semantics. Issues related to race with rename are
118handled as described below :
119
1201. Look-up can be done in two ways - d_lookup() which is safe from
121 simultaneous renames and __d_lookup() which is not. If
122 __d_lookup() fails, it must be followed up by a d_lookup() to
123 correctly determine whether a dentry is in the hash table or
124 not. d_lookup() protects look-ups using a sequence lock
125 (rename_lock).
126
1272. The name associated with a dentry (d_name) may be changed if a
128 rename is allowed to happen simultaneously. To avoid memcmp() in
129 __d_lookup() go out of bounds due to a rename and false positive
130 comparison, the name comparison is done while holding the
131 per-dentry lock. This prevents concurrent renames during this
132 operation.
133
1343. Hash table walking during look-up may move to a different bucket as
135 the current dentry is moved to a different bucket due to rename.
136 But we use hlists in dcache hash table and they are
137 null-terminated. So, even if a dentry moves to a different bucket,
138 hash chain walk will terminate. [with a list_head list, it may not
139 since termination is when the list_head in the original bucket is
140 reached]. Since we redo the d_parent check and compare name while
141 holding d_lock, lock-free look-up will not race against d_move().
142
1434. There can be a theoretical race when a dentry keeps coming back to
144 original bucket due to double moves. Due to this look-up may
145 consider that it has never moved and can end up in an infinite loop.
146 But this is not any worse than theoretical livelocks we already
147 have in the kernel.
148
149
150Important guidelines for filesystem developers related to dcache_rcu
151====================================================================
152
1531. Existing dcache interfaces (pre-2.5.62) exported to filesystem
154 don't change. Only dcache internal implementation changes. However
155 filesystems *must not* delete from the dentry hash chains directly
156 using the list macros like allowed earlier. They must use dcache
157 APIs like d_drop() or __d_drop() depending on the situation.
158
1592. d_flags is now protected by a per-dentry lock (d_lock). All access
160 to d_flags must be protected by it.
161
1623. For a hashed dentry, checking of d_count needs to be protected by
163 d_lock.
164
165
166Papers and other documentation on dcache locking
167================================================
168
1691. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
170
1712. http://lse.sourceforge.net/locking/dcache/dcache.html
172
173
174
diff --git a/Documentation/filesystems/path-lookup.txt b/Documentation/filesystems/path-lookup.txt
new file mode 100644
index 000000000000..eb59c8b44be9
--- /dev/null
+++ b/Documentation/filesystems/path-lookup.txt
@@ -0,0 +1,382 @@
1Path walking and name lookup locking
2====================================
3
4Path resolution is the finding of a dentry corresponding to a path name string, by
5performing a path walk. Typically, for every open(), stat() etc., the path name
6will be resolved. Paths are resolved by walking the namespace tree, starting
7with the first component of the pathname (eg. root or cwd) with a known dentry,
8then finding the child of that dentry, which is named the next component in the
9path string. Then repeating the lookup from the child dentry and finding its
10child with the next element, and so on.
11
12Since it is a frequent operation for workloads like multiuser environments and
13web servers, it is important to optimize this code.
14
15Path walking synchronisation history:
16Prior to 2.5.10, dcache_lock was acquired in d_lookup (dcache hash lookup) and
17thus in every component during path look-up. Since 2.5.10 onwards, fast-walk
18algorithm changed this by holding the dcache_lock at the beginning and walking
19as many cached path component dentries as possible. This significantly
20decreases the number of acquisitions of dcache_lock. However it also increases
21the lock hold time significantly and affects performance in large SMP machines.
22Since 2.5.62 kernel, dcache has been using a new locking model that uses RCU to
23make dcache look-up lock-free.
24
25All the above algorithms required taking a lock and reference count on the
26dentry that was looked up, so that it may be used as the basis for walking the
27next path element. This is inefficient and unscalable. It is inefficient
28because the locks and atomic operations required for every dentry element
29slow things down. It is not scalable because many parallel applications that
30are path-walk intensive tend to do path lookups starting from a common dentry
31(usually, the root "/" or current working directory). So contention on these
32common path elements causes lock and cacheline queueing.
33
34Since 2.6.38, RCU is used to make a significant part of the entire path walk
35(including dcache look-up) completely "store-free" (so, no locks, atomics, or
36even stores into cachelines of common dentries). This is known as "rcu-walk"
37path walking.
38
39Path walking overview
40=====================
41
42A name string specifies a start (root directory, cwd, fd-relative) and a
43sequence of elements (directory entry names), which together refer to a path in
44the namespace. A path is represented as a (dentry, vfsmount) tuple. The name
45elements are sub-strings, separated by '/'.
46
47Name lookups will want to find a particular path that a name string refers to
48(usually the final element, or parent of final element). This is done by taking
49the path given by the name's starting point (which we know in advance -- eg.
50current->fs->cwd or current->fs->root) as the first parent of the lookup. Then
51iteratively for each subsequent name element, look up the child of the current
52parent with the given name and if it is not the desired entry, make it the
53parent for the next lookup.
54
55A parent, of course, must be a directory, and we must have appropriate
56permissions on the parent inode to be able to walk into it.
57
58Turning the child into a parent for the next lookup requires more checks and
59procedures. Symlinks essentially substitute the symlink name for the target
60name in the name string, and require some recursive path walking. Mount points
61must be followed into (thus changing the vfsmount that subsequent path elements
62refer to), switching from the mount point path to the root of the particular
63mounted vfsmount. These behaviours are variously modified depending on the
64exact path walking flags.
65
66Path walking then must, broadly, do several particular things:
67- find the start point of the walk;
68- perform permissions and validity checks on inodes;
69- perform dcache hash name lookups on (parent, name element) tuples;
70- traverse mount points;
71- traverse symlinks;
72- lookup and create missing parts of the path on demand.
73
74Safe store-free look-up of dcache hash table
75============================================
76
77Dcache name lookup
78------------------
79In order to lookup a dcache (parent, name) tuple, we take a hash on the tuple
80and use that to select a bucket in the dcache-hash table. The list of entries
81in that bucket is then walked, and we do a full comparison of each entry
82against our (parent, name) tuple.
83
84The hash lists are RCU protected, so list walking is not serialised with
85concurrent updates (insertion, deletion from the hash). This is a standard RCU
86list application with the exception of renames, which will be covered below.
87
88Parent and name members of a dentry, as well as its membership in the dcache
89hash, and its inode are protected by the per-dentry d_lock spinlock. A
90reference is taken on the dentry (while the fields are verified under d_lock),
91and this stabilises its d_inode pointer and actual inode. This gives a stable
92point to perform the next step of our path walk against.
93
94These members are also protected by d_seq seqlock, although this offers
95read-only protection and no durability of results, so care must be taken when
96using d_seq for synchronisation (see seqcount based lookups, below).
97
98Renames
99-------
100Back to the rename case. In usual RCU protected lists, the only operations that
101will happen to an object is insertion, and then eventually removal from the
102list. The object will not be reused until an RCU grace period is complete.
103This ensures the RCU list traversal primitives can run over the object without
104problems (see RCU documentation for how this works).
105
106However when a dentry is renamed, its hash value can change, requiring it to be
107moved to a new hash list. Allocating and inserting a new alias would be
108expensive and also problematic for directory dentries. Latency would be far too
109high to wait for a grace period after removing the dentry and before inserting
110it in the new hash bucket. So what is done is to insert the dentry into the
111new list immediately.
112
113However, when the dentry's list pointers are updated to point to objects in the
114new list before waiting for a grace period, this can result in a concurrent RCU
115lookup of the old list veering off into the new (incorrect) list and missing
116the remaining dentries on the list.
117
118There is no fundamental problem with walking down the wrong list, because the
119dentry comparisons will never match. However it is fatal to miss a matching
120dentry. So a seqlock is used to detect when a rename has occurred, and so the
121lookup can be retried.
122
123 1 2 3
124 +---+ +---+ +---+
125hlist-->| N-+->| N-+->| N-+->
126head <--+-P |<-+-P |<-+-P |
127 +---+ +---+ +---+
128
129Rename of dentry 2 may require it to be deleted from the above list, and inserted
130into a new list. Deleting 2 gives the following list.
131
132 1 3
133 +---+ +---+ (don't worry, the longer pointers do not
134hlist-->| N-+-------->| N-+-> impose a measurable performance overhead
135head <--+-P |<--------+-P | on modern CPUs)
136 +---+ +---+
137 ^ 2 ^
138 | +---+ |
139 | | N-+----+
140 +----+-P |
141 +---+
142
143This is a standard RCU-list deletion, which leaves the deleted object's
144pointers intact, so a concurrent list walker that is currently looking at
145object 2 will correctly continue to object 3 when it is time to traverse the
146next object.
147
148However, when inserting object 2 onto a new list, we end up with this:
149
150 1 3
151 +---+ +---+
152hlist-->| N-+-------->| N-+->
153head <--+-P |<--------+-P |
154 +---+ +---+
155 2
156 +---+
157 | N-+---->
158 <----+-P |
159 +---+
160
161Because we didn't wait for a grace period, there may be a concurrent lookup
162still at 2. Now when it follows 2's 'next' pointer, it will walk off into
163another list without ever having checked object 3.
164
165A related, but distinctly different, issue is that of rename atomicity versus
166lookup operations. If a file is renamed from 'A' to 'B', a lookup must only
167find either 'A' or 'B'. So if a lookup of 'A' returns NULL, a subsequent lookup
168of 'B' must succeed (note the reverse is not true).
169
170Between deleting the dentry from the old hash list, and inserting it on the new
171hash list, a lookup may find neither 'A' nor 'B' matching the dentry. The same
172rename seqlock is also used to cover this race in much the same way, by
173retrying a negative lookup result if a rename was in progress.
174
175Seqcount based lookups
176----------------------
177In refcount based dcache lookups, d_lock is used to serialise access to
178the dentry, stabilising it while comparing its name and parent and then
179taking a reference count (the reference count then gives a stable place to
180start the next part of the path walk from).
181
182As explained above, we would like to do path walking without taking locks or
183reference counts on intermediate dentries along the path. To do this, a per
184dentry seqlock (d_seq) is used to take a "coherent snapshot" of what the dentry
185looks like (its name, parent, and inode). That snapshot is then used to start
186the next part of the path walk. When loading the coherent snapshot under d_seq,
187care must be taken to load the members up-front, and use those pointers rather
188than reloading from the dentry later on (otherwise we'd have interesting things
189like d_inode going NULL underneath us, if the name was unlinked).
190
191Also important is to avoid performing any destructive operations (pretty much:
192no non-atomic stores to shared data), and to recheck the seqcount when we are
193"done" with the operation. Retry or abort if the seqcount does not match.
194Avoiding destructive or changing operations means we can easily unwind from
195failure.
196
197What this means is that a caller, provided they are holding RCU lock to
198protect the dentry object from disappearing, can perform a seqcount based
199lookup which does not increment the refcount on the dentry or write to
200it in any way. This returned dentry can be used for subsequent operations,
201provided that d_seq is rechecked after that operation is complete.
202
203Inodes are also rcu freed, so the seqcount lookup dentry's inode may also be
204queried for permissions.
205
206With these two parts of the puzzle, we can do path lookups without taking
207locks or refcounts on dentry elements.
208
209RCU-walk path walking design
210============================
211
212Path walking code now has two distinct modes, ref-walk and rcu-walk. ref-walk
213is the traditional[*] way of performing dcache lookups using d_lock to
214serialise concurrent modifications to the dentry and take a reference count on
215it. ref-walk is simple and obvious, and may sleep, take locks, etc while path
216walking is operating on each dentry. rcu-walk uses seqcount based dentry
217lookups, and can perform lookup of intermediate elements without any stores to
218shared data in the dentry or inode. rcu-walk can not be applied to all cases,
219eg. if the filesystem must sleep or perform non trivial operations, rcu-walk
220must be switched to ref-walk mode.
221
222[*] RCU is still used for the dentry hash lookup in ref-walk, but not the full
223 path walk.
224
225Where ref-walk uses a stable, refcounted ``parent'' to walk the remaining
226path string, rcu-walk uses a d_seq protected snapshot. When looking up a
227child of this parent snapshot, we open d_seq critical section on the child
228before closing d_seq critical section on the parent. This gives an interlocking
229ladder of snapshots to walk down.
230
231
232 proc 101
233 /----------------\
234 / comm: "vi" \
235 / fs.root: dentry0 \
236 \ fs.cwd: dentry2 /
237 \ /
238 \----------------/
239
240So when vi wants to open("/home/npiggin/test.c", O_RDWR), then it will
241start from current->fs->root, which is a pinned dentry. Alternatively,
242"./test.c" would start from cwd; both names refer to the same path in
243the context of proc101.
244
245 dentry 0
246 +---------------------+ rcu-walk begins here, we note d_seq, check the
247 | name: "/" | inode's permission, and then look up the next
248 | inode: 10 | path element which is "home"...
249 | children:"home", ...|
250 +---------------------+
251 |
252 dentry 1 V
253 +---------------------+ ... which brings us here. We find dentry1 via
254 | name: "home" | hash lookup, then note d_seq and compare name
255 | inode: 678 | string and parent pointer. When we have a match,
256 | children:"npiggin" | we now recheck the d_seq of dentry0. Then we
257 +---------------------+ check inode and look up the next element.
258 |
259 dentry2 V
260 +---------------------+ Note: if dentry0 is now modified, lookup is
261 | name: "npiggin" | not necessarily invalid, so we need only keep a
262 | inode: 543 | parent for d_seq verification, and grandparents
263 | children:"a.c", ... | can be forgotten.
264 +---------------------+
265 |
266 dentry3 V
267 +---------------------+ At this point we have our destination dentry.
268 | name: "a.c" | We now take its d_lock, verify d_seq of this
269 | inode: 14221 | dentry. If that checks out, we can increment
270 | children:NULL | its refcount because we're holding d_lock.
271 +---------------------+
272
273Taking a refcount on a dentry from rcu-walk mode, by taking its d_lock,
274re-checking its d_seq, and then incrementing its refcount is called
275"dropping rcu" or dropping from rcu-walk into ref-walk mode.
276
277It is, in some sense, a bit of a house of cards. If the seqcount check of the
278parent snapshot fails, the house comes down, because we had closed the d_seq
279section on the grandparent, so we have nothing left to stand on. In that case,
280the path walk must be fully restarted (which we do in ref-walk mode, to avoid
281live locks). It is costly to have a full restart, but fortunately they are
282quite rare.
283
284When we reach a point where sleeping is required, or a filesystem callout
285requires ref-walk, then instead of restarting the walk, we attempt to drop rcu
286at the last known good dentry we have. Avoiding a full restart in ref-walk in
287these cases is fundamental for performance and scalability because blocking
288operations such as creates and unlinks are not uncommon.
289
290The detailed design for rcu-walk is like this:
291* LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk.
292* Take the RCU lock for the entire path walk, starting with the acquiring
293 of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are
294 not required for dentry persistence.
295* synchronize_rcu is called when unregistering a filesystem, so we can
296 access d_ops and i_ops during rcu-walk.
297* Similarly take the vfsmount lock for the entire path walk. So now mnt
298 refcounts are not required for persistence. Also we are free to perform mount
299 lookups, and to assume dentry mount points and mount roots are stable up and
300 down the path.
301* Have a per-dentry seqlock to protect the dentry name, parent, and inode,
302 so we can load this tuple atomically, and also check whether any of its
303 members have changed.
304* Dentry lookups (based on parent, candidate string tuple) recheck the parent
305 sequence after the child is found in case anything changed in the parent
306 during the path walk.
307* inode is also RCU protected so we can load d_inode and use the inode for
308 limited things.
309* i_mode, i_uid, i_gid can be tested for exec permissions during path walk.
310* i_op can be loaded.
311* When the destination dentry is reached, drop rcu there (ie. take d_lock,
312 verify d_seq, increment refcount).
313* If seqlock verification fails anywhere along the path, do a full restart
314 of the path lookup in ref-walk mode. -ECHILD tends to be used (for want of
315 a better errno) to signal an rcu-walk failure.
316
317The cases where rcu-walk cannot continue are:
318* NULL dentry (ie. any uncached path element)
319* Following links
320
321It may be possible eventually to make following links rcu-walk aware.
322
323Uncached path elements will always require dropping to ref-walk mode, at the
324very least because i_mutex needs to be grabbed, and objects allocated.
325
326Final note:
327"store-free" path walking is not strictly store free. We take vfsmount lock
328and refcounts (both of which can be made per-cpu), and we also store to the
329stack (which is essentially CPU-local), and we also have to take locks and
330refcount on final dentry.
331
332The point is that shared data, where practically possible, is not locked
333or stored into. The result is massive improvements in performance and
334scalability of path resolution.
335
336
337Interesting statistics
338======================
339
340The following table gives rcu lookup statistics for a few simple workloads
341(2s12c24t Westmere, debian non-graphical system). Ungraceful are attempts to
342drop rcu that fail due to a d_seq failure and so require the entire path lookup
343to be redone. Other cases are successful rcu-drops that are required before the final
344element, nodentry for missing dentry, revalidate for filesystem revalidate
345routine requiring rcu drop, permission for permission check requiring drop,
346and link for symlink traversal requiring drop.
347
348 rcu-lookups restart nodentry link revalidate permission
349bootup 47121 0 4624 1010 10283 7852
350dbench 25386793 0 6778659(26.7%) 55 549 1156
351kbuild 2696672 10 64442(2.3%) 108764(4.0%) 1 1590
352git diff 39605 0 28 2 0 106
353vfstest 24185492 4945 708725(2.9%) 1076136(4.4%) 0 2651
354
355What this shows is that failed rcu-walk lookups, ie. ones that are restarted
356entirely with ref-walk, are quite rare. Even the "vfstest" case which
357specifically has concurrent renames/mkdir/rmdir/creat/unlink/etc to exercise
358such races is not showing a huge amount of restarts.
359
360Dropping from rcu-walk to ref-walk means that we have encountered a dentry where
361the reference count needs to be taken for some reason. This is either because
362we have reached the target of the path walk, or because we have encountered a
363condition that can't be resolved in rcu-walk mode. Ideally, we drop rcu-walk
364only when we have reached the target dentry, so the other statistics show where
365this does not happen.
366
367Note that a graceful drop from rcu-walk mode due to something such as the
368dentry not existing (which can be common) is not necessarily a failure of
369rcu-walk scheme, because some elements of the path may have been walked in
370rcu-walk mode. The further we get from common path elements (such as cwd or
371root), the less contended the dentry is likely to be. The closer we are to
372common path elements, the more likely they will exist in dentry cache.
373
374
375Papers and other documentation on dcache locking
376================================================
377
3781. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
379
3802. http://lse.sourceforge.net/locking/dcache/dcache.html
381
382
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index b12c89538680..07a32b42cf9c 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -216,7 +216,6 @@ had ->revalidate()) add calls in ->follow_link()/->readlink().
216->d_parent changes are not protected by BKL anymore. Read access is safe 216->d_parent changes are not protected by BKL anymore. Read access is safe
217if at least one of the following is true: 217if at least one of the following is true:
218 * filesystem has no cross-directory rename() 218 * filesystem has no cross-directory rename()
219 * dcache_lock is held
220 * we know that parent had been locked (e.g. we are looking at 219 * we know that parent had been locked (e.g. we are looking at
221->d_parent of ->lookup() argument). 220->d_parent of ->lookup() argument).
222 * we are called from ->rename(). 221 * we are called from ->rename().
@@ -318,3 +317,71 @@ if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput(
318may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly 317may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly
319free the on-disk inode, you may end up doing that while ->write_inode() is writing 318free the on-disk inode, you may end up doing that while ->write_inode() is writing
320to it. 319to it.
320
321---
322[mandatory]
323
324 .d_delete() now only advises the dcache as to whether or not to cache
325unreferenced dentries, and is now only called when the dentry refcount goes to
3260. Even on 0 refcount transition, it must be able to tolerate being called 0,
3271, or more times (eg. constant, idempotent).
328
329---
330[mandatory]
331
332 .d_compare() calling convention and locking rules are significantly
333changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
334look at examples of other filesystems) for guidance.
335
336---
337[mandatory]
338
339 .d_hash() calling convention and locking rules are significantly
340changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
341look at examples of other filesystems) for guidance.
342
343---
344[mandatory]
345 dcache_lock is gone, replaced by fine grained locks. See fs/dcache.c
346for details of what locks to replace dcache_lock with in order to protect
347particular things. Most of the time, a filesystem only needs ->d_lock, which
348protects *all* the dcache state of a given dentry.
349
350--
351[mandatory]
352
353 Filesystems must RCU-free their inodes, if they can have been accessed
354via rcu-walk path walk (basically, if the file can have had a path name in the
355vfs namespace).
356
357 i_dentry and i_rcu share storage in a union, and the vfs expects
358i_dentry to be reinitialized before it is freed, so an:
359
360 INIT_LIST_HEAD(&inode->i_dentry);
361
362must be done in the RCU callback.
363
364--
365[recommended]
366 vfs now tries to do path walking in "rcu-walk mode", which avoids
367atomic operations and scalability hazards on dentries and inodes (see
368Documentation/filesystems/path-lookup.txt). d_hash and d_compare changes (above)
369are examples of the changes required to support this. For more complex
370filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so
371no changes are required to the filesystem. However, this is costly and loses
372the benefits of rcu-walk mode. We will begin to add filesystem callbacks that
373are rcu-walk aware, shown below. Filesystems should take advantage of this
374where possible.
375
376--
377[mandatory]
378 d_revalidate is a callback that is made on every path element (if
379the filesystem provides it), which requires dropping out of rcu-walk mode. This
380may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be
381returned if the filesystem cannot handle rcu-walk. See
382Documentation/filesystems/vfs.txt for more details.
383
384 permission and check_acl are inode permission checks that are called
385on many or all directory inodes on the way down a path walk (to check for
386exec permission). These must now be rcu-walk aware (flags & IPERM_RCU). See
387Documentation/filesystems/vfs.txt for more details.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 20899e095e7e..fbb324e2bd43 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -325,7 +325,8 @@ struct inode_operations {
325 void * (*follow_link) (struct dentry *, struct nameidata *); 325 void * (*follow_link) (struct dentry *, struct nameidata *);
326 void (*put_link) (struct dentry *, struct nameidata *, void *); 326 void (*put_link) (struct dentry *, struct nameidata *, void *);
327 void (*truncate) (struct inode *); 327 void (*truncate) (struct inode *);
328 int (*permission) (struct inode *, int, struct nameidata *); 328 int (*permission) (struct inode *, int, unsigned int);
329 int (*check_acl)(struct inode *, int, unsigned int);
329 int (*setattr) (struct dentry *, struct iattr *); 330 int (*setattr) (struct dentry *, struct iattr *);
330 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); 331 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
331 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 332 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -414,6 +415,13 @@ otherwise noted.
414 permission: called by the VFS to check for access rights on a POSIX-like 415 permission: called by the VFS to check for access rights on a POSIX-like
415 filesystem. 416 filesystem.
416 417
418 May be called in rcu-walk mode (flags & IPERM_RCU). If in rcu-walk
419 mode, the filesystem must check the permission without blocking or
420 storing to the inode.
421
422 If a situation is encountered that rcu-walk cannot handle, return
423 -ECHILD and it will be called again in ref-walk mode.
424
417 setattr: called by the VFS to set attributes for a file. This method 425 setattr: called by the VFS to set attributes for a file. This method
418 is called by chmod(2) and related system calls. 426 is called by chmod(2) and related system calls.
419 427
@@ -847,9 +855,12 @@ defined:
847 855
848struct dentry_operations { 856struct dentry_operations {
849 int (*d_revalidate)(struct dentry *, struct nameidata *); 857 int (*d_revalidate)(struct dentry *, struct nameidata *);
850 int (*d_hash) (struct dentry *, struct qstr *); 858 int (*d_hash)(const struct dentry *, const struct inode *,
851 int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); 859 struct qstr *);
852 int (*d_delete)(struct dentry *); 860 int (*d_compare)(const struct dentry *, const struct inode *,
861 const struct dentry *, const struct inode *,
862 unsigned int, const char *, const struct qstr *);
863 int (*d_delete)(const struct dentry *);
853 void (*d_release)(struct dentry *); 864 void (*d_release)(struct dentry *);
854 void (*d_iput)(struct dentry *, struct inode *); 865 void (*d_iput)(struct dentry *, struct inode *);
855 char *(*d_dname)(struct dentry *, char *, int); 866 char *(*d_dname)(struct dentry *, char *, int);
@@ -860,13 +871,45 @@ struct dentry_operations {
860 dcache. Most filesystems leave this as NULL, because all their 871 dcache. Most filesystems leave this as NULL, because all their
861 dentries in the dcache are valid 872 dentries in the dcache are valid
862 873
863 d_hash: called when the VFS adds a dentry to the hash table 874 d_revalidate may be called in rcu-walk mode (nd->flags & LOOKUP_RCU).
875 If in rcu-walk mode, the filesystem must revalidate the dentry without
 876 blocking or storing to the dentry. d_parent and d_inode should not be
877 used without care (because they can go NULL), instead nd->inode should
878 be used.
879
880 If a situation is encountered that rcu-walk cannot handle, return
881 -ECHILD and it will be called again in ref-walk mode.
882
883 d_hash: called when the VFS adds a dentry to the hash table. The first
884 dentry passed to d_hash is the parent directory that the name is
885 to be hashed into. The inode is the dentry's inode.
886
887 Same locking and synchronisation rules as d_compare regarding
888 what is safe to dereference etc.
889
890 d_compare: called to compare a dentry name with a given name. The first
891 dentry is the parent of the dentry to be compared, the second is
892 the parent's inode, then the dentry and inode (may be NULL) of the
893 child dentry. len and name string are properties of the dentry to be
894 compared. qstr is the name to compare it with.
895
896 Must be constant and idempotent, and should not take locks if
 897 possible, and should not store into the dentry or inodes.
898 Should not dereference pointers outside the dentry or inodes without
899 lots of care (eg. d_parent, d_inode, d_name should not be used).
900
901 However, our vfsmount is pinned, and RCU held, so the dentries and
902 inodes won't disappear, neither will our sb or filesystem module.
903 ->i_sb and ->d_sb may be used.
864 904
865 d_compare: called when a dentry should be compared with another 905 It is a tricky calling convention because it needs to be called under
906 "rcu-walk", ie. without any locks or references on things.
866 907
867 d_delete: called when the last reference to a dentry is 908 d_delete: called when the last reference to a dentry is dropped and the
868 deleted. This means no-one is using the dentry, however it is 909 dcache is deciding whether or not to cache it. Return 1 to delete
869 still valid and in the dcache 910 immediately, or 0 to cache the dentry. Default is NULL which means to
911 always cache a reachable dentry. d_delete must be constant and
912 idempotent.
870 913
871 d_release: called when a dentry is really deallocated 914 d_release: called when a dentry is really deallocated
872 915
@@ -910,14 +953,11 @@ manipulate dentries:
910 the usage count) 953 the usage count)
911 954
912 dput: close a handle for a dentry (decrements the usage count). If 955 dput: close a handle for a dentry (decrements the usage count). If
913 the usage count drops to 0, the "d_delete" method is called 956 the usage count drops to 0, and the dentry is still in its
914 and the dentry is placed on the unused list if the dentry is 957 parent's hash, the "d_delete" method is called to check whether
915 still in its parents hash list. Putting the dentry on the 958 it should be cached. If it should not be cached, or if the dentry
916 unused list just means that if the system needs some RAM, it 959 is not hashed, it is deleted. Otherwise cached dentries are put
917 goes through the unused list of dentries and deallocates them. 960 into an LRU list to be reclaimed on memory shortage.
918 If the dentry has already been unhashed and the usage count
919 drops to 0, in this case the dentry is deallocated after the
920 "d_delete" method is called
921 961
922 d_drop: this unhashes a dentry from its parents hash list. A 962 d_drop: this unhashes a dentry from its parents hash list. A
923 subsequent call to dput() will deallocate the dentry if its 963 subsequent call to dput() will deallocate the dentry if its
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 39e534f5a3b0..f099b82703d8 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -1542,7 +1542,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
1542 * any operations on the root directory. However, we need a non-trivial 1542 * any operations on the root directory. However, we need a non-trivial
1543 * d_name - pfm: will go nicely and kill the special-casing in procfs. 1543 * d_name - pfm: will go nicely and kill the special-casing in procfs.
1544 */ 1544 */
1545static struct vfsmount *pfmfs_mnt; 1545static struct vfsmount *pfmfs_mnt __read_mostly;
1546 1546
1547static int __init 1547static int __init
1548init_pfm_fs(void) 1548init_pfm_fs(void)
@@ -2185,7 +2185,7 @@ static const struct file_operations pfm_file_ops = {
2185}; 2185};
2186 2186
2187static int 2187static int
2188pfmfs_delete_dentry(struct dentry *dentry) 2188pfmfs_delete_dentry(const struct dentry *dentry)
2189{ 2189{
2190 return 1; 2190 return 1;
2191} 2191}
@@ -2233,7 +2233,7 @@ pfm_alloc_file(pfm_context_t *ctx)
2233 } 2233 }
2234 path.mnt = mntget(pfmfs_mnt); 2234 path.mnt = mntget(pfmfs_mnt);
2235 2235
2236 path.dentry->d_op = &pfmfs_dentry_operations; 2236 d_set_d_op(path.dentry, &pfmfs_dentry_operations);
2237 d_add(path.dentry, inode); 2237 d_add(path.dentry, inode);
2238 2238
2239 file = alloc_file(&path, FMODE_READ, &pfm_file_ops); 2239 file = alloc_file(&path, FMODE_READ, &pfm_file_ops);
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 3532b92de983..856e9c398068 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -71,12 +71,18 @@ spufs_alloc_inode(struct super_block *sb)
71 return &ei->vfs_inode; 71 return &ei->vfs_inode;
72} 72}
73 73
74static void 74static void spufs_i_callback(struct rcu_head *head)
75spufs_destroy_inode(struct inode *inode)
76{ 75{
76 struct inode *inode = container_of(head, struct inode, i_rcu);
77 INIT_LIST_HEAD(&inode->i_dentry);
77 kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); 78 kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
78} 79}
79 80
81static void spufs_destroy_inode(struct inode *inode)
82{
83 call_rcu(&inode->i_rcu, spufs_i_callback);
84}
85
80static void 86static void
81spufs_init_once(void *p) 87spufs_init_once(void *p)
82{ 88{
@@ -159,18 +165,18 @@ static void spufs_prune_dir(struct dentry *dir)
159 165
160 mutex_lock(&dir->d_inode->i_mutex); 166 mutex_lock(&dir->d_inode->i_mutex);
161 list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { 167 list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
162 spin_lock(&dcache_lock);
163 spin_lock(&dentry->d_lock); 168 spin_lock(&dentry->d_lock);
164 if (!(d_unhashed(dentry)) && dentry->d_inode) { 169 if (!(d_unhashed(dentry)) && dentry->d_inode) {
165 dget_locked(dentry); 170 dget_dlock(dentry);
166 __d_drop(dentry); 171 __d_drop(dentry);
167 spin_unlock(&dentry->d_lock); 172 spin_unlock(&dentry->d_lock);
168 simple_unlink(dir->d_inode, dentry); 173 simple_unlink(dir->d_inode, dentry);
169 spin_unlock(&dcache_lock); 174 /* XXX: what was dcache_lock protecting here? Other
175 * filesystems (IB, configfs) release dcache_lock
176 * before unlink */
170 dput(dentry); 177 dput(dentry);
171 } else { 178 } else {
172 spin_unlock(&dentry->d_lock); 179 spin_unlock(&dentry->d_lock);
173 spin_unlock(&dcache_lock);
174 } 180 }
175 } 181 }
176 shrink_dcache_parent(dir); 182 shrink_dcache_parent(dir);
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 8c8afc716b98..31ae1b108aea 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -277,18 +277,14 @@ static int remove_file(struct dentry *parent, char *name)
277 goto bail; 277 goto bail;
278 } 278 }
279 279
280 spin_lock(&dcache_lock);
281 spin_lock(&tmp->d_lock); 280 spin_lock(&tmp->d_lock);
282 if (!(d_unhashed(tmp) && tmp->d_inode)) { 281 if (!(d_unhashed(tmp) && tmp->d_inode)) {
283 dget_locked(tmp); 282 dget_dlock(tmp);
284 __d_drop(tmp); 283 __d_drop(tmp);
285 spin_unlock(&tmp->d_lock); 284 spin_unlock(&tmp->d_lock);
286 spin_unlock(&dcache_lock);
287 simple_unlink(parent->d_inode, tmp); 285 simple_unlink(parent->d_inode, tmp);
288 } else { 286 } else
289 spin_unlock(&tmp->d_lock); 287 spin_unlock(&tmp->d_lock);
290 spin_unlock(&dcache_lock);
291 }
292 288
293 ret = 0; 289 ret = 0;
294bail: 290bail:
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index f99bddc01716..df7fa251dcdc 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -453,17 +453,14 @@ static int remove_file(struct dentry *parent, char *name)
453 goto bail; 453 goto bail;
454 } 454 }
455 455
456 spin_lock(&dcache_lock);
457 spin_lock(&tmp->d_lock); 456 spin_lock(&tmp->d_lock);
458 if (!(d_unhashed(tmp) && tmp->d_inode)) { 457 if (!(d_unhashed(tmp) && tmp->d_inode)) {
459 dget_locked(tmp); 458 dget_dlock(tmp);
460 __d_drop(tmp); 459 __d_drop(tmp);
461 spin_unlock(&tmp->d_lock); 460 spin_unlock(&tmp->d_lock);
462 spin_unlock(&dcache_lock);
463 simple_unlink(parent->d_inode, tmp); 461 simple_unlink(parent->d_inode, tmp);
464 } else { 462 } else {
465 spin_unlock(&tmp->d_lock); 463 spin_unlock(&tmp->d_lock);
466 spin_unlock(&dcache_lock);
467 } 464 }
468 465
469 ret = 0; 466 ret = 0;
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 4759d827e8c7..f511dd15fd31 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1201,7 +1201,7 @@ err_unregister_chdev:
1201static void __exit cleanup_mtdchar(void) 1201static void __exit cleanup_mtdchar(void)
1202{ 1202{
1203 unregister_mtd_user(&mtdchar_notifier); 1203 unregister_mtd_user(&mtdchar_notifier);
1204 mntput(mtd_inode_mnt); 1204 mntput_long(mtd_inode_mnt);
1205 unregister_filesystem(&mtd_inodefs_type); 1205 unregister_filesystem(&mtd_inodefs_type);
1206 __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); 1206 __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
1207} 1207}
diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c
index 0fdec4befd84..bf0e9755da67 100644
--- a/drivers/staging/autofs/root.c
+++ b/drivers/staging/autofs/root.c
@@ -154,13 +154,16 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str
154 * yet completely filled in, and revalidate has to delay such 154 * yet completely filled in, and revalidate has to delay such
155 * lookups.. 155 * lookups..
156 */ 156 */
157static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd) 157static int autofs_revalidate(struct dentry *dentry, struct nameidata *nd)
158{ 158{
159 struct inode * dir; 159 struct inode * dir;
160 struct autofs_sb_info *sbi; 160 struct autofs_sb_info *sbi;
161 struct autofs_dir_ent *ent; 161 struct autofs_dir_ent *ent;
162 int res; 162 int res;
163 163
164 if (nd->flags & LOOKUP_RCU)
165 return -ECHILD;
166
164 lock_kernel(); 167 lock_kernel();
165 dir = dentry->d_parent->d_inode; 168 dir = dentry->d_parent->d_inode;
166 sbi = autofs_sbi(dir->i_sb); 169 sbi = autofs_sbi(dir->i_sb);
@@ -237,7 +240,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
237 * 240 *
238 * We need to do this before we release the directory semaphore. 241 * We need to do this before we release the directory semaphore.
239 */ 242 */
240 dentry->d_op = &autofs_dentry_operations; 243 d_set_d_op(dentry, &autofs_dentry_operations);
241 dentry->d_flags |= DCACHE_AUTOFS_PENDING; 244 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
242 d_add(dentry, NULL); 245 d_add(dentry, NULL);
243 246
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 61685ccceda8..cc8d2840f9b6 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -826,6 +826,13 @@ const struct address_space_operations pohmelfs_aops = {
826 .set_page_dirty = __set_page_dirty_nobuffers, 826 .set_page_dirty = __set_page_dirty_nobuffers,
827}; 827};
828 828
829static void pohmelfs_i_callback(struct rcu_head *head)
830{
831 struct inode *inode = container_of(head, struct inode, i_rcu);
832 INIT_LIST_HEAD(&inode->i_dentry);
833 kmem_cache_free(pohmelfs_inode_cache, POHMELFS_I(inode));
834}
835
829/* 836/*
830 * ->detroy_inode() callback. Deletes inode from the caches 837 * ->detroy_inode() callback. Deletes inode from the caches
831 * and frees private data. 838 * and frees private data.
@@ -842,8 +849,8 @@ static void pohmelfs_destroy_inode(struct inode *inode)
842 849
843 dprintk("%s: pi: %p, inode: %p, ino: %llu.\n", 850 dprintk("%s: pi: %p, inode: %p, ino: %llu.\n",
844 __func__, pi, &pi->vfs_inode, pi->ino); 851 __func__, pi, &pi->vfs_inode, pi->ino);
845 kmem_cache_free(pohmelfs_inode_cache, pi);
846 atomic_long_dec(&psb->total_inodes); 852 atomic_long_dec(&psb->total_inodes);
853 call_rcu(&inode->i_rcu, pohmelfs_i_callback);
847} 854}
848 855
849/* 856/*
diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c
index 8ec83d2dffb7..400a9fc386ad 100644
--- a/drivers/staging/pohmelfs/path_entry.c
+++ b/drivers/staging/pohmelfs/path_entry.c
@@ -83,10 +83,11 @@ out:
83int pohmelfs_path_length(struct pohmelfs_inode *pi) 83int pohmelfs_path_length(struct pohmelfs_inode *pi)
84{ 84{
85 struct dentry *d, *root, *first; 85 struct dentry *d, *root, *first;
86 int len = 1; /* Root slash */ 86 int len;
87 unsigned seq;
87 88
88 first = d = d_find_alias(&pi->vfs_inode); 89 first = d_find_alias(&pi->vfs_inode);
89 if (!d) { 90 if (!first) {
90 dprintk("%s: ino: %llu, mode: %o.\n", __func__, pi->ino, pi->vfs_inode.i_mode); 91 dprintk("%s: ino: %llu, mode: %o.\n", __func__, pi->ino, pi->vfs_inode.i_mode);
91 return -ENOENT; 92 return -ENOENT;
92 } 93 }
@@ -95,7 +96,11 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
95 root = dget(current->fs->root.dentry); 96 root = dget(current->fs->root.dentry);
96 spin_unlock(&current->fs->lock); 97 spin_unlock(&current->fs->lock);
97 98
98 spin_lock(&dcache_lock); 99rename_retry:
100 len = 1; /* Root slash */
101 d = first;
102 seq = read_seqbegin(&rename_lock);
103 rcu_read_lock();
99 104
100 if (!IS_ROOT(d) && d_unhashed(d)) 105 if (!IS_ROOT(d) && d_unhashed(d))
101 len += UNHASHED_OBSCURE_STRING_SIZE; /* Obscure " (deleted)" string */ 106 len += UNHASHED_OBSCURE_STRING_SIZE; /* Obscure " (deleted)" string */
@@ -104,7 +109,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
104 len += d->d_name.len + 1; /* Plus slash */ 109 len += d->d_name.len + 1; /* Plus slash */
105 d = d->d_parent; 110 d = d->d_parent;
106 } 111 }
107 spin_unlock(&dcache_lock); 112 rcu_read_unlock();
113 if (read_seqretry(&rename_lock, seq))
114 goto rename_retry;
108 115
109 dput(root); 116 dput(root);
110 dput(first); 117 dput(first);
diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c
index dbb98658148b..f2a1323ca827 100644
--- a/drivers/staging/smbfs/cache.c
+++ b/drivers/staging/smbfs/cache.c
@@ -62,7 +62,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
62 struct list_head *next; 62 struct list_head *next;
63 struct dentry *dentry; 63 struct dentry *dentry;
64 64
65 spin_lock(&dcache_lock); 65 spin_lock(&parent->d_lock);
66 next = parent->d_subdirs.next; 66 next = parent->d_subdirs.next;
67 while (next != &parent->d_subdirs) { 67 while (next != &parent->d_subdirs) {
68 dentry = list_entry(next, struct dentry, d_u.d_child); 68 dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -70,7 +70,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
70 smb_age_dentry(server, dentry); 70 smb_age_dentry(server, dentry);
71 next = next->next; 71 next = next->next;
72 } 72 }
73 spin_unlock(&dcache_lock); 73 spin_unlock(&parent->d_lock);
74} 74}
75 75
76/* 76/*
@@ -96,13 +96,13 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
96 } 96 }
97 97
98 /* If a pointer is invalid, we search the dentry. */ 98 /* If a pointer is invalid, we search the dentry. */
99 spin_lock(&dcache_lock); 99 spin_lock(&parent->d_lock);
100 next = parent->d_subdirs.next; 100 next = parent->d_subdirs.next;
101 while (next != &parent->d_subdirs) { 101 while (next != &parent->d_subdirs) {
102 dent = list_entry(next, struct dentry, d_u.d_child); 102 dent = list_entry(next, struct dentry, d_u.d_child);
103 if ((unsigned long)dent->d_fsdata == fpos) { 103 if ((unsigned long)dent->d_fsdata == fpos) {
104 if (dent->d_inode) 104 if (dent->d_inode)
105 dget_locked(dent); 105 dget(dent);
106 else 106 else
107 dent = NULL; 107 dent = NULL;
108 goto out_unlock; 108 goto out_unlock;
@@ -111,7 +111,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
111 } 111 }
112 dent = NULL; 112 dent = NULL;
113out_unlock: 113out_unlock:
114 spin_unlock(&dcache_lock); 114 spin_unlock(&parent->d_lock);
115 return dent; 115 return dent;
116} 116}
117 117
@@ -134,7 +134,7 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
134 qname->hash = full_name_hash(qname->name, qname->len); 134 qname->hash = full_name_hash(qname->name, qname->len);
135 135
136 if (dentry->d_op && dentry->d_op->d_hash) 136 if (dentry->d_op && dentry->d_op->d_hash)
137 if (dentry->d_op->d_hash(dentry, qname) != 0) 137 if (dentry->d_op->d_hash(dentry, inode, qname) != 0)
138 goto end_advance; 138 goto end_advance;
139 139
140 newdent = d_lookup(dentry, qname); 140 newdent = d_lookup(dentry, qname);
@@ -145,8 +145,8 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
145 goto end_advance; 145 goto end_advance;
146 } else { 146 } else {
147 hashed = 1; 147 hashed = 1;
148 memcpy((char *) newdent->d_name.name, qname->name, 148 /* dir i_mutex is locked because we're in readdir */
149 newdent->d_name.len); 149 dentry_update_name_case(newdent, qname);
150 } 150 }
151 151
152 if (!newdent->d_inode) { 152 if (!newdent->d_inode) {
diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c
index f088ea2f6ac9..dd612f50749f 100644
--- a/drivers/staging/smbfs/dir.c
+++ b/drivers/staging/smbfs/dir.c
@@ -14,6 +14,7 @@
14#include <linux/ctype.h> 14#include <linux/ctype.h>
15#include <linux/net.h> 15#include <linux/net.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/namei.h>
17 18
18#include "smb_fs.h" 19#include "smb_fs.h"
19#include "smb_mount.h" 20#include "smb_mount.h"
@@ -274,9 +275,13 @@ smb_dir_open(struct inode *dir, struct file *file)
274 * Dentry operations routines 275 * Dentry operations routines
275 */ 276 */
276static int smb_lookup_validate(struct dentry *, struct nameidata *); 277static int smb_lookup_validate(struct dentry *, struct nameidata *);
277static int smb_hash_dentry(struct dentry *, struct qstr *); 278static int smb_hash_dentry(const struct dentry *, const struct inode *,
278static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 279 struct qstr *);
279static int smb_delete_dentry(struct dentry *); 280static int smb_compare_dentry(const struct dentry *,
281 const struct inode *,
282 const struct dentry *, const struct inode *,
283 unsigned int, const char *, const struct qstr *);
284static int smb_delete_dentry(const struct dentry *);
280 285
281static const struct dentry_operations smbfs_dentry_operations = 286static const struct dentry_operations smbfs_dentry_operations =
282{ 287{
@@ -297,13 +302,20 @@ static const struct dentry_operations smbfs_dentry_operations_case =
297 * This is the callback when the dcache has a lookup hit. 302 * This is the callback when the dcache has a lookup hit.
298 */ 303 */
299static int 304static int
300smb_lookup_validate(struct dentry * dentry, struct nameidata *nd) 305smb_lookup_validate(struct dentry *dentry, struct nameidata *nd)
301{ 306{
302 struct smb_sb_info *server = server_from_dentry(dentry); 307 struct smb_sb_info *server;
303 struct inode * inode = dentry->d_inode; 308 struct inode *inode;
304 unsigned long age = jiffies - dentry->d_time; 309 unsigned long age;
305 int valid; 310 int valid;
306 311
312 if (nd->flags & LOOKUP_RCU)
313 return -ECHILD;
314
315 server = server_from_dentry(dentry);
316 inode = dentry->d_inode;
317 age = jiffies - dentry->d_time;
318
307 /* 319 /*
308 * The default validation is based on dentry age: 320 * The default validation is based on dentry age:
309 * we believe in dentries for a few seconds. (But each 321 * we believe in dentries for a few seconds. (But each
@@ -333,7 +345,8 @@ smb_lookup_validate(struct dentry * dentry, struct nameidata *nd)
333} 345}
334 346
335static int 347static int
336smb_hash_dentry(struct dentry *dir, struct qstr *this) 348smb_hash_dentry(const struct dentry *dir, const struct inode *inode,
349 struct qstr *this)
337{ 350{
338 unsigned long hash; 351 unsigned long hash;
339 int i; 352 int i;
@@ -347,14 +360,17 @@ smb_hash_dentry(struct dentry *dir, struct qstr *this)
347} 360}
348 361
349static int 362static int
350smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b) 363smb_compare_dentry(const struct dentry *parent,
364 const struct inode *pinode,
365 const struct dentry *dentry, const struct inode *inode,
366 unsigned int len, const char *str, const struct qstr *name)
351{ 367{
352 int i, result = 1; 368 int i, result = 1;
353 369
354 if (a->len != b->len) 370 if (len != name->len)
355 goto out; 371 goto out;
356 for (i=0; i < a->len; i++) { 372 for (i=0; i < len; i++) {
357 if (tolower(a->name[i]) != tolower(b->name[i])) 373 if (tolower(str[i]) != tolower(name->name[i]))
358 goto out; 374 goto out;
359 } 375 }
360 result = 0; 376 result = 0;
@@ -367,7 +383,7 @@ out:
367 * We use this to unhash dentries with bad inodes. 383 * We use this to unhash dentries with bad inodes.
368 */ 384 */
369static int 385static int
370smb_delete_dentry(struct dentry * dentry) 386smb_delete_dentry(const struct dentry *dentry)
371{ 387{
372 if (dentry->d_inode) { 388 if (dentry->d_inode) {
373 if (is_bad_inode(dentry->d_inode)) { 389 if (is_bad_inode(dentry->d_inode)) {
@@ -390,9 +406,9 @@ smb_new_dentry(struct dentry *dentry)
390 struct smb_sb_info *server = server_from_dentry(dentry); 406 struct smb_sb_info *server = server_from_dentry(dentry);
391 407
392 if (server->mnt->flags & SMB_MOUNT_CASE) 408 if (server->mnt->flags & SMB_MOUNT_CASE)
393 dentry->d_op = &smbfs_dentry_operations_case; 409 d_set_d_op(dentry, &smbfs_dentry_operations_case);
394 else 410 else
395 dentry->d_op = &smbfs_dentry_operations; 411 d_set_d_op(dentry, &smbfs_dentry_operations);
396 dentry->d_time = jiffies; 412 dentry->d_time = jiffies;
397} 413}
398 414
@@ -454,9 +470,9 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
454 add_entry: 470 add_entry:
455 server = server_from_dentry(dentry); 471 server = server_from_dentry(dentry);
456 if (server->mnt->flags & SMB_MOUNT_CASE) 472 if (server->mnt->flags & SMB_MOUNT_CASE)
457 dentry->d_op = &smbfs_dentry_operations_case; 473 d_set_d_op(dentry, &smbfs_dentry_operations_case);
458 else 474 else
459 dentry->d_op = &smbfs_dentry_operations; 475 d_set_d_op(dentry, &smbfs_dentry_operations);
460 476
461 d_add(dentry, inode); 477 d_add(dentry, inode);
462 smb_renew_times(dentry); 478 smb_renew_times(dentry);
diff --git a/drivers/staging/smbfs/file.c b/drivers/staging/smbfs/file.c
index 5dcd19c60eb9..31372e7b12de 100644
--- a/drivers/staging/smbfs/file.c
+++ b/drivers/staging/smbfs/file.c
@@ -407,11 +407,14 @@ smb_file_release(struct inode *inode, struct file * file)
407 * privileges, so we need our own check for this. 407 * privileges, so we need our own check for this.
408 */ 408 */
409static int 409static int
410smb_file_permission(struct inode *inode, int mask) 410smb_file_permission(struct inode *inode, int mask, unsigned int flags)
411{ 411{
412 int mode = inode->i_mode; 412 int mode = inode->i_mode;
413 int error = 0; 413 int error = 0;
414 414
415 if (flags & IPERM_FLAG_RCU)
416 return -ECHILD;
417
415 VERBOSE("mode=%x, mask=%x\n", mode, mask); 418 VERBOSE("mode=%x, mask=%x\n", mode, mask);
416 419
417 /* Look at user permissions */ 420 /* Look at user permissions */
diff --git a/drivers/staging/smbfs/inode.c b/drivers/staging/smbfs/inode.c
index 540a984bb516..244319dc9702 100644
--- a/drivers/staging/smbfs/inode.c
+++ b/drivers/staging/smbfs/inode.c
@@ -62,11 +62,18 @@ static struct inode *smb_alloc_inode(struct super_block *sb)
62 return &ei->vfs_inode; 62 return &ei->vfs_inode;
63} 63}
64 64
65static void smb_destroy_inode(struct inode *inode) 65static void smb_i_callback(struct rcu_head *head)
66{ 66{
67 struct inode *inode = container_of(head, struct inode, i_rcu);
68 INIT_LIST_HEAD(&inode->i_dentry);
67 kmem_cache_free(smb_inode_cachep, SMB_I(inode)); 69 kmem_cache_free(smb_inode_cachep, SMB_I(inode));
68} 70}
69 71
72static void smb_destroy_inode(struct inode *inode)
73{
74 call_rcu(&inode->i_rcu, smb_i_callback);
75}
76
70static void init_once(void *foo) 77static void init_once(void *foo)
71{ 78{
72 struct smb_inode_info *ei = (struct smb_inode_info *) foo; 79 struct smb_inode_info *ei = (struct smb_inode_info *) foo;
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index b690aa35df9a..1b125c224dcf 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -343,17 +343,19 @@ static int usbfs_empty (struct dentry *dentry)
343{ 343{
344 struct list_head *list; 344 struct list_head *list;
345 345
346 spin_lock(&dcache_lock); 346 spin_lock(&dentry->d_lock);
347
348 list_for_each(list, &dentry->d_subdirs) { 347 list_for_each(list, &dentry->d_subdirs) {
349 struct dentry *de = list_entry(list, struct dentry, d_u.d_child); 348 struct dentry *de = list_entry(list, struct dentry, d_u.d_child);
349
350 spin_lock_nested(&de->d_lock, DENTRY_D_LOCK_NESTED);
350 if (usbfs_positive(de)) { 351 if (usbfs_positive(de)) {
351 spin_unlock(&dcache_lock); 352 spin_unlock(&de->d_lock);
353 spin_unlock(&dentry->d_lock);
352 return 0; 354 return 0;
353 } 355 }
356 spin_unlock(&de->d_lock);
354 } 357 }
355 358 spin_unlock(&dentry->d_lock);
356 spin_unlock(&dcache_lock);
357 return 1; 359 return 1;
358} 360}
359 361
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 12d602351dbe..6e58c4ca1e6e 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -91,11 +91,14 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
91 return acl; 91 return acl;
92} 92}
93 93
94int v9fs_check_acl(struct inode *inode, int mask) 94int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
95{ 95{
96 struct posix_acl *acl; 96 struct posix_acl *acl;
97 struct v9fs_session_info *v9ses; 97 struct v9fs_session_info *v9ses;
98 98
99 if (flags & IPERM_FLAG_RCU)
100 return -ECHILD;
101
99 v9ses = v9fs_inode2v9ses(inode); 102 v9ses = v9fs_inode2v9ses(inode);
100 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 103 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
101 /* 104 /*
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 59e18c2e8c7e..7ef3ac9f6d95 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -16,7 +16,7 @@
16 16
17#ifdef CONFIG_9P_FS_POSIX_ACL 17#ifdef CONFIG_9P_FS_POSIX_ACL
18extern int v9fs_get_acl(struct inode *, struct p9_fid *); 18extern int v9fs_get_acl(struct inode *, struct p9_fid *);
19extern int v9fs_check_acl(struct inode *inode, int mask); 19extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
20extern int v9fs_acl_chmod(struct dentry *); 20extern int v9fs_acl_chmod(struct dentry *);
21extern int v9fs_set_create_acl(struct dentry *, 21extern int v9fs_set_create_acl(struct dentry *,
22 struct posix_acl *, struct posix_acl *); 22 struct posix_acl *, struct posix_acl *);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index cbf4e50f3933..466d2a4fc5cb 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -51,7 +51,7 @@
51 * 51 *
52 */ 52 */
53 53
54static int v9fs_dentry_delete(struct dentry *dentry) 54static int v9fs_dentry_delete(const struct dentry *dentry)
55{ 55{
56 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 56 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
57 dentry); 57 dentry);
@@ -68,7 +68,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
68 * 68 *
69 */ 69 */
70 70
71static int v9fs_cached_dentry_delete(struct dentry *dentry) 71static int v9fs_cached_dentry_delete(const struct dentry *dentry)
72{ 72{
73 struct inode *inode = dentry->d_inode; 73 struct inode *inode = dentry->d_inode;
74 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 74 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 34bf71b56542..59782981b225 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -237,10 +237,17 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
237 * 237 *
238 */ 238 */
239 239
240void v9fs_destroy_inode(struct inode *inode) 240static void v9fs_i_callback(struct rcu_head *head)
241{ 241{
242 struct inode *inode = container_of(head, struct inode, i_rcu);
243 INIT_LIST_HEAD(&inode->i_dentry);
242 kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); 244 kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
243} 245}
246
247void v9fs_destroy_inode(struct inode *inode)
248{
249 call_rcu(&inode->i_rcu, v9fs_i_callback);
250}
244#endif 251#endif
245 252
246/** 253/**
@@ -270,11 +277,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
270{ 277{
271 struct dentry *dentry; 278 struct dentry *dentry;
272 279
273 spin_lock(&dcache_lock); 280 spin_lock(&inode->i_lock);
274 /* Directory should have only one entry. */ 281 /* Directory should have only one entry. */
275 BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); 282 BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
276 dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); 283 dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
277 spin_unlock(&dcache_lock); 284 spin_unlock(&inode->i_lock);
278 return dentry; 285 return dentry;
279} 286}
280 287
@@ -628,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
628 } 635 }
629 636
630 if (v9ses->cache) 637 if (v9ses->cache)
631 dentry->d_op = &v9fs_cached_dentry_operations; 638 d_set_d_op(dentry, &v9fs_cached_dentry_operations);
632 else 639 else
633 dentry->d_op = &v9fs_dentry_operations; 640 d_set_d_op(dentry, &v9fs_dentry_operations);
634 641
635 d_instantiate(dentry, inode); 642 d_instantiate(dentry, inode);
636 err = v9fs_fid_add(dentry, fid); 643 err = v9fs_fid_add(dentry, fid);
@@ -742,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
742 err); 749 err);
743 goto error; 750 goto error;
744 } 751 }
745 dentry->d_op = &v9fs_cached_dentry_operations; 752 d_set_d_op(dentry, &v9fs_cached_dentry_operations);
746 d_instantiate(dentry, inode); 753 d_instantiate(dentry, inode);
747 err = v9fs_fid_add(dentry, fid); 754 err = v9fs_fid_add(dentry, fid);
748 if (err < 0) 755 if (err < 0)
@@ -760,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
760 err = PTR_ERR(inode); 767 err = PTR_ERR(inode);
761 goto error; 768 goto error;
762 } 769 }
763 dentry->d_op = &v9fs_dentry_operations; 770 d_set_d_op(dentry, &v9fs_dentry_operations);
764 d_instantiate(dentry, inode); 771 d_instantiate(dentry, inode);
765 } 772 }
766 /* Now set the ACL based on the default value */ 773 /* Now set the ACL based on the default value */
@@ -949,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
949 err); 956 err);
950 goto error; 957 goto error;
951 } 958 }
952 dentry->d_op = &v9fs_cached_dentry_operations; 959 d_set_d_op(dentry, &v9fs_cached_dentry_operations);
953 d_instantiate(dentry, inode); 960 d_instantiate(dentry, inode);
954 err = v9fs_fid_add(dentry, fid); 961 err = v9fs_fid_add(dentry, fid);
955 if (err < 0) 962 if (err < 0)
@@ -966,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
966 err = PTR_ERR(inode); 973 err = PTR_ERR(inode);
967 goto error; 974 goto error;
968 } 975 }
969 dentry->d_op = &v9fs_dentry_operations; 976 d_set_d_op(dentry, &v9fs_dentry_operations);
970 d_instantiate(dentry, inode); 977 d_instantiate(dentry, inode);
971 } 978 }
972 /* Now set the ACL based on the default value */ 979 /* Now set the ACL based on the default value */
@@ -1034,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
1034 1041
1035inst_out: 1042inst_out:
1036 if (v9ses->cache) 1043 if (v9ses->cache)
1037 dentry->d_op = &v9fs_cached_dentry_operations; 1044 d_set_d_op(dentry, &v9fs_cached_dentry_operations);
1038 else 1045 else
1039 dentry->d_op = &v9fs_dentry_operations; 1046 d_set_d_op(dentry, &v9fs_dentry_operations);
1040 1047
1041 d_add(dentry, inode); 1048 d_add(dentry, inode);
1042 return NULL; 1049 return NULL;
@@ -1702,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
1702 err); 1709 err);
1703 goto error; 1710 goto error;
1704 } 1711 }
1705 dentry->d_op = &v9fs_cached_dentry_operations; 1712 d_set_d_op(dentry, &v9fs_cached_dentry_operations);
1706 d_instantiate(dentry, inode); 1713 d_instantiate(dentry, inode);
1707 err = v9fs_fid_add(dentry, fid); 1714 err = v9fs_fid_add(dentry, fid);
1708 if (err < 0) 1715 if (err < 0)
@@ -1715,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
1715 err = PTR_ERR(inode); 1722 err = PTR_ERR(inode);
1716 goto error; 1723 goto error;
1717 } 1724 }
1718 dentry->d_op = &v9fs_dentry_operations; 1725 d_set_d_op(dentry, &v9fs_dentry_operations);
1719 d_instantiate(dentry, inode); 1726 d_instantiate(dentry, inode);
1720 } 1727 }
1721 1728
@@ -1849,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
1849 ihold(old_dentry->d_inode); 1856 ihold(old_dentry->d_inode);
1850 } 1857 }
1851 1858
1852 dentry->d_op = old_dentry->d_op; 1859 d_set_d_op(dentry, old_dentry->d_op);
1853 d_instantiate(dentry, old_dentry->d_inode); 1860 d_instantiate(dentry, old_dentry->d_inode);
1854 1861
1855 return err; 1862 return err;
@@ -1973,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
1973 err); 1980 err);
1974 goto error; 1981 goto error;
1975 } 1982 }
1976 dentry->d_op = &v9fs_cached_dentry_operations; 1983 d_set_d_op(dentry, &v9fs_cached_dentry_operations);
1977 d_instantiate(dentry, inode); 1984 d_instantiate(dentry, inode);
1978 err = v9fs_fid_add(dentry, fid); 1985 err = v9fs_fid_add(dentry, fid);
1979 if (err < 0) 1986 if (err < 0)
@@ -1989,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
1989 err = PTR_ERR(inode); 1996 err = PTR_ERR(inode);
1990 goto error; 1997 goto error;
1991 } 1998 }
1992 dentry->d_op = &v9fs_dentry_operations; 1999 d_set_d_op(dentry, &v9fs_dentry_operations);
1993 d_instantiate(dentry, inode); 2000 d_instantiate(dentry, inode);
1994 } 2001 }
1995 /* Now set the ACL based on the default value */ 2002 /* Now set the ACL based on the default value */
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index f4287e4de744..bf7693c384f9 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -201,7 +201,8 @@ const struct file_operations adfs_dir_operations = {
201}; 201};
202 202
203static int 203static int
204adfs_hash(struct dentry *parent, struct qstr *qstr) 204adfs_hash(const struct dentry *parent, const struct inode *inode,
205 struct qstr *qstr)
205{ 206{
206 const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen; 207 const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen;
207 const unsigned char *name; 208 const unsigned char *name;
@@ -237,17 +238,19 @@ adfs_hash(struct dentry *parent, struct qstr *qstr)
237 * requirements of the underlying filesystem. 238 * requirements of the underlying filesystem.
238 */ 239 */
239static int 240static int
240adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name) 241adfs_compare(const struct dentry *parent, const struct inode *pinode,
242 const struct dentry *dentry, const struct inode *inode,
243 unsigned int len, const char *str, const struct qstr *name)
241{ 244{
242 int i; 245 int i;
243 246
244 if (entry->len != name->len) 247 if (len != name->len)
245 return 1; 248 return 1;
246 249
247 for (i = 0; i < name->len; i++) { 250 for (i = 0; i < name->len; i++) {
248 char a, b; 251 char a, b;
249 252
250 a = entry->name[i]; 253 a = str[i];
251 b = name->name[i]; 254 b = name->name[i];
252 255
253 if (a >= 'A' && a <= 'Z') 256 if (a >= 'A' && a <= 'Z')
@@ -273,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
273 struct object_info obj; 276 struct object_info obj;
274 int error; 277 int error;
275 278
276 dentry->d_op = &adfs_dentry_operations; 279 d_set_d_op(dentry, &adfs_dentry_operations);
277 lock_kernel(); 280 lock_kernel();
278 error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); 281 error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
279 if (error == 0) { 282 if (error == 0) {
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 959dbff2d42d..a4041b52fbca 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
240 return &ei->vfs_inode; 240 return &ei->vfs_inode;
241} 241}
242 242
243static void adfs_destroy_inode(struct inode *inode) 243static void adfs_i_callback(struct rcu_head *head)
244{ 244{
245 struct inode *inode = container_of(head, struct inode, i_rcu);
246 INIT_LIST_HEAD(&inode->i_dentry);
245 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); 247 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
246} 248}
247 249
250static void adfs_destroy_inode(struct inode *inode)
251{
252 call_rcu(&inode->i_rcu, adfs_i_callback);
253}
254
248static void init_once(void *foo) 255static void init_once(void *foo)
249{ 256{
250 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; 257 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
@@ -477,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
477 adfs_error(sb, "get root inode failed\n"); 484 adfs_error(sb, "get root inode failed\n");
478 goto error; 485 goto error;
479 } else 486 } else
480 sb->s_root->d_op = &adfs_dentry_operations; 487 d_set_d_op(sb->s_root, &adfs_dentry_operations);
481 unlock_kernel(); 488 unlock_kernel();
482 return 0; 489 return 0;
483 490
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 7d0f0a30f7a3..3a4557e8325c 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
128 void *data = dentry->d_fsdata; 128 void *data = dentry->d_fsdata;
129 struct list_head *head, *next; 129 struct list_head *head, *next;
130 130
131 spin_lock(&dcache_lock); 131 spin_lock(&inode->i_lock);
132 head = &inode->i_dentry; 132 head = &inode->i_dentry;
133 next = head->next; 133 next = head->next;
134 while (next != head) { 134 while (next != head) {
@@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
139 } 139 }
140 next = next->next; 140 next = next->next;
141 } 141 }
142 spin_unlock(&dcache_lock); 142 spin_unlock(&inode->i_lock);
143} 143}
144 144
145 145
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 914d1c0bc07a..944a4042fb65 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -13,11 +13,19 @@
13typedef int (*toupper_t)(int); 13typedef int (*toupper_t)(int);
14 14
15static int affs_toupper(int ch); 15static int affs_toupper(int ch);
16static int affs_hash_dentry(struct dentry *, struct qstr *); 16static int affs_hash_dentry(const struct dentry *,
17static int affs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 17 const struct inode *, struct qstr *);
18static int affs_compare_dentry(const struct dentry *parent,
19 const struct inode *pinode,
20 const struct dentry *dentry, const struct inode *inode,
21 unsigned int len, const char *str, const struct qstr *name);
18static int affs_intl_toupper(int ch); 22static int affs_intl_toupper(int ch);
19static int affs_intl_hash_dentry(struct dentry *, struct qstr *); 23static int affs_intl_hash_dentry(const struct dentry *,
20static int affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 24 const struct inode *, struct qstr *);
25static int affs_intl_compare_dentry(const struct dentry *parent,
26 const struct inode *pinode,
27 const struct dentry *dentry, const struct inode *inode,
28 unsigned int len, const char *str, const struct qstr *name);
21 29
22const struct dentry_operations affs_dentry_operations = { 30const struct dentry_operations affs_dentry_operations = {
23 .d_hash = affs_hash_dentry, 31 .d_hash = affs_hash_dentry,
@@ -58,13 +66,13 @@ affs_get_toupper(struct super_block *sb)
58 * Note: the dentry argument is the parent dentry. 66 * Note: the dentry argument is the parent dentry.
59 */ 67 */
60static inline int 68static inline int
61__affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper) 69__affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
62{ 70{
63 const u8 *name = qstr->name; 71 const u8 *name = qstr->name;
64 unsigned long hash; 72 unsigned long hash;
65 int i; 73 int i;
66 74
67 i = affs_check_name(qstr->name,qstr->len); 75 i = affs_check_name(qstr->name, qstr->len);
68 if (i) 76 if (i)
69 return i; 77 return i;
70 78
@@ -78,39 +86,41 @@ __affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
78} 86}
79 87
80static int 88static int
81affs_hash_dentry(struct dentry *dentry, struct qstr *qstr) 89affs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
90 struct qstr *qstr)
82{ 91{
83 return __affs_hash_dentry(dentry, qstr, affs_toupper); 92 return __affs_hash_dentry(qstr, affs_toupper);
84} 93}
85static int 94static int
86affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr) 95affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode,
96 struct qstr *qstr)
87{ 97{
88 return __affs_hash_dentry(dentry, qstr, affs_intl_toupper); 98 return __affs_hash_dentry(qstr, affs_intl_toupper);
89} 99}
90 100
91static inline int 101static inline int __affs_compare_dentry(unsigned int len,
92__affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, toupper_t toupper) 102 const char *str, const struct qstr *name, toupper_t toupper)
93{ 103{
94 const u8 *aname = a->name; 104 const u8 *aname = str;
95 const u8 *bname = b->name; 105 const u8 *bname = name->name;
96 int len;
97 106
98 /* 'a' is the qstr of an already existing dentry, so the name 107 /*
99 * must be valid. 'b' must be validated first. 108 * 'str' is the name of an already existing dentry, so the name
109 * must be valid. 'name' must be validated first.
100 */ 110 */
101 111
102 if (affs_check_name(b->name,b->len)) 112 if (affs_check_name(name->name, name->len))
103 return 1; 113 return 1;
104 114
105 /* If the names are longer than the allowed 30 chars, 115 /*
116 * If the names are longer than the allowed 30 chars,
106 * the excess is ignored, so their length may differ. 117 * the excess is ignored, so their length may differ.
107 */ 118 */
108 len = a->len;
109 if (len >= 30) { 119 if (len >= 30) {
110 if (b->len < 30) 120 if (name->len < 30)
111 return 1; 121 return 1;
112 len = 30; 122 len = 30;
113 } else if (len != b->len) 123 } else if (len != name->len)
114 return 1; 124 return 1;
115 125
116 for (; len > 0; len--) 126 for (; len > 0; len--)
@@ -121,14 +131,18 @@ __affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, tou
121} 131}
122 132
123static int 133static int
124affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 134affs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
135 const struct dentry *dentry, const struct inode *inode,
136 unsigned int len, const char *str, const struct qstr *name)
125{ 137{
126 return __affs_compare_dentry(dentry, a, b, affs_toupper); 138 return __affs_compare_dentry(len, str, name, affs_toupper);
127} 139}
128static int 140static int
129affs_intl_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 141affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode,
142 const struct dentry *dentry, const struct inode *inode,
143 unsigned int len, const char *str, const struct qstr *name)
130{ 144{
131 return __affs_compare_dentry(dentry, a, b, affs_intl_toupper); 145 return __affs_compare_dentry(len, str, name, affs_intl_toupper);
132} 146}
133 147
134/* 148/*
@@ -226,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
226 if (IS_ERR(inode)) 240 if (IS_ERR(inode))
227 return ERR_CAST(inode); 241 return ERR_CAST(inode);
228 } 242 }
229 dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; 243 d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations);
230 d_add(dentry, inode); 244 d_add(dentry, inode);
231 return NULL; 245 return NULL;
232} 246}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0cf7f4384cbd..d39081bbe7ce 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
95 return &i->vfs_inode; 95 return &i->vfs_inode;
96} 96}
97 97
98static void affs_destroy_inode(struct inode *inode) 98static void affs_i_callback(struct rcu_head *head)
99{ 99{
100 struct inode *inode = container_of(head, struct inode, i_rcu);
101 INIT_LIST_HEAD(&inode->i_dentry);
100 kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); 102 kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
101} 103}
102 104
105static void affs_destroy_inode(struct inode *inode)
106{
107 call_rcu(&inode->i_rcu, affs_i_callback);
108}
109
103static void init_once(void *foo) 110static void init_once(void *foo)
104{ 111{
105 struct affs_inode_info *ei = (struct affs_inode_info *) foo; 112 struct affs_inode_info *ei = (struct affs_inode_info *) foo;
@@ -475,7 +482,7 @@ got_root:
475 printk(KERN_ERR "AFFS: Get root inode failed\n"); 482 printk(KERN_ERR "AFFS: Get root inode failed\n");
476 goto out_error; 483 goto out_error;
477 } 484 }
478 sb->s_root->d_op = &affs_dentry_operations; 485 d_set_d_op(sb->s_root, &affs_dentry_operations);
479 486
480 pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); 487 pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
481 return 0; 488 return 0;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5439e1bc9a86..34a3263d60a4 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/namei.h>
16#include <linux/pagemap.h> 17#include <linux/pagemap.h>
17#include <linux/ctype.h> 18#include <linux/ctype.h>
18#include <linux/sched.h> 19#include <linux/sched.h>
@@ -23,7 +24,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
23static int afs_dir_open(struct inode *inode, struct file *file); 24static int afs_dir_open(struct inode *inode, struct file *file);
24static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); 25static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
25static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); 26static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
26static int afs_d_delete(struct dentry *dentry); 27static int afs_d_delete(const struct dentry *dentry);
27static void afs_d_release(struct dentry *dentry); 28static void afs_d_release(struct dentry *dentry);
28static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, 29static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
29 loff_t fpos, u64 ino, unsigned dtype); 30 loff_t fpos, u64 ino, unsigned dtype);
@@ -581,7 +582,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
581 } 582 }
582 583
583success: 584success:
584 dentry->d_op = &afs_fs_dentry_operations; 585 d_set_d_op(dentry, &afs_fs_dentry_operations);
585 586
586 d_add(dentry, inode); 587 d_add(dentry, inode);
587 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", 588 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
@@ -607,6 +608,9 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
607 void *dir_version; 608 void *dir_version;
608 int ret; 609 int ret;
609 610
611 if (nd->flags & LOOKUP_RCU)
612 return -ECHILD;
613
610 vnode = AFS_FS_I(dentry->d_inode); 614 vnode = AFS_FS_I(dentry->d_inode);
611 615
612 if (dentry->d_inode) 616 if (dentry->d_inode)
@@ -730,7 +734,7 @@ out_bad:
730 * - called from dput() when d_count is going to 0. 734 * - called from dput() when d_count is going to 0.
731 * - return 1 to request dentry be unhashed, 0 otherwise 735 * - return 1 to request dentry be unhashed, 0 otherwise
732 */ 736 */
733static int afs_d_delete(struct dentry *dentry) 737static int afs_d_delete(const struct dentry *dentry)
734{ 738{
735 _enter("%s", dentry->d_name.name); 739 _enter("%s", dentry->d_name.name);
736 740
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index cca8eef736fc..6d4bc1c8ff60 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -624,7 +624,7 @@ extern void afs_clear_permits(struct afs_vnode *);
624extern void afs_cache_permit(struct afs_vnode *, struct key *, long); 624extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
625extern void afs_zap_permits(struct rcu_head *); 625extern void afs_zap_permits(struct rcu_head *);
626extern struct key *afs_request_key(struct afs_cell *); 626extern struct key *afs_request_key(struct afs_cell *);
627extern int afs_permission(struct inode *, int); 627extern int afs_permission(struct inode *, int, unsigned int);
628 628
629/* 629/*
630 * server.c 630 * server.c
diff --git a/fs/afs/security.c b/fs/afs/security.c
index bb4ed144d0e4..f44b9d355377 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -285,13 +285,16 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
285 * - AFS ACLs are attached to directories only, and a file is controlled by its 285 * - AFS ACLs are attached to directories only, and a file is controlled by its
286 * parent directory's ACL 286 * parent directory's ACL
287 */ 287 */
288int afs_permission(struct inode *inode, int mask) 288int afs_permission(struct inode *inode, int mask, unsigned int flags)
289{ 289{
290 struct afs_vnode *vnode = AFS_FS_I(inode); 290 struct afs_vnode *vnode = AFS_FS_I(inode);
291 afs_access_t uninitialized_var(access); 291 afs_access_t uninitialized_var(access);
292 struct key *key; 292 struct key *key;
293 int ret; 293 int ret;
294 294
295 if (flags & IPERM_FLAG_RCU)
296 return -ECHILD;
297
295 _enter("{{%x:%u},%lx},%x,", 298 _enter("{{%x:%u},%lx},%x,",
296 vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); 299 vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
297 300
@@ -347,7 +350,7 @@ int afs_permission(struct inode *inode, int mask)
347 } 350 }
348 351
349 key_put(key); 352 key_put(key);
350 ret = generic_permission(inode, mask, NULL); 353 ret = generic_permission(inode, mask, flags, NULL);
351 _leave(" = %d", ret); 354 _leave(" = %d", ret);
352 return ret; 355 return ret;
353 356
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 27201cffece4..f901a9d7c111 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
498 return &vnode->vfs_inode; 498 return &vnode->vfs_inode;
499} 499}
500 500
501static void afs_i_callback(struct rcu_head *head)
502{
503 struct inode *inode = container_of(head, struct inode, i_rcu);
504 struct afs_vnode *vnode = AFS_FS_I(inode);
505 INIT_LIST_HEAD(&inode->i_dentry);
506 kmem_cache_free(afs_inode_cachep, vnode);
507}
508
501/* 509/*
502 * destroy an AFS inode struct 510 * destroy an AFS inode struct
503 */ 511 */
@@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode)
511 519
512 ASSERTCMP(vnode->server, ==, NULL); 520 ASSERTCMP(vnode->server, ==, NULL);
513 521
514 kmem_cache_free(afs_inode_cachep, vnode); 522 call_rcu(&inode->i_rcu, afs_i_callback);
515 atomic_dec(&afs_count_active_inodes); 523 atomic_dec(&afs_count_active_inodes);
516} 524}
517 525
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 57ce55b2564c..5fd38112a6ca 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name,
102 this.name = name; 102 this.name = name;
103 this.len = strlen(name); 103 this.len = strlen(name);
104 this.hash = 0; 104 this.hash = 0;
105 path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); 105 path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
106 if (!path.dentry) 106 if (!path.dentry)
107 goto err_module; 107 goto err_module;
108 108
@@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name,
113 */ 113 */
114 ihold(anon_inode_inode); 114 ihold(anon_inode_inode);
115 115
116 path.dentry->d_op = &anon_inodefs_dentry_operations; 116 d_set_d_op(path.dentry, &anon_inodefs_dentry_operations);
117 d_instantiate(path.dentry, anon_inode_inode); 117 d_instantiate(path.dentry, anon_inode_inode);
118 118
119 error = -ENFILE; 119 error = -ENFILE;
@@ -232,7 +232,7 @@ static int __init anon_inode_init(void)
232 return 0; 232 return 0;
233 233
234err_mntput: 234err_mntput:
235 mntput(anon_inode_mnt); 235 mntput_long(anon_inode_mnt);
236err_unregister_filesystem: 236err_unregister_filesystem:
237 unregister_filesystem(&anon_inode_fs_type); 237 unregister_filesystem(&anon_inode_fs_type);
238err_exit: 238err_exit:
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3d283abf67d7..0fffe1c24cec 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -16,6 +16,7 @@
16#include <linux/auto_fs4.h> 16#include <linux/auto_fs4.h>
17#include <linux/auto_dev-ioctl.h> 17#include <linux/auto_dev-ioctl.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19#include <linux/spinlock.h>
19#include <linux/list.h> 20#include <linux/list.h>
20 21
21/* This is the range of ioctl() numbers we claim as ours */ 22/* This is the range of ioctl() numbers we claim as ours */
@@ -60,6 +61,8 @@ do { \
60 current->pid, __func__, ##args); \ 61 current->pid, __func__, ##args); \
61} while (0) 62} while (0)
62 63
64extern spinlock_t autofs4_lock;
65
63/* Unified info structure. This is pointed to by both the dentry and 66/* Unified info structure. This is pointed to by both the dentry and
64 inode structures. Each file in the filesystem has an instance of this 67 inode structures. Each file in the filesystem has an instance of this
65 structure. It holds a reference to the dentry, so dentries are never 68 structure. It holds a reference to the dentry, so dentries are never
@@ -254,17 +257,15 @@ static inline int simple_positive(struct dentry *dentry)
254 return dentry->d_inode && !d_unhashed(dentry); 257 return dentry->d_inode && !d_unhashed(dentry);
255} 258}
256 259
257static inline int __simple_empty(struct dentry *dentry) 260static inline void __autofs4_add_expiring(struct dentry *dentry)
258{ 261{
259 struct dentry *child; 262 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
260 int ret = 0; 263 struct autofs_info *ino = autofs4_dentry_ino(dentry);
261 264 if (ino) {
262 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) 265 if (list_empty(&ino->expiring))
263 if (simple_positive(child)) 266 list_add(&ino->expiring, &sbi->expiring_list);
264 goto out; 267 }
265 ret = 1; 268 return;
266out:
267 return ret;
268} 269}
269 270
270static inline void autofs4_add_expiring(struct dentry *dentry) 271static inline void autofs4_add_expiring(struct dentry *dentry)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index a796c9417fb1..cc1d01365905 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -91,24 +91,64 @@ done:
91} 91}
92 92
93/* 93/*
94 * Calculate next entry in top down tree traversal. 94 * Calculate and dget next entry in top down tree traversal.
95 * From next_mnt in namespace.c - elegant.
96 */ 95 */
97static struct dentry *next_dentry(struct dentry *p, struct dentry *root) 96static struct dentry *get_next_positive_dentry(struct dentry *prev,
97 struct dentry *root)
98{ 98{
99 struct list_head *next = p->d_subdirs.next; 99 struct list_head *next;
100 struct dentry *p, *ret;
101
102 if (prev == NULL)
103 return dget(prev);
100 104
105 spin_lock(&autofs4_lock);
106relock:
107 p = prev;
108 spin_lock(&p->d_lock);
109again:
110 next = p->d_subdirs.next;
101 if (next == &p->d_subdirs) { 111 if (next == &p->d_subdirs) {
102 while (1) { 112 while (1) {
103 if (p == root) 113 struct dentry *parent;
114
115 if (p == root) {
116 spin_unlock(&p->d_lock);
117 spin_unlock(&autofs4_lock);
118 dput(prev);
104 return NULL; 119 return NULL;
120 }
121
122 parent = p->d_parent;
123 if (!spin_trylock(&parent->d_lock)) {
124 spin_unlock(&p->d_lock);
125 cpu_relax();
126 goto relock;
127 }
128 spin_unlock(&p->d_lock);
105 next = p->d_u.d_child.next; 129 next = p->d_u.d_child.next;
106 if (next != &p->d_parent->d_subdirs) 130 p = parent;
131 if (next != &parent->d_subdirs)
107 break; 132 break;
108 p = p->d_parent;
109 } 133 }
110 } 134 }
111 return list_entry(next, struct dentry, d_u.d_child); 135 ret = list_entry(next, struct dentry, d_u.d_child);
136
137 spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
138 /* Negative dentry - try next */
139 if (!simple_positive(ret)) {
140 spin_unlock(&ret->d_lock);
141 p = ret;
142 goto again;
143 }
144 dget_dlock(ret);
145 spin_unlock(&ret->d_lock);
146 spin_unlock(&p->d_lock);
147 spin_unlock(&autofs4_lock);
148
149 dput(prev);
150
151 return ret;
112} 152}
113 153
114/* 154/*
@@ -158,18 +198,11 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
158 if (!simple_positive(top)) 198 if (!simple_positive(top))
159 return 1; 199 return 1;
160 200
161 spin_lock(&dcache_lock); 201 p = NULL;
162 for (p = top; p; p = next_dentry(p, top)) { 202 while ((p = get_next_positive_dentry(p, top))) {
163 /* Negative dentry - give up */
164 if (!simple_positive(p))
165 continue;
166
167 DPRINTK("dentry %p %.*s", 203 DPRINTK("dentry %p %.*s",
168 p, (int) p->d_name.len, p->d_name.name); 204 p, (int) p->d_name.len, p->d_name.name);
169 205
170 p = dget(p);
171 spin_unlock(&dcache_lock);
172
173 /* 206 /*
174 * Is someone visiting anywhere in the subtree ? 207 * Is someone visiting anywhere in the subtree ?
175 * If there's no mount we need to check the usage 208 * If there's no mount we need to check the usage
@@ -198,16 +231,13 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
198 else 231 else
199 ino_count++; 232 ino_count++;
200 233
201 if (atomic_read(&p->d_count) > ino_count) { 234 if (p->d_count > ino_count) {
202 top_ino->last_used = jiffies; 235 top_ino->last_used = jiffies;
203 dput(p); 236 dput(p);
204 return 1; 237 return 1;
205 } 238 }
206 } 239 }
207 dput(p);
208 spin_lock(&dcache_lock);
209 } 240 }
210 spin_unlock(&dcache_lock);
211 241
212 /* Timeout of a tree mount is ultimately determined by its top dentry */ 242 /* Timeout of a tree mount is ultimately determined by its top dentry */
213 if (!autofs4_can_expire(top, timeout, do_now)) 243 if (!autofs4_can_expire(top, timeout, do_now))
@@ -226,32 +256,21 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
226 DPRINTK("parent %p %.*s", 256 DPRINTK("parent %p %.*s",
227 parent, (int)parent->d_name.len, parent->d_name.name); 257 parent, (int)parent->d_name.len, parent->d_name.name);
228 258
229 spin_lock(&dcache_lock); 259 p = NULL;
230 for (p = parent; p; p = next_dentry(p, parent)) { 260 while ((p = get_next_positive_dentry(p, parent))) {
231 /* Negative dentry - give up */
232 if (!simple_positive(p))
233 continue;
234
235 DPRINTK("dentry %p %.*s", 261 DPRINTK("dentry %p %.*s",
236 p, (int) p->d_name.len, p->d_name.name); 262 p, (int) p->d_name.len, p->d_name.name);
237 263
238 p = dget(p);
239 spin_unlock(&dcache_lock);
240
241 if (d_mountpoint(p)) { 264 if (d_mountpoint(p)) {
242 /* Can we umount this guy */ 265 /* Can we umount this guy */
243 if (autofs4_mount_busy(mnt, p)) 266 if (autofs4_mount_busy(mnt, p))
244 goto cont; 267 continue;
245 268
246 /* Can we expire this guy */ 269 /* Can we expire this guy */
247 if (autofs4_can_expire(p, timeout, do_now)) 270 if (autofs4_can_expire(p, timeout, do_now))
248 return p; 271 return p;
249 } 272 }
250cont:
251 dput(p);
252 spin_lock(&dcache_lock);
253 } 273 }
254 spin_unlock(&dcache_lock);
255 return NULL; 274 return NULL;
256} 275}
257 276
@@ -276,7 +295,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
276 struct autofs_info *ino = autofs4_dentry_ino(root); 295 struct autofs_info *ino = autofs4_dentry_ino(root);
277 if (d_mountpoint(root)) { 296 if (d_mountpoint(root)) {
278 ino->flags |= AUTOFS_INF_MOUNTPOINT; 297 ino->flags |= AUTOFS_INF_MOUNTPOINT;
279 root->d_mounted--; 298 spin_lock(&root->d_lock);
299 root->d_flags &= ~DCACHE_MOUNTED;
300 spin_unlock(&root->d_lock);
280 } 301 }
281 ino->flags |= AUTOFS_INF_EXPIRING; 302 ino->flags |= AUTOFS_INF_EXPIRING;
282 init_completion(&ino->expire_complete); 303 init_completion(&ino->expire_complete);
@@ -302,8 +323,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
302{ 323{
303 unsigned long timeout; 324 unsigned long timeout;
304 struct dentry *root = sb->s_root; 325 struct dentry *root = sb->s_root;
326 struct dentry *dentry;
305 struct dentry *expired = NULL; 327 struct dentry *expired = NULL;
306 struct list_head *next;
307 int do_now = how & AUTOFS_EXP_IMMEDIATE; 328 int do_now = how & AUTOFS_EXP_IMMEDIATE;
308 int exp_leaves = how & AUTOFS_EXP_LEAVES; 329 int exp_leaves = how & AUTOFS_EXP_LEAVES;
309 struct autofs_info *ino; 330 struct autofs_info *ino;
@@ -315,23 +336,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
315 now = jiffies; 336 now = jiffies;
316 timeout = sbi->exp_timeout; 337 timeout = sbi->exp_timeout;
317 338
318 spin_lock(&dcache_lock); 339 dentry = NULL;
319 next = root->d_subdirs.next; 340 while ((dentry = get_next_positive_dentry(dentry, root))) {
320
321 /* On exit from the loop expire is set to a dgot dentry
322 * to expire or it's NULL */
323 while ( next != &root->d_subdirs ) {
324 struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
325
326 /* Negative dentry - give up */
327 if (!simple_positive(dentry)) {
328 next = next->next;
329 continue;
330 }
331
332 dentry = dget(dentry);
333 spin_unlock(&dcache_lock);
334
335 spin_lock(&sbi->fs_lock); 341 spin_lock(&sbi->fs_lock);
336 ino = autofs4_dentry_ino(dentry); 342 ino = autofs4_dentry_ino(dentry);
337 343
@@ -347,7 +353,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
347 353
348 /* Path walk currently on this dentry? */ 354 /* Path walk currently on this dentry? */
349 ino_count = atomic_read(&ino->count) + 2; 355 ino_count = atomic_read(&ino->count) + 2;
350 if (atomic_read(&dentry->d_count) > ino_count) 356 if (dentry->d_count > ino_count)
351 goto next; 357 goto next;
352 358
353 /* Can we umount this guy */ 359 /* Can we umount this guy */
@@ -369,7 +375,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
369 if (!exp_leaves) { 375 if (!exp_leaves) {
370 /* Path walk currently on this dentry? */ 376 /* Path walk currently on this dentry? */
371 ino_count = atomic_read(&ino->count) + 1; 377 ino_count = atomic_read(&ino->count) + 1;
372 if (atomic_read(&dentry->d_count) > ino_count) 378 if (dentry->d_count > ino_count)
373 goto next; 379 goto next;
374 380
375 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { 381 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
@@ -383,7 +389,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
383 } else { 389 } else {
384 /* Path walk currently on this dentry? */ 390 /* Path walk currently on this dentry? */
385 ino_count = atomic_read(&ino->count) + 1; 391 ino_count = atomic_read(&ino->count) + 1;
386 if (atomic_read(&dentry->d_count) > ino_count) 392 if (dentry->d_count > ino_count)
387 goto next; 393 goto next;
388 394
389 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 395 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
@@ -394,11 +400,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
394 } 400 }
395next: 401next:
396 spin_unlock(&sbi->fs_lock); 402 spin_unlock(&sbi->fs_lock);
397 dput(dentry);
398 spin_lock(&dcache_lock);
399 next = next->next;
400 } 403 }
401 spin_unlock(&dcache_lock);
402 return NULL; 404 return NULL;
403 405
404found: 406found:
@@ -408,9 +410,13 @@ found:
408 ino->flags |= AUTOFS_INF_EXPIRING; 410 ino->flags |= AUTOFS_INF_EXPIRING;
409 init_completion(&ino->expire_complete); 411 init_completion(&ino->expire_complete);
410 spin_unlock(&sbi->fs_lock); 412 spin_unlock(&sbi->fs_lock);
411 spin_lock(&dcache_lock); 413 spin_lock(&autofs4_lock);
414 spin_lock(&expired->d_parent->d_lock);
415 spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
412 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 416 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
413 spin_unlock(&dcache_lock); 417 spin_unlock(&expired->d_lock);
418 spin_unlock(&expired->d_parent->d_lock);
419 spin_unlock(&autofs4_lock);
414 return expired; 420 return expired;
415} 421}
416 422
@@ -499,7 +505,14 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
499 505
500 spin_lock(&sbi->fs_lock); 506 spin_lock(&sbi->fs_lock);
501 if (ino->flags & AUTOFS_INF_MOUNTPOINT) { 507 if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
502 sb->s_root->d_mounted++; 508 spin_lock(&sb->s_root->d_lock);
509 /*
510 * If we haven't been expired away, then reset
511 * mounted status.
512 */
513 if (mnt->mnt_parent != mnt)
514 sb->s_root->d_flags |= DCACHE_MOUNTED;
515 spin_unlock(&sb->s_root->d_lock);
503 ino->flags &= ~AUTOFS_INF_MOUNTPOINT; 516 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
504 } 517 }
505 ino->flags &= ~AUTOFS_INF_EXPIRING; 518 ino->flags &= ~AUTOFS_INF_EXPIRING;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index ac87e49fa706..a7bdb9dcac84 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
309 goto fail_iput; 309 goto fail_iput;
310 pipe = NULL; 310 pipe = NULL;
311 311
312 root->d_op = &autofs4_sb_dentry_operations; 312 d_set_d_op(root, &autofs4_sb_dentry_operations);
313 root->d_fsdata = ino; 313 root->d_fsdata = ino;
314 314
315 /* Can this call block? */ 315 /* Can this call block? */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d34896cfb19f..651e4ef563b1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,6 +23,8 @@
23 23
24#include "autofs_i.h" 24#include "autofs_i.h"
25 25
26DEFINE_SPINLOCK(autofs4_lock);
27
26static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 28static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
27static int autofs4_dir_unlink(struct inode *,struct dentry *); 29static int autofs4_dir_unlink(struct inode *,struct dentry *);
28static int autofs4_dir_rmdir(struct inode *,struct dentry *); 30static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -142,12 +144,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
142 * autofs file system so just let the libfs routines handle 144 * autofs file system so just let the libfs routines handle
143 * it. 145 * it.
144 */ 146 */
145 spin_lock(&dcache_lock); 147 spin_lock(&autofs4_lock);
148 spin_lock(&dentry->d_lock);
146 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 149 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
147 spin_unlock(&dcache_lock); 150 spin_unlock(&dentry->d_lock);
151 spin_unlock(&autofs4_lock);
148 return -ENOENT; 152 return -ENOENT;
149 } 153 }
150 spin_unlock(&dcache_lock); 154 spin_unlock(&dentry->d_lock);
155 spin_unlock(&autofs4_lock);
151 156
152out: 157out:
153 return dcache_dir_open(inode, file); 158 return dcache_dir_open(inode, file);
@@ -252,9 +257,11 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
252 /* We trigger a mount for almost all flags */ 257 /* We trigger a mount for almost all flags */
253 lookup_type = autofs4_need_mount(nd->flags); 258 lookup_type = autofs4_need_mount(nd->flags);
254 spin_lock(&sbi->fs_lock); 259 spin_lock(&sbi->fs_lock);
255 spin_lock(&dcache_lock); 260 spin_lock(&autofs4_lock);
261 spin_lock(&dentry->d_lock);
256 if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { 262 if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
257 spin_unlock(&dcache_lock); 263 spin_unlock(&dentry->d_lock);
264 spin_unlock(&autofs4_lock);
258 spin_unlock(&sbi->fs_lock); 265 spin_unlock(&sbi->fs_lock);
259 goto follow; 266 goto follow;
260 } 267 }
@@ -266,7 +273,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
266 */ 273 */
267 if (ino->flags & AUTOFS_INF_PENDING || 274 if (ino->flags & AUTOFS_INF_PENDING ||
268 (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { 275 (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
269 spin_unlock(&dcache_lock); 276 spin_unlock(&dentry->d_lock);
277 spin_unlock(&autofs4_lock);
270 spin_unlock(&sbi->fs_lock); 278 spin_unlock(&sbi->fs_lock);
271 279
272 status = try_to_fill_dentry(dentry, nd->flags); 280 status = try_to_fill_dentry(dentry, nd->flags);
@@ -275,7 +283,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
275 283
276 goto follow; 284 goto follow;
277 } 285 }
278 spin_unlock(&dcache_lock); 286 spin_unlock(&dentry->d_lock);
287 spin_unlock(&autofs4_lock);
279 spin_unlock(&sbi->fs_lock); 288 spin_unlock(&sbi->fs_lock);
280follow: 289follow:
281 /* 290 /*
@@ -306,12 +315,19 @@ out_error:
306 */ 315 */
307static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) 316static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
308{ 317{
309 struct inode *dir = dentry->d_parent->d_inode; 318 struct inode *dir;
310 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 319 struct autofs_sb_info *sbi;
311 int oz_mode = autofs4_oz_mode(sbi); 320 int oz_mode;
312 int flags = nd ? nd->flags : 0; 321 int flags = nd ? nd->flags : 0;
313 int status = 1; 322 int status = 1;
314 323
324 if (flags & LOOKUP_RCU)
325 return -ECHILD;
326
327 dir = dentry->d_parent->d_inode;
328 sbi = autofs4_sbi(dir->i_sb);
329 oz_mode = autofs4_oz_mode(sbi);
330
315 /* Pending dentry */ 331 /* Pending dentry */
316 spin_lock(&sbi->fs_lock); 332 spin_lock(&sbi->fs_lock);
317 if (autofs4_ispending(dentry)) { 333 if (autofs4_ispending(dentry)) {
@@ -346,12 +362,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
346 return 0; 362 return 0;
347 363
348 /* Check for a non-mountpoint directory with no contents */ 364 /* Check for a non-mountpoint directory with no contents */
349 spin_lock(&dcache_lock); 365 spin_lock(&autofs4_lock);
366 spin_lock(&dentry->d_lock);
350 if (S_ISDIR(dentry->d_inode->i_mode) && 367 if (S_ISDIR(dentry->d_inode->i_mode) &&
351 !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 368 !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
352 DPRINTK("dentry=%p %.*s, emptydir", 369 DPRINTK("dentry=%p %.*s, emptydir",
353 dentry, dentry->d_name.len, dentry->d_name.name); 370 dentry, dentry->d_name.len, dentry->d_name.name);
354 spin_unlock(&dcache_lock); 371 spin_unlock(&dentry->d_lock);
372 spin_unlock(&autofs4_lock);
355 373
356 /* The daemon never causes a mount to trigger */ 374 /* The daemon never causes a mount to trigger */
357 if (oz_mode) 375 if (oz_mode)
@@ -367,7 +385,8 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
367 385
368 return status; 386 return status;
369 } 387 }
370 spin_unlock(&dcache_lock); 388 spin_unlock(&dentry->d_lock);
389 spin_unlock(&autofs4_lock);
371 390
372 return 1; 391 return 1;
373} 392}
@@ -422,7 +441,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
422 const unsigned char *str = name->name; 441 const unsigned char *str = name->name;
423 struct list_head *p, *head; 442 struct list_head *p, *head;
424 443
425 spin_lock(&dcache_lock); 444 spin_lock(&autofs4_lock);
426 spin_lock(&sbi->lookup_lock); 445 spin_lock(&sbi->lookup_lock);
427 head = &sbi->active_list; 446 head = &sbi->active_list;
428 list_for_each(p, head) { 447 list_for_each(p, head) {
@@ -436,7 +455,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
436 spin_lock(&active->d_lock); 455 spin_lock(&active->d_lock);
437 456
438 /* Already gone? */ 457 /* Already gone? */
439 if (atomic_read(&active->d_count) == 0) 458 if (active->d_count == 0)
440 goto next; 459 goto next;
441 460
442 qstr = &active->d_name; 461 qstr = &active->d_name;
@@ -452,17 +471,17 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
452 goto next; 471 goto next;
453 472
454 if (d_unhashed(active)) { 473 if (d_unhashed(active)) {
455 dget(active); 474 dget_dlock(active);
456 spin_unlock(&active->d_lock); 475 spin_unlock(&active->d_lock);
457 spin_unlock(&sbi->lookup_lock); 476 spin_unlock(&sbi->lookup_lock);
458 spin_unlock(&dcache_lock); 477 spin_unlock(&autofs4_lock);
459 return active; 478 return active;
460 } 479 }
461next: 480next:
462 spin_unlock(&active->d_lock); 481 spin_unlock(&active->d_lock);
463 } 482 }
464 spin_unlock(&sbi->lookup_lock); 483 spin_unlock(&sbi->lookup_lock);
465 spin_unlock(&dcache_lock); 484 spin_unlock(&autofs4_lock);
466 485
467 return NULL; 486 return NULL;
468} 487}
@@ -477,7 +496,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
477 const unsigned char *str = name->name; 496 const unsigned char *str = name->name;
478 struct list_head *p, *head; 497 struct list_head *p, *head;
479 498
480 spin_lock(&dcache_lock); 499 spin_lock(&autofs4_lock);
481 spin_lock(&sbi->lookup_lock); 500 spin_lock(&sbi->lookup_lock);
482 head = &sbi->expiring_list; 501 head = &sbi->expiring_list;
483 list_for_each(p, head) { 502 list_for_each(p, head) {
@@ -507,17 +526,17 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
507 goto next; 526 goto next;
508 527
509 if (d_unhashed(expiring)) { 528 if (d_unhashed(expiring)) {
510 dget(expiring); 529 dget_dlock(expiring);
511 spin_unlock(&expiring->d_lock); 530 spin_unlock(&expiring->d_lock);
512 spin_unlock(&sbi->lookup_lock); 531 spin_unlock(&sbi->lookup_lock);
513 spin_unlock(&dcache_lock); 532 spin_unlock(&autofs4_lock);
514 return expiring; 533 return expiring;
515 } 534 }
516next: 535next:
517 spin_unlock(&expiring->d_lock); 536 spin_unlock(&expiring->d_lock);
518 } 537 }
519 spin_unlock(&sbi->lookup_lock); 538 spin_unlock(&sbi->lookup_lock);
520 spin_unlock(&dcache_lock); 539 spin_unlock(&autofs4_lock);
521 540
522 return NULL; 541 return NULL;
523} 542}
@@ -559,7 +578,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
559 * we check for the hashed dentry and return the newly 578 * we check for the hashed dentry and return the newly
560 * hashed dentry. 579 * hashed dentry.
561 */ 580 */
562 dentry->d_op = &autofs4_root_dentry_operations; 581 d_set_d_op(dentry, &autofs4_root_dentry_operations);
563 582
564 /* 583 /*
565 * And we need to ensure that the same dentry is used for 584 * And we need to ensure that the same dentry is used for
@@ -698,9 +717,9 @@ static int autofs4_dir_symlink(struct inode *dir,
698 d_add(dentry, inode); 717 d_add(dentry, inode);
699 718
700 if (dir == dir->i_sb->s_root->d_inode) 719 if (dir == dir->i_sb->s_root->d_inode)
701 dentry->d_op = &autofs4_root_dentry_operations; 720 d_set_d_op(dentry, &autofs4_root_dentry_operations);
702 else 721 else
703 dentry->d_op = &autofs4_dentry_operations; 722 d_set_d_op(dentry, &autofs4_dentry_operations);
704 723
705 dentry->d_fsdata = ino; 724 dentry->d_fsdata = ino;
706 ino->dentry = dget(dentry); 725 ino->dentry = dget(dentry);
@@ -753,12 +772,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
753 772
754 dir->i_mtime = CURRENT_TIME; 773 dir->i_mtime = CURRENT_TIME;
755 774
756 spin_lock(&dcache_lock); 775 spin_lock(&autofs4_lock);
757 autofs4_add_expiring(dentry); 776 autofs4_add_expiring(dentry);
758 spin_lock(&dentry->d_lock); 777 spin_lock(&dentry->d_lock);
759 __d_drop(dentry); 778 __d_drop(dentry);
760 spin_unlock(&dentry->d_lock); 779 spin_unlock(&dentry->d_lock);
761 spin_unlock(&dcache_lock); 780 spin_unlock(&autofs4_lock);
762 781
763 return 0; 782 return 0;
764} 783}
@@ -775,16 +794,20 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
775 if (!autofs4_oz_mode(sbi)) 794 if (!autofs4_oz_mode(sbi))
776 return -EACCES; 795 return -EACCES;
777 796
778 spin_lock(&dcache_lock); 797 spin_lock(&autofs4_lock);
798 spin_lock(&sbi->lookup_lock);
799 spin_lock(&dentry->d_lock);
779 if (!list_empty(&dentry->d_subdirs)) { 800 if (!list_empty(&dentry->d_subdirs)) {
780 spin_unlock(&dcache_lock); 801 spin_unlock(&dentry->d_lock);
802 spin_unlock(&sbi->lookup_lock);
803 spin_unlock(&autofs4_lock);
781 return -ENOTEMPTY; 804 return -ENOTEMPTY;
782 } 805 }
783 autofs4_add_expiring(dentry); 806 __autofs4_add_expiring(dentry);
784 spin_lock(&dentry->d_lock); 807 spin_unlock(&sbi->lookup_lock);
785 __d_drop(dentry); 808 __d_drop(dentry);
786 spin_unlock(&dentry->d_lock); 809 spin_unlock(&dentry->d_lock);
787 spin_unlock(&dcache_lock); 810 spin_unlock(&autofs4_lock);
788 811
789 if (atomic_dec_and_test(&ino->count)) { 812 if (atomic_dec_and_test(&ino->count)) {
790 p_ino = autofs4_dentry_ino(dentry->d_parent); 813 p_ino = autofs4_dentry_ino(dentry->d_parent);
@@ -829,9 +852,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
829 d_add(dentry, inode); 852 d_add(dentry, inode);
830 853
831 if (dir == dir->i_sb->s_root->d_inode) 854 if (dir == dir->i_sb->s_root->d_inode)
832 dentry->d_op = &autofs4_root_dentry_operations; 855 d_set_d_op(dentry, &autofs4_root_dentry_operations);
833 else 856 else
834 dentry->d_op = &autofs4_dentry_operations; 857 d_set_d_op(dentry, &autofs4_dentry_operations);
835 858
836 dentry->d_fsdata = ino; 859 dentry->d_fsdata = ino;
837 ino->dentry = dget(dentry); 860 ino->dentry = dget(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 2341375386f8..c5f8459c905e 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -186,16 +186,26 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
186{ 186{
187 struct dentry *root = sbi->sb->s_root; 187 struct dentry *root = sbi->sb->s_root;
188 struct dentry *tmp; 188 struct dentry *tmp;
189 char *buf = *name; 189 char *buf;
190 char *p; 190 char *p;
191 int len = 0; 191 int len;
192 unsigned seq;
192 193
193 spin_lock(&dcache_lock); 194rename_retry:
195 buf = *name;
196 len = 0;
197
198 seq = read_seqbegin(&rename_lock);
199 rcu_read_lock();
200 spin_lock(&autofs4_lock);
194 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 201 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
195 len += tmp->d_name.len + 1; 202 len += tmp->d_name.len + 1;
196 203
197 if (!len || --len > NAME_MAX) { 204 if (!len || --len > NAME_MAX) {
198 spin_unlock(&dcache_lock); 205 spin_unlock(&autofs4_lock);
206 rcu_read_unlock();
207 if (read_seqretry(&rename_lock, seq))
208 goto rename_retry;
199 return 0; 209 return 0;
200 } 210 }
201 211
@@ -208,7 +218,10 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
208 p -= tmp->d_name.len; 218 p -= tmp->d_name.len;
209 strncpy(p, tmp->d_name.name, tmp->d_name.len); 219 strncpy(p, tmp->d_name.name, tmp->d_name.len);
210 } 220 }
211 spin_unlock(&dcache_lock); 221 spin_unlock(&autofs4_lock);
222 rcu_read_unlock();
223 if (read_seqretry(&rename_lock, seq))
224 goto rename_retry;
212 225
213 return len; 226 return len;
214} 227}
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f024d8aaddef..9ad2369d9e35 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -229,8 +229,11 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
229 return -EIO; 229 return -EIO;
230} 230}
231 231
232static int bad_inode_permission(struct inode *inode, int mask) 232static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
233{ 233{
234 if (flags & IPERM_FLAG_RCU)
235 return -ECHILD;
236
234 return -EIO; 237 return -EIO;
235} 238}
236 239
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index aa4e7c7ae3c6..de93581b79a2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb)
284 return &bi->vfs_inode; 284 return &bi->vfs_inode;
285} 285}
286 286
287static void 287static void befs_i_callback(struct rcu_head *head)
288befs_destroy_inode(struct inode *inode)
289{ 288{
289 struct inode *inode = container_of(head, struct inode, i_rcu);
290 INIT_LIST_HEAD(&inode->i_dentry);
290 kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); 291 kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
291} 292}
292 293
294static void befs_destroy_inode(struct inode *inode)
295{
296 call_rcu(&inode->i_rcu, befs_i_callback);
297}
298
293static void init_once(void *foo) 299static void init_once(void *foo)
294{ 300{
295 struct befs_inode_info *bi = (struct befs_inode_info *) foo; 301 struct befs_inode_info *bi = (struct befs_inode_info *) foo;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 76db6d7d49bb..a8e37f81d097 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
248 return &bi->vfs_inode; 248 return &bi->vfs_inode;
249} 249}
250 250
251static void bfs_destroy_inode(struct inode *inode) 251static void bfs_i_callback(struct rcu_head *head)
252{ 252{
253 struct inode *inode = container_of(head, struct inode, i_rcu);
254 INIT_LIST_HEAD(&inode->i_dentry);
253 kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); 255 kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
254} 256}
255 257
258static void bfs_destroy_inode(struct inode *inode)
259{
260 call_rcu(&inode->i_rcu, bfs_i_callback);
261}
262
256static void init_once(void *foo) 263static void init_once(void *foo)
257{ 264{
258 struct bfs_inode_info *bi = foo; 265 struct bfs_inode_info *bi = foo;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4230252fd689..771f23527010 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -409,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
409 return &ei->vfs_inode; 409 return &ei->vfs_inode;
410} 410}
411 411
412static void bdev_destroy_inode(struct inode *inode) 412static void bdev_i_callback(struct rcu_head *head)
413{ 413{
414 struct inode *inode = container_of(head, struct inode, i_rcu);
414 struct bdev_inode *bdi = BDEV_I(inode); 415 struct bdev_inode *bdi = BDEV_I(inode);
415 416
417 INIT_LIST_HEAD(&inode->i_dentry);
416 kmem_cache_free(bdev_cachep, bdi); 418 kmem_cache_free(bdev_cachep, bdi);
417} 419}
418 420
421static void bdev_destroy_inode(struct inode *inode)
422{
423 call_rcu(&inode->i_rcu, bdev_i_callback);
424}
425
419static void init_once(void *foo) 426static void init_once(void *foo)
420{ 427{
421 struct bdev_inode *ei = (struct bdev_inode *) foo; 428 struct bdev_inode *ei = (struct bdev_inode *) foo;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2222d161c7b6..6ae2c8cac9d5 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -185,18 +185,23 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
185 return ret; 185 return ret;
186} 186}
187 187
188int btrfs_check_acl(struct inode *inode, int mask) 188int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
189{ 189{
190 struct posix_acl *acl;
191 int error = -EAGAIN; 190 int error = -EAGAIN;
192 191
193 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); 192 if (flags & IPERM_FLAG_RCU) {
193 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
194 error = -ECHILD;
194 195
195 if (IS_ERR(acl)) 196 } else {
196 return PTR_ERR(acl); 197 struct posix_acl *acl;
197 if (acl) { 198 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
198 error = posix_acl_permission(inode, acl, mask); 199 if (IS_ERR(acl))
199 posix_acl_release(acl); 200 return PTR_ERR(acl);
201 if (acl) {
202 error = posix_acl_permission(inode, acl, mask);
203 posix_acl_release(acl);
204 }
200 } 205 }
201 206
202 return error; 207 return error;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index af52f6d7a4d8..a142d204b526 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2544,7 +2544,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
2544 2544
2545/* acl.c */ 2545/* acl.c */
2546#ifdef CONFIG_BTRFS_FS_POSIX_ACL 2546#ifdef CONFIG_BTRFS_FS_POSIX_ACL
2547int btrfs_check_acl(struct inode *inode, int mask); 2547int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
2548#else 2548#else
2549#define btrfs_check_acl NULL 2549#define btrfs_check_acl NULL
2550#endif 2550#endif
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 659f532d26a0..0ccf9a8afcdf 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
110 110
111 dentry = d_obtain_alias(inode); 111 dentry = d_obtain_alias(inode);
112 if (!IS_ERR(dentry)) 112 if (!IS_ERR(dentry))
113 dentry->d_op = &btrfs_dentry_operations; 113 d_set_d_op(dentry, &btrfs_dentry_operations);
114 return dentry; 114 return dentry;
115fail: 115fail:
116 srcu_read_unlock(&fs_info->subvol_srcu, index); 116 srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
225 key.offset = 0; 225 key.offset = 0;
226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); 226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
227 if (!IS_ERR(dentry)) 227 if (!IS_ERR(dentry))
228 dentry->d_op = &btrfs_dentry_operations; 228 d_set_d_op(dentry, &btrfs_dentry_operations);
229 return dentry; 229 return dentry;
230fail: 230fail:
231 btrfs_free_path(path); 231 btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 72f31ecb5c90..a0ff46a47895 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4084 int index; 4084 int index;
4085 int ret; 4085 int ret;
4086 4086
4087 dentry->d_op = &btrfs_dentry_operations; 4087 d_set_d_op(dentry, &btrfs_dentry_operations);
4088 4088
4089 if (dentry->d_name.len > BTRFS_NAME_LEN) 4089 if (dentry->d_name.len > BTRFS_NAME_LEN)
4090 return ERR_PTR(-ENAMETOOLONG); 4090 return ERR_PTR(-ENAMETOOLONG);
@@ -4127,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4127 return inode; 4127 return inode;
4128} 4128}
4129 4129
4130static int btrfs_dentry_delete(struct dentry *dentry) 4130static int btrfs_dentry_delete(const struct dentry *dentry)
4131{ 4131{
4132 struct btrfs_root *root; 4132 struct btrfs_root *root;
4133 4133
@@ -6495,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6495 return inode; 6495 return inode;
6496} 6496}
6497 6497
6498static void btrfs_i_callback(struct rcu_head *head)
6499{
6500 struct inode *inode = container_of(head, struct inode, i_rcu);
6501 INIT_LIST_HEAD(&inode->i_dentry);
6502 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
6503}
6504
6498void btrfs_destroy_inode(struct inode *inode) 6505void btrfs_destroy_inode(struct inode *inode)
6499{ 6506{
6500 struct btrfs_ordered_extent *ordered; 6507 struct btrfs_ordered_extent *ordered;
@@ -6564,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode)
6564 inode_tree_del(inode); 6571 inode_tree_del(inode);
6565 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 6572 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
6566free: 6573free:
6567 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6574 call_rcu(&inode->i_rcu, btrfs_i_callback);
6568} 6575}
6569 6576
6570int btrfs_drop_inode(struct inode *inode) 6577int btrfs_drop_inode(struct inode *inode)
@@ -7204,11 +7211,11 @@ static int btrfs_set_page_dirty(struct page *page)
7204 return __set_page_dirty_nobuffers(page); 7211 return __set_page_dirty_nobuffers(page);
7205} 7212}
7206 7213
7207static int btrfs_permission(struct inode *inode, int mask) 7214static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
7208{ 7215{
7209 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7216 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
7210 return -EACCES; 7217 return -EACCES;
7211 return generic_permission(inode, mask, btrfs_check_acl); 7218 return generic_permission(inode, mask, flags, btrfs_check_acl);
7212} 7219}
7213 7220
7214static const struct inode_operations btrfs_dir_inode_operations = { 7221static const struct inode_operations btrfs_dir_inode_operations = {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d902948a90d8..fa7ca04ee816 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry)
42 42
43 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ 43 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
44 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) 44 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
45 dentry->d_op = &ceph_dentry_ops; 45 d_set_d_op(dentry, &ceph_dentry_ops);
46 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) 46 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
47 dentry->d_op = &ceph_snapdir_dentry_ops; 47 d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
48 else 48 else
49 dentry->d_op = &ceph_snap_dentry_ops; 49 d_set_d_op(dentry, &ceph_snap_dentry_ops);
50 50
51 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); 51 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
52 if (!di) 52 if (!di)
@@ -112,7 +112,7 @@ static int __dcache_readdir(struct file *filp,
112 dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, 112 dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
113 last); 113 last);
114 114
115 spin_lock(&dcache_lock); 115 spin_lock(&parent->d_lock);
116 116
117 /* start at beginning? */ 117 /* start at beginning? */
118 if (filp->f_pos == 2 || last == NULL || 118 if (filp->f_pos == 2 || last == NULL ||
@@ -136,6 +136,7 @@ more:
136 fi->at_end = 1; 136 fi->at_end = 1;
137 goto out_unlock; 137 goto out_unlock;
138 } 138 }
139 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
139 if (!d_unhashed(dentry) && dentry->d_inode && 140 if (!d_unhashed(dentry) && dentry->d_inode &&
140 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && 141 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
141 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && 142 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
@@ -145,13 +146,15 @@ more:
145 dentry->d_name.len, dentry->d_name.name, di->offset, 146 dentry->d_name.len, dentry->d_name.name, di->offset,
146 filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", 147 filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
147 !dentry->d_inode ? " null" : ""); 148 !dentry->d_inode ? " null" : "");
149 spin_unlock(&dentry->d_lock);
148 p = p->prev; 150 p = p->prev;
149 dentry = list_entry(p, struct dentry, d_u.d_child); 151 dentry = list_entry(p, struct dentry, d_u.d_child);
150 di = ceph_dentry(dentry); 152 di = ceph_dentry(dentry);
151 } 153 }
152 154
153 atomic_inc(&dentry->d_count); 155 dget_dlock(dentry);
154 spin_unlock(&dcache_lock); 156 spin_unlock(&dentry->d_lock);
157 spin_unlock(&parent->d_lock);
155 158
156 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, 159 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
157 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 160 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -177,19 +180,19 @@ more:
177 180
178 filp->f_pos++; 181 filp->f_pos++;
179 182
180 /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ 183 /* make sure a dentry wasn't dropped while we didn't have parent lock */
181 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { 184 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
182 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); 185 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
183 err = -EAGAIN; 186 err = -EAGAIN;
184 goto out; 187 goto out;
185 } 188 }
186 189
187 spin_lock(&dcache_lock); 190 spin_lock(&parent->d_lock);
188 p = p->prev; /* advance to next dentry */ 191 p = p->prev; /* advance to next dentry */
189 goto more; 192 goto more;
190 193
191out_unlock: 194out_unlock:
192 spin_unlock(&dcache_lock); 195 spin_unlock(&parent->d_lock);
193out: 196out:
194 if (last) 197 if (last)
195 dput(last); 198 dput(last);
@@ -987,7 +990,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
987 */ 990 */
988static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) 991static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
989{ 992{
990 struct inode *dir = dentry->d_parent->d_inode; 993 struct inode *dir;
994
995 if (nd->flags & LOOKUP_RCU)
996 return -ECHILD;
997
998 dir = dentry->d_parent->d_inode;
991 999
992 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, 1000 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
993 dentry->d_name.len, dentry->d_name.name, dentry->d_inode, 1001 dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bf1286588f26..e61de4f7b99d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -368,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
368 return &ci->vfs_inode; 368 return &ci->vfs_inode;
369} 369}
370 370
371static void ceph_i_callback(struct rcu_head *head)
372{
373 struct inode *inode = container_of(head, struct inode, i_rcu);
374 struct ceph_inode_info *ci = ceph_inode(inode);
375
376 INIT_LIST_HEAD(&inode->i_dentry);
377 kmem_cache_free(ceph_inode_cachep, ci);
378}
379
371void ceph_destroy_inode(struct inode *inode) 380void ceph_destroy_inode(struct inode *inode)
372{ 381{
373 struct ceph_inode_info *ci = ceph_inode(inode); 382 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -407,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode)
407 if (ci->i_xattrs.prealloc_blob) 416 if (ci->i_xattrs.prealloc_blob)
408 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 417 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
409 418
410 kmem_cache_free(ceph_inode_cachep, ci); 419 call_rcu(&inode->i_rcu, ceph_i_callback);
411} 420}
412 421
413 422
@@ -841,13 +850,13 @@ static void ceph_set_dentry_offset(struct dentry *dn)
841 di->offset = ceph_inode(inode)->i_max_offset++; 850 di->offset = ceph_inode(inode)->i_max_offset++;
842 spin_unlock(&inode->i_lock); 851 spin_unlock(&inode->i_lock);
843 852
844 spin_lock(&dcache_lock); 853 spin_lock(&dir->d_lock);
845 spin_lock(&dn->d_lock); 854 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
846 list_move(&dn->d_u.d_child, &dir->d_subdirs); 855 list_move(&dn->d_u.d_child, &dir->d_subdirs);
847 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, 856 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
848 dn->d_u.d_child.prev, dn->d_u.d_child.next); 857 dn->d_u.d_child.prev, dn->d_u.d_child.next);
849 spin_unlock(&dn->d_lock); 858 spin_unlock(&dn->d_lock);
850 spin_unlock(&dcache_lock); 859 spin_unlock(&dir->d_lock);
851} 860}
852 861
853/* 862/*
@@ -879,8 +888,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
879 } else if (realdn) { 888 } else if (realdn) {
880 dout("dn %p (%d) spliced with %p (%d) " 889 dout("dn %p (%d) spliced with %p (%d) "
881 "inode %p ino %llx.%llx\n", 890 "inode %p ino %llx.%llx\n",
882 dn, atomic_read(&dn->d_count), 891 dn, dn->d_count,
883 realdn, atomic_read(&realdn->d_count), 892 realdn, realdn->d_count,
884 realdn->d_inode, ceph_vinop(realdn->d_inode)); 893 realdn->d_inode, ceph_vinop(realdn->d_inode));
885 dput(dn); 894 dput(dn);
886 dn = realdn; 895 dn = realdn;
@@ -1231,11 +1240,11 @@ retry_lookup:
1231 goto retry_lookup; 1240 goto retry_lookup;
1232 } else { 1241 } else {
1233 /* reorder parent's d_subdirs */ 1242 /* reorder parent's d_subdirs */
1234 spin_lock(&dcache_lock); 1243 spin_lock(&parent->d_lock);
1235 spin_lock(&dn->d_lock); 1244 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
1236 list_move(&dn->d_u.d_child, &parent->d_subdirs); 1245 list_move(&dn->d_u.d_child, &parent->d_subdirs);
1237 spin_unlock(&dn->d_lock); 1246 spin_unlock(&dn->d_lock);
1238 spin_unlock(&dcache_lock); 1247 spin_unlock(&parent->d_lock);
1239 } 1248 }
1240 1249
1241 di = dn->d_fsdata; 1250 di = dn->d_fsdata;
@@ -1772,12 +1781,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
1772 * Check inode permissions. We verify we have a valid value for 1781 * Check inode permissions. We verify we have a valid value for
1773 * the AUTH cap, then call the generic handler. 1782 * the AUTH cap, then call the generic handler.
1774 */ 1783 */
1775int ceph_permission(struct inode *inode, int mask) 1784int ceph_permission(struct inode *inode, int mask, unsigned int flags)
1776{ 1785{
1777 int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); 1786 int err;
1787
1788 if (flags & IPERM_FLAG_RCU)
1789 return -ECHILD;
1790
1791 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
1778 1792
1779 if (!err) 1793 if (!err)
1780 err = generic_permission(inode, mask, NULL); 1794 err = generic_permission(inode, mask, flags, NULL);
1781 return err; 1795 return err;
1782} 1796}
1783 1797
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 38800eaa81d0..a50fca1e03be 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1486,7 +1486,7 @@ retry:
1486 *base = ceph_ino(temp->d_inode); 1486 *base = ceph_ino(temp->d_inode);
1487 *plen = len; 1487 *plen = len;
1488 dout("build_path on %p %d built %llx '%.*s'\n", 1488 dout("build_path on %p %d built %llx '%.*s'\n",
1489 dentry, atomic_read(&dentry->d_count), *base, len, path); 1489 dentry, dentry->d_count, *base, len, path);
1490 return path; 1490 return path;
1491} 1491}
1492 1492
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7f01728a4657..4553d8829edb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -665,7 +665,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
665extern void ceph_queue_writeback(struct inode *inode); 665extern void ceph_queue_writeback(struct inode *inode);
666 666
667extern int ceph_do_getattr(struct inode *inode, int mask); 667extern int ceph_do_getattr(struct inode *inode, int mask);
668extern int ceph_permission(struct inode *inode, int mask); 668extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
669extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); 669extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
670extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, 670extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
671 struct kstat *stat); 671 struct kstat *stat);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3936aa7f2c22..8e21e0fe65d5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -283,10 +283,13 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
283 return 0; 283 return 0;
284} 284}
285 285
286static int cifs_permission(struct inode *inode, int mask) 286static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
287{ 287{
288 struct cifs_sb_info *cifs_sb; 288 struct cifs_sb_info *cifs_sb;
289 289
290 if (flags & IPERM_FLAG_RCU)
291 return -ECHILD;
292
290 cifs_sb = CIFS_SB(inode->i_sb); 293 cifs_sb = CIFS_SB(inode->i_sb);
291 294
292 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { 295 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
@@ -298,7 +301,7 @@ static int cifs_permission(struct inode *inode, int mask)
298 on the client (above and beyond ACL on servers) for 301 on the client (above and beyond ACL on servers) for
299 servers which do not support setting and viewing mode bits, 302 servers which do not support setting and viewing mode bits,
300 so allowing client to check permissions is useful */ 303 so allowing client to check permissions is useful */
301 return generic_permission(inode, mask, NULL); 304 return generic_permission(inode, mask, flags, NULL);
302} 305}
303 306
304static struct kmem_cache *cifs_inode_cachep; 307static struct kmem_cache *cifs_inode_cachep;
@@ -334,10 +337,17 @@ cifs_alloc_inode(struct super_block *sb)
334 return &cifs_inode->vfs_inode; 337 return &cifs_inode->vfs_inode;
335} 338}
336 339
340static void cifs_i_callback(struct rcu_head *head)
341{
342 struct inode *inode = container_of(head, struct inode, i_rcu);
343 INIT_LIST_HEAD(&inode->i_dentry);
344 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
345}
346
337static void 347static void
338cifs_destroy_inode(struct inode *inode) 348cifs_destroy_inode(struct inode *inode)
339{ 349{
340 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); 350 call_rcu(&inode->i_rcu, cifs_i_callback);
341} 351}
342 352
343static void 353static void
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3840eddbfb7a..db2a58c00f7b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon,
135 struct inode *newinode) 135 struct inode *newinode)
136{ 136{
137 if (tcon->nocase) 137 if (tcon->nocase)
138 direntry->d_op = &cifs_ci_dentry_ops; 138 d_set_d_op(direntry, &cifs_ci_dentry_ops);
139 else 139 else
140 direntry->d_op = &cifs_dentry_ops; 140 d_set_d_op(direntry, &cifs_dentry_ops);
141 d_instantiate(direntry, newinode); 141 d_instantiate(direntry, newinode);
142} 142}
143 143
@@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
421 rc = cifs_get_inode_info_unix(&newinode, full_path, 421 rc = cifs_get_inode_info_unix(&newinode, full_path,
422 inode->i_sb, xid); 422 inode->i_sb, xid);
423 if (pTcon->nocase) 423 if (pTcon->nocase)
424 direntry->d_op = &cifs_ci_dentry_ops; 424 d_set_d_op(direntry, &cifs_ci_dentry_ops);
425 else 425 else
426 direntry->d_op = &cifs_dentry_ops; 426 d_set_d_op(direntry, &cifs_dentry_ops);
427 427
428 if (rc == 0) 428 if (rc == 0)
429 d_instantiate(direntry, newinode); 429 d_instantiate(direntry, newinode);
@@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
604 604
605 if ((rc == 0) && (newInode != NULL)) { 605 if ((rc == 0) && (newInode != NULL)) {
606 if (pTcon->nocase) 606 if (pTcon->nocase)
607 direntry->d_op = &cifs_ci_dentry_ops; 607 d_set_d_op(direntry, &cifs_ci_dentry_ops);
608 else 608 else
609 direntry->d_op = &cifs_dentry_ops; 609 d_set_d_op(direntry, &cifs_dentry_ops);
610 d_add(direntry, newInode); 610 d_add(direntry, newInode);
611 if (posix_open) { 611 if (posix_open) {
612 filp = lookup_instantiate_filp(nd, direntry, 612 filp = lookup_instantiate_filp(nd, direntry,
@@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
634 rc = 0; 634 rc = 0;
635 direntry->d_time = jiffies; 635 direntry->d_time = jiffies;
636 if (pTcon->nocase) 636 if (pTcon->nocase)
637 direntry->d_op = &cifs_ci_dentry_ops; 637 d_set_d_op(direntry, &cifs_ci_dentry_ops);
638 else 638 else
639 direntry->d_op = &cifs_dentry_ops; 639 d_set_d_op(direntry, &cifs_dentry_ops);
640 d_add(direntry, NULL); 640 d_add(direntry, NULL);
641 /* if it was once a directory (but how can we tell?) we could do 641 /* if it was once a directory (but how can we tell?) we could do
642 shrink_dcache_parent(direntry); */ 642 shrink_dcache_parent(direntry); */
@@ -656,22 +656,37 @@ lookup_out:
656static int 656static int
657cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) 657cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
658{ 658{
659 int isValid = 1; 659 if (nd->flags & LOOKUP_RCU)
660 return -ECHILD;
660 661
661 if (direntry->d_inode) { 662 if (direntry->d_inode) {
662 if (cifs_revalidate_dentry(direntry)) 663 if (cifs_revalidate_dentry(direntry))
663 return 0; 664 return 0;
664 } else { 665 else
665 cFYI(1, "neg dentry 0x%p name = %s", 666 return 1;
666 direntry, direntry->d_name.name);
667 if (time_after(jiffies, direntry->d_time + HZ) ||
668 !lookupCacheEnabled) {
669 d_drop(direntry);
670 isValid = 0;
671 }
672 } 667 }
673 668
674 return isValid; 669 /*
670 * This may be nfsd (or something), anyway, we can't see the
671 * intent of this. So, since this can be for creation, drop it.
672 */
673 if (!nd)
674 return 0;
675
676 /*
677 * Drop the negative dentry, in order to make sure to use the
678 * case sensitive name which is specified by user if this is
679 * for creation.
680 */
681 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
682 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
683 return 0;
684 }
685
686 if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
687 return 0;
688
689 return 1;
675} 690}
676 691
677/* static int cifs_d_delete(struct dentry *direntry) 692/* static int cifs_d_delete(struct dentry *direntry)
@@ -688,9 +703,10 @@ const struct dentry_operations cifs_dentry_ops = {
688/* d_delete: cifs_d_delete, */ /* not needed except for debugging */ 703/* d_delete: cifs_d_delete, */ /* not needed except for debugging */
689}; 704};
690 705
691static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) 706static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode,
707 struct qstr *q)
692{ 708{
693 struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; 709 struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls;
694 unsigned long hash; 710 unsigned long hash;
695 int i; 711 int i;
696 712
@@ -703,21 +719,16 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
703 return 0; 719 return 0;
704} 720}
705 721
706static int cifs_ci_compare(struct dentry *dentry, struct qstr *a, 722static int cifs_ci_compare(const struct dentry *parent,
707 struct qstr *b) 723 const struct inode *pinode,
724 const struct dentry *dentry, const struct inode *inode,
725 unsigned int len, const char *str, const struct qstr *name)
708{ 726{
709 struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; 727 struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls;
710 728
711 if ((a->len == b->len) && 729 if ((name->len == len) &&
712 (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) { 730 (nls_strnicmp(codepage, name->name, str, len) == 0))
713 /*
714 * To preserve case, don't let an existing negative dentry's
715 * case take precedence. If a is not a negative dentry, this
716 * should have no side effects
717 */
718 memcpy((void *)a->name, b->name, a->len);
719 return 0; 731 return 0;
720 }
721 return 1; 732 return 1;
722} 733}
723 734
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 589f3e3f6e00..a853a89857a5 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -809,14 +809,14 @@ inode_has_hashed_dentries(struct inode *inode)
809{ 809{
810 struct dentry *dentry; 810 struct dentry *dentry;
811 811
812 spin_lock(&dcache_lock); 812 spin_lock(&inode->i_lock);
813 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 813 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
814 if (!d_unhashed(dentry) || IS_ROOT(dentry)) { 814 if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
815 spin_unlock(&dcache_lock); 815 spin_unlock(&inode->i_lock);
816 return true; 816 return true;
817 } 817 }
818 } 818 }
819 spin_unlock(&dcache_lock); 819 spin_unlock(&inode->i_lock);
820 return false; 820 return false;
821} 821}
822 822
@@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1319 to set uid/gid */ 1319 to set uid/gid */
1320 inc_nlink(inode); 1320 inc_nlink(inode);
1321 if (pTcon->nocase) 1321 if (pTcon->nocase)
1322 direntry->d_op = &cifs_ci_dentry_ops; 1322 d_set_d_op(direntry, &cifs_ci_dentry_ops);
1323 else 1323 else
1324 direntry->d_op = &cifs_dentry_ops; 1324 d_set_d_op(direntry, &cifs_dentry_ops);
1325 1325
1326 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); 1326 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1327 cifs_fill_uniqueid(inode->i_sb, &fattr); 1327 cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1363,9 +1363,9 @@ mkdir_get_info:
1363 inode->i_sb, xid, NULL); 1363 inode->i_sb, xid, NULL);
1364 1364
1365 if (pTcon->nocase) 1365 if (pTcon->nocase)
1366 direntry->d_op = &cifs_ci_dentry_ops; 1366 d_set_d_op(direntry, &cifs_ci_dentry_ops);
1367 else 1367 else
1368 direntry->d_op = &cifs_dentry_ops; 1368 d_set_d_op(direntry, &cifs_dentry_ops);
1369 d_instantiate(direntry, newinode); 1369 d_instantiate(direntry, newinode);
1370 /* setting nlink not necessary except in cases where we 1370 /* setting nlink not necessary except in cases where we
1371 * failed to get it from the server or was set bogus */ 1371 * failed to get it from the server or was set bogus */
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 85cdbf831e7b..fe2f6a93c49e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
525 rc); 525 rc);
526 } else { 526 } else {
527 if (pTcon->nocase) 527 if (pTcon->nocase)
528 direntry->d_op = &cifs_ci_dentry_ops; 528 d_set_d_op(direntry, &cifs_ci_dentry_ops);
529 else 529 else
530 direntry->d_op = &cifs_dentry_ops; 530 d_set_d_op(direntry, &cifs_dentry_ops);
531 d_instantiate(direntry, newinode); 531 d_instantiate(direntry, newinode);
532 } 532 }
533 } 533 }
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index a73eb9f4bdaf..ec5b68e3b928 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
79 cFYI(1, "For %s", name->name); 79 cFYI(1, "For %s", name->name);
80 80
81 if (parent->d_op && parent->d_op->d_hash) 81 if (parent->d_op && parent->d_op->d_hash)
82 parent->d_op->d_hash(parent, name); 82 parent->d_op->d_hash(parent, parent->d_inode, name);
83 else 83 else
84 name->hash = full_name_hash(name->name, name->len); 84 name->hash = full_name_hash(name->name, name->len);
85 85
@@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
103 } 103 }
104 104
105 if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) 105 if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase)
106 dentry->d_op = &cifs_ci_dentry_ops; 106 d_set_d_op(dentry, &cifs_ci_dentry_ops);
107 else 107 else
108 dentry->d_op = &cifs_dentry_ops; 108 d_set_d_op(dentry, &cifs_dentry_ops);
109 109
110 alias = d_materialise_unique(dentry, inode); 110 alias = d_materialise_unique(dentry, inode);
111 if (alias != NULL) { 111 if (alias != NULL) {
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 9060f08e70cf..5525e1c660fd 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
93 struct list_head *child; 93 struct list_head *child;
94 struct dentry *de; 94 struct dentry *de;
95 95
96 spin_lock(&dcache_lock); 96 spin_lock(&parent->d_lock);
97 list_for_each(child, &parent->d_subdirs) 97 list_for_each(child, &parent->d_subdirs)
98 { 98 {
99 de = list_entry(child, struct dentry, d_u.d_child); 99 de = list_entry(child, struct dentry, d_u.d_child);
@@ -102,7 +102,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
102 continue; 102 continue;
103 coda_flag_inode(de->d_inode, flag); 103 coda_flag_inode(de->d_inode, flag);
104 } 104 }
105 spin_unlock(&dcache_lock); 105 spin_unlock(&parent->d_lock);
106 return; 106 return;
107} 107}
108 108
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 5d8b35539601..29badd91360f 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -18,6 +18,7 @@
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21#include <linux/namei.h>
21 22
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23 24
@@ -47,7 +48,7 @@ static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
47 48
48/* dentry ops */ 49/* dentry ops */
49static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); 50static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd);
50static int coda_dentry_delete(struct dentry *); 51static int coda_dentry_delete(const struct dentry *);
51 52
52/* support routines */ 53/* support routines */
53static int coda_venus_readdir(struct file *coda_file, void *buf, 54static int coda_venus_readdir(struct file *coda_file, void *buf,
@@ -125,7 +126,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
125 return ERR_PTR(error); 126 return ERR_PTR(error);
126 127
127exit: 128exit:
128 entry->d_op = &coda_dentry_operations; 129 d_set_d_op(entry, &coda_dentry_operations);
129 130
130 if (inode && (type & CODA_NOCACHE)) 131 if (inode && (type & CODA_NOCACHE))
131 coda_flag_inode(inode, C_VATTR | C_PURGE); 132 coda_flag_inode(inode, C_VATTR | C_PURGE);
@@ -134,10 +135,13 @@ exit:
134} 135}
135 136
136 137
137int coda_permission(struct inode *inode, int mask) 138int coda_permission(struct inode *inode, int mask, unsigned int flags)
138{ 139{
139 int error; 140 int error;
140 141
142 if (flags & IPERM_FLAG_RCU)
143 return -ECHILD;
144
141 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 145 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
142 146
143 if (!mask) 147 if (!mask)
@@ -541,9 +545,13 @@ out:
541/* called when a cache lookup succeeds */ 545/* called when a cache lookup succeeds */
542static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) 546static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
543{ 547{
544 struct inode *inode = de->d_inode; 548 struct inode *inode;
545 struct coda_inode_info *cii; 549 struct coda_inode_info *cii;
546 550
551 if (nd->flags & LOOKUP_RCU)
552 return -ECHILD;
553
554 inode = de->d_inode;
547 if (!inode || coda_isroot(inode)) 555 if (!inode || coda_isroot(inode))
548 goto out; 556 goto out;
549 if (is_bad_inode(inode)) 557 if (is_bad_inode(inode))
@@ -559,7 +567,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
559 if (cii->c_flags & C_FLUSH) 567 if (cii->c_flags & C_FLUSH)
560 coda_flag_inode_children(inode, C_FLUSH); 568 coda_flag_inode_children(inode, C_FLUSH);
561 569
562 if (atomic_read(&de->d_count) > 1) 570 if (de->d_count > 1)
563 /* pretend it's valid, but don't change the flags */ 571 /* pretend it's valid, but don't change the flags */
564 goto out; 572 goto out;
565 573
@@ -577,7 +585,7 @@ out:
577 * This is the callback from dput() when d_count is going to 0. 585 * This is the callback from dput() when d_count is going to 0.
578 * We use this to unhash dentries with bad inodes. 586 * We use this to unhash dentries with bad inodes.
579 */ 587 */
580static int coda_dentry_delete(struct dentry * dentry) 588static int coda_dentry_delete(const struct dentry * dentry)
581{ 589{
582 int flags; 590 int flags;
583 591
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5ea57c8c7f97..50dc7d189f56 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
56 return &ei->vfs_inode; 56 return &ei->vfs_inode;
57} 57}
58 58
59static void coda_destroy_inode(struct inode *inode) 59static void coda_i_callback(struct rcu_head *head)
60{ 60{
61 struct inode *inode = container_of(head, struct inode, i_rcu);
62 INIT_LIST_HEAD(&inode->i_dentry);
61 kmem_cache_free(coda_inode_cachep, ITOC(inode)); 63 kmem_cache_free(coda_inode_cachep, ITOC(inode));
62} 64}
63 65
66static void coda_destroy_inode(struct inode *inode)
67{
68 call_rcu(&inode->i_rcu, coda_i_callback);
69}
70
64static void init_once(void *foo) 71static void init_once(void *foo)
65{ 72{
66 struct coda_inode_info *ei = (struct coda_inode_info *) foo; 73 struct coda_inode_info *ei = (struct coda_inode_info *) foo;
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 2fd89b5c5c7b..741f0bd03918 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
24#include <linux/coda_psdev.h> 24#include <linux/coda_psdev.h>
25 25
26/* pioctl ops */ 26/* pioctl ops */
27static int coda_ioctl_permission(struct inode *inode, int mask); 27static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags);
28static long coda_pioctl(struct file *filp, unsigned int cmd, 28static long coda_pioctl(struct file *filp, unsigned int cmd,
29 unsigned long user_data); 29 unsigned long user_data);
30 30
@@ -41,8 +41,10 @@ const struct file_operations coda_ioctl_operations = {
41}; 41};
42 42
43/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
44static int coda_ioctl_permission(struct inode *inode, int mask) 44static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
45{ 45{
46 if (flags & IPERM_FLAG_RCU)
47 return -ECHILD;
46 return (mask & MAY_EXEC) ? -EACCES : 0; 48 return (mask & MAY_EXEC) ? -EACCES : 0;
47} 49}
48 50
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da6061a6df40..026cf68553a4 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -120,7 +120,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
120{ 120{
121 struct config_item * item = NULL; 121 struct config_item * item = NULL;
122 122
123 spin_lock(&dcache_lock); 123 spin_lock(&dentry->d_lock);
124 if (!d_unhashed(dentry)) { 124 if (!d_unhashed(dentry)) {
125 struct configfs_dirent * sd = dentry->d_fsdata; 125 struct configfs_dirent * sd = dentry->d_fsdata;
126 if (sd->s_type & CONFIGFS_ITEM_LINK) { 126 if (sd->s_type & CONFIGFS_ITEM_LINK) {
@@ -129,7 +129,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
129 } else 129 } else
130 item = config_item_get(sd->s_element); 130 item = config_item_get(sd->s_element);
131 } 131 }
132 spin_unlock(&dcache_lock); 132 spin_unlock(&dentry->d_lock);
133 133
134 return item; 134 return item;
135} 135}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 0b502f80c691..36637a8c1ed3 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -67,7 +67,7 @@ static void configfs_d_iput(struct dentry * dentry,
67 * We _must_ delete our dentries on last dput, as the chain-to-parent 67 * We _must_ delete our dentries on last dput, as the chain-to-parent
68 * behavior is required to clear the parents of default_groups. 68 * behavior is required to clear the parents of default_groups.
69 */ 69 */
70static int configfs_d_delete(struct dentry *dentry) 70static int configfs_d_delete(const struct dentry *dentry)
71{ 71{
72 return 1; 72 return 1;
73} 73}
@@ -232,10 +232,8 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
232 232
233 sd->s_mode = mode; 233 sd->s_mode = mode;
234 sd->s_dentry = dentry; 234 sd->s_dentry = dentry;
235 if (dentry) { 235 if (dentry)
236 dentry->d_fsdata = configfs_get(sd); 236 dentry->d_fsdata = configfs_get(sd);
237 dentry->d_op = &configfs_dentry_ops;
238 }
239 237
240 return 0; 238 return 0;
241} 239}
@@ -278,7 +276,6 @@ static int create_dir(struct config_item * k, struct dentry * p,
278 error = configfs_create(d, mode, init_dir); 276 error = configfs_create(d, mode, init_dir);
279 if (!error) { 277 if (!error) {
280 inc_nlink(p->d_inode); 278 inc_nlink(p->d_inode);
281 (d)->d_op = &configfs_dentry_ops;
282 } else { 279 } else {
283 struct configfs_dirent *sd = d->d_fsdata; 280 struct configfs_dirent *sd = d->d_fsdata;
284 if (sd) { 281 if (sd) {
@@ -371,9 +368,7 @@ int configfs_create_link(struct configfs_symlink *sl,
371 CONFIGFS_ITEM_LINK); 368 CONFIGFS_ITEM_LINK);
372 if (!err) { 369 if (!err) {
373 err = configfs_create(dentry, mode, init_symlink); 370 err = configfs_create(dentry, mode, init_symlink);
374 if (!err) 371 if (err) {
375 dentry->d_op = &configfs_dentry_ops;
376 else {
377 struct configfs_dirent *sd = dentry->d_fsdata; 372 struct configfs_dirent *sd = dentry->d_fsdata;
378 if (sd) { 373 if (sd) {
379 spin_lock(&configfs_dirent_lock); 374 spin_lock(&configfs_dirent_lock);
@@ -399,8 +394,7 @@ static void remove_dir(struct dentry * d)
399 if (d->d_inode) 394 if (d->d_inode)
400 simple_rmdir(parent->d_inode,d); 395 simple_rmdir(parent->d_inode,d);
401 396
402 pr_debug(" o %s removing done (%d)\n",d->d_name.name, 397 pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count);
403 atomic_read(&d->d_count));
404 398
405 dput(parent); 399 dput(parent);
406} 400}
@@ -448,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
448 return error; 442 return error;
449 } 443 }
450 444
451 dentry->d_op = &configfs_dentry_ops; 445 d_set_d_op(dentry, &configfs_dentry_ops);
452 d_rehash(dentry); 446 d_rehash(dentry);
453 447
454 return 0; 448 return 0;
@@ -493,7 +487,11 @@ static struct dentry * configfs_lookup(struct inode *dir,
493 * If it doesn't exist and it isn't a NOT_PINNED item, 487 * If it doesn't exist and it isn't a NOT_PINNED item,
494 * it must be negative. 488 * it must be negative.
495 */ 489 */
496 return simple_lookup(dir, dentry, nd); 490 if (dentry->d_name.len > NAME_MAX)
491 return ERR_PTR(-ENAMETOOLONG);
492 d_set_d_op(dentry, &configfs_dentry_ops);
493 d_add(dentry, NULL);
494 return NULL;
497 } 495 }
498 496
499out: 497out:
@@ -685,6 +683,7 @@ static int create_default_group(struct config_group *parent_group,
685 ret = -ENOMEM; 683 ret = -ENOMEM;
686 child = d_alloc(parent, &name); 684 child = d_alloc(parent, &name);
687 if (child) { 685 if (child) {
686 d_set_d_op(child, &configfs_dentry_ops);
688 d_add(child, NULL); 687 d_add(child, NULL);
689 688
690 ret = configfs_attach_group(&parent_group->cg_item, 689 ret = configfs_attach_group(&parent_group->cg_item,
@@ -1682,6 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1682 err = -ENOMEM; 1681 err = -ENOMEM;
1683 dentry = d_alloc(configfs_sb->s_root, &name); 1682 dentry = d_alloc(configfs_sb->s_root, &name);
1684 if (dentry) { 1683 if (dentry) {
1684 d_set_d_op(dentry, &configfs_dentry_ops);
1685 d_add(dentry, NULL); 1685 d_add(dentry, NULL);
1686 1686
1687 err = configfs_attach_group(sd->s_element, &group->cg_item, 1687 err = configfs_attach_group(sd->s_element, &group->cg_item,
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 253476d78ed8..c83f4768eeaa 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -250,18 +250,14 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
250 struct dentry * dentry = sd->s_dentry; 250 struct dentry * dentry = sd->s_dentry;
251 251
252 if (dentry) { 252 if (dentry) {
253 spin_lock(&dcache_lock);
254 spin_lock(&dentry->d_lock); 253 spin_lock(&dentry->d_lock);
255 if (!(d_unhashed(dentry) && dentry->d_inode)) { 254 if (!(d_unhashed(dentry) && dentry->d_inode)) {
256 dget_locked(dentry); 255 dget_dlock(dentry);
257 __d_drop(dentry); 256 __d_drop(dentry);
258 spin_unlock(&dentry->d_lock); 257 spin_unlock(&dentry->d_lock);
259 spin_unlock(&dcache_lock);
260 simple_unlink(parent->d_inode, dentry); 258 simple_unlink(parent->d_inode, dentry);
261 } else { 259 } else
262 spin_unlock(&dentry->d_lock); 260 spin_unlock(&dentry->d_lock);
263 spin_unlock(&dcache_lock);
264 }
265 } 261 }
266} 262}
267 263
diff --git a/fs/dcache.c b/fs/dcache.c
index 23702a9d4e6d..5699d4c027cb 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -33,20 +33,58 @@
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/fs_struct.h> 34#include <linux/fs_struct.h>
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h>
36#include "internal.h" 38#include "internal.h"
37 39
40/*
41 * Usage:
42 * dcache->d_inode->i_lock protects:
43 * - i_dentry, d_alias, d_inode of aliases
44 * dcache_hash_bucket lock protects:
45 * - the dcache hash table
46 * s_anon bl list spinlock protects:
47 * - the s_anon list (see __d_drop)
48 * dcache_lru_lock protects:
49 * - the dcache lru lists and counters
50 * d_lock protects:
51 * - d_flags
52 * - d_name
53 * - d_lru
54 * - d_count
55 * - d_unhashed()
56 * - d_parent and d_subdirs
57 * - childrens' d_child and d_parent
58 * - d_alias, d_inode
59 *
60 * Ordering:
61 * dentry->d_inode->i_lock
62 * dentry->d_lock
63 * dcache_lru_lock
64 * dcache_hash_bucket lock
65 * s_anon lock
66 *
67 * If there is an ancestor relationship:
68 * dentry->d_parent->...->d_parent->d_lock
69 * ...
70 * dentry->d_parent->d_lock
71 * dentry->d_lock
72 *
73 * If no ancestor relationship:
74 * if (dentry1 < dentry2)
75 * dentry1->d_lock
76 * dentry2->d_lock
77 */
38int sysctl_vfs_cache_pressure __read_mostly = 100; 78int sysctl_vfs_cache_pressure __read_mostly = 100;
39EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 79EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
40 80
41 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 81static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
42__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 82__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
43 83
44EXPORT_SYMBOL(dcache_lock); 84EXPORT_SYMBOL(rename_lock);
45 85
46static struct kmem_cache *dentry_cache __read_mostly; 86static struct kmem_cache *dentry_cache __read_mostly;
47 87
48#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
49
50/* 88/*
51 * This is the single most critical data structure when it comes 89 * This is the single most critical data structure when it comes
52 * to the dcache: the hashtable for lookups. Somebody should try 90 * to the dcache: the hashtable for lookups. Somebody should try
@@ -60,22 +98,51 @@ static struct kmem_cache *dentry_cache __read_mostly;
60 98
61static unsigned int d_hash_mask __read_mostly; 99static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 100static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 101
102struct dcache_hash_bucket {
103 struct hlist_bl_head head;
104};
105static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
106
107static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
108 unsigned long hash)
109{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
111 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
112 return dentry_hashtable + (hash & D_HASHMASK);
113}
114
115static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
116{
117 bit_spin_lock(0, (unsigned long *)&b->head.first);
118}
119
120static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
121{
122 __bit_spin_unlock(0, (unsigned long *)&b->head.first);
123}
64 124
65/* Statistics gathering. */ 125/* Statistics gathering. */
66struct dentry_stat_t dentry_stat = { 126struct dentry_stat_t dentry_stat = {
67 .age_limit = 45, 127 .age_limit = 45,
68}; 128};
69 129
70static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; 130static DEFINE_PER_CPU(unsigned int, nr_dentry);
71static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
72 131
73#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 132#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
133static int get_nr_dentry(void)
134{
135 int i;
136 int sum = 0;
137 for_each_possible_cpu(i)
138 sum += per_cpu(nr_dentry, i);
139 return sum < 0 ? 0 : sum;
140}
141
74int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, 142int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
75 size_t *lenp, loff_t *ppos) 143 size_t *lenp, loff_t *ppos)
76{ 144{
77 dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); 145 dentry_stat.nr_dentry = get_nr_dentry();
78 dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
79 return proc_dointvec(table, write, buffer, lenp, ppos); 146 return proc_dointvec(table, write, buffer, lenp, ppos);
80} 147}
81#endif 148#endif
@@ -91,35 +158,50 @@ static void __d_free(struct rcu_head *head)
91} 158}
92 159
93/* 160/*
94 * no dcache_lock, please. 161 * no locks, please.
95 */ 162 */
96static void d_free(struct dentry *dentry) 163static void d_free(struct dentry *dentry)
97{ 164{
98 percpu_counter_dec(&nr_dentry); 165 BUG_ON(dentry->d_count);
166 this_cpu_dec(nr_dentry);
99 if (dentry->d_op && dentry->d_op->d_release) 167 if (dentry->d_op && dentry->d_op->d_release)
100 dentry->d_op->d_release(dentry); 168 dentry->d_op->d_release(dentry);
101 169
102 /* if dentry was never inserted into hash, immediate free is OK */ 170 /* if dentry was never inserted into hash, immediate free is OK */
103 if (hlist_unhashed(&dentry->d_hash)) 171 if (hlist_bl_unhashed(&dentry->d_hash))
104 __d_free(&dentry->d_u.d_rcu); 172 __d_free(&dentry->d_u.d_rcu);
105 else 173 else
106 call_rcu(&dentry->d_u.d_rcu, __d_free); 174 call_rcu(&dentry->d_u.d_rcu, __d_free);
107} 175}
108 176
177/**
178 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
179 * After this call, in-progress rcu-walk path lookup will fail. This
180 * should be called after unhashing, and after changing d_inode (if
181 * the dentry has not already been unhashed).
182 */
183static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
184{
185 assert_spin_locked(&dentry->d_lock);
186 /* Go through a barrier */
187 write_seqcount_barrier(&dentry->d_seq);
188}
189
109/* 190/*
110 * Release the dentry's inode, using the filesystem 191 * Release the dentry's inode, using the filesystem
111 * d_iput() operation if defined. 192 * d_iput() operation if defined. Dentry has no refcount
193 * and is unhashed.
112 */ 194 */
113static void dentry_iput(struct dentry * dentry) 195static void dentry_iput(struct dentry * dentry)
114 __releases(dentry->d_lock) 196 __releases(dentry->d_lock)
115 __releases(dcache_lock) 197 __releases(dentry->d_inode->i_lock)
116{ 198{
117 struct inode *inode = dentry->d_inode; 199 struct inode *inode = dentry->d_inode;
118 if (inode) { 200 if (inode) {
119 dentry->d_inode = NULL; 201 dentry->d_inode = NULL;
120 list_del_init(&dentry->d_alias); 202 list_del_init(&dentry->d_alias);
121 spin_unlock(&dentry->d_lock); 203 spin_unlock(&dentry->d_lock);
122 spin_unlock(&dcache_lock); 204 spin_unlock(&inode->i_lock);
123 if (!inode->i_nlink) 205 if (!inode->i_nlink)
124 fsnotify_inoderemove(inode); 206 fsnotify_inoderemove(inode);
125 if (dentry->d_op && dentry->d_op->d_iput) 207 if (dentry->d_op && dentry->d_op->d_iput)
@@ -128,40 +210,72 @@ static void dentry_iput(struct dentry * dentry)
128 iput(inode); 210 iput(inode);
129 } else { 211 } else {
130 spin_unlock(&dentry->d_lock); 212 spin_unlock(&dentry->d_lock);
131 spin_unlock(&dcache_lock);
132 } 213 }
133} 214}
134 215
135/* 216/*
136 * dentry_lru_(add|del|move_tail) must be called with dcache_lock held. 217 * Release the dentry's inode, using the filesystem
218 * d_iput() operation if defined. dentry remains in-use.
219 */
220static void dentry_unlink_inode(struct dentry * dentry)
221 __releases(dentry->d_lock)
222 __releases(dentry->d_inode->i_lock)
223{
224 struct inode *inode = dentry->d_inode;
225 dentry->d_inode = NULL;
226 list_del_init(&dentry->d_alias);
227 dentry_rcuwalk_barrier(dentry);
228 spin_unlock(&dentry->d_lock);
229 spin_unlock(&inode->i_lock);
230 if (!inode->i_nlink)
231 fsnotify_inoderemove(inode);
232 if (dentry->d_op && dentry->d_op->d_iput)
233 dentry->d_op->d_iput(dentry, inode);
234 else
235 iput(inode);
236}
237
238/*
239 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
137 */ 240 */
138static void dentry_lru_add(struct dentry *dentry) 241static void dentry_lru_add(struct dentry *dentry)
139{ 242{
140 if (list_empty(&dentry->d_lru)) { 243 if (list_empty(&dentry->d_lru)) {
244 spin_lock(&dcache_lru_lock);
141 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 245 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
142 dentry->d_sb->s_nr_dentry_unused++; 246 dentry->d_sb->s_nr_dentry_unused++;
143 percpu_counter_inc(&nr_dentry_unused); 247 dentry_stat.nr_unused++;
248 spin_unlock(&dcache_lru_lock);
144 } 249 }
145} 250}
146 251
252static void __dentry_lru_del(struct dentry *dentry)
253{
254 list_del_init(&dentry->d_lru);
255 dentry->d_sb->s_nr_dentry_unused--;
256 dentry_stat.nr_unused--;
257}
258
147static void dentry_lru_del(struct dentry *dentry) 259static void dentry_lru_del(struct dentry *dentry)
148{ 260{
149 if (!list_empty(&dentry->d_lru)) { 261 if (!list_empty(&dentry->d_lru)) {
150 list_del_init(&dentry->d_lru); 262 spin_lock(&dcache_lru_lock);
151 dentry->d_sb->s_nr_dentry_unused--; 263 __dentry_lru_del(dentry);
152 percpu_counter_dec(&nr_dentry_unused); 264 spin_unlock(&dcache_lru_lock);
153 } 265 }
154} 266}
155 267
156static void dentry_lru_move_tail(struct dentry *dentry) 268static void dentry_lru_move_tail(struct dentry *dentry)
157{ 269{
270 spin_lock(&dcache_lru_lock);
158 if (list_empty(&dentry->d_lru)) { 271 if (list_empty(&dentry->d_lru)) {
159 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 272 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
160 dentry->d_sb->s_nr_dentry_unused++; 273 dentry->d_sb->s_nr_dentry_unused++;
161 percpu_counter_inc(&nr_dentry_unused); 274 dentry_stat.nr_unused++;
162 } else { 275 } else {
163 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 276 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
164 } 277 }
278 spin_unlock(&dcache_lru_lock);
165} 279}
166 280
167/** 281/**
@@ -171,22 +285,115 @@ static void dentry_lru_move_tail(struct dentry *dentry)
171 * The dentry must already be unhashed and removed from the LRU. 285 * The dentry must already be unhashed and removed from the LRU.
172 * 286 *
173 * If this is the root of the dentry tree, return NULL. 287 * If this is the root of the dentry tree, return NULL.
288 *
289 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
290 * d_kill.
174 */ 291 */
175static struct dentry *d_kill(struct dentry *dentry) 292static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
176 __releases(dentry->d_lock) 293 __releases(dentry->d_lock)
177 __releases(dcache_lock) 294 __releases(parent->d_lock)
295 __releases(dentry->d_inode->i_lock)
178{ 296{
179 struct dentry *parent; 297 dentry->d_parent = NULL;
180
181 list_del(&dentry->d_u.d_child); 298 list_del(&dentry->d_u.d_child);
182 /*drops the locks, at that point nobody can reach this dentry */ 299 if (parent)
300 spin_unlock(&parent->d_lock);
183 dentry_iput(dentry); 301 dentry_iput(dentry);
302 /*
303 * dentry_iput drops the locks, at which point nobody (except
304 * transient RCU lookups) can reach this dentry.
305 */
306 d_free(dentry);
307 return parent;
308}
309
310/**
311 * d_drop - drop a dentry
312 * @dentry: dentry to drop
313 *
314 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
315 * be found through a VFS lookup any more. Note that this is different from
316 * deleting the dentry - d_delete will try to mark the dentry negative if
317 * possible, giving a successful _negative_ lookup, while d_drop will
318 * just make the cache lookup fail.
319 *
320 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
321 * reason (NFS timeouts or autofs deletes).
322 *
323 * __d_drop requires dentry->d_lock.
324 */
325void __d_drop(struct dentry *dentry)
326{
327 if (!(dentry->d_flags & DCACHE_UNHASHED)) {
328 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
329 bit_spin_lock(0,
330 (unsigned long *)&dentry->d_sb->s_anon.first);
331 dentry->d_flags |= DCACHE_UNHASHED;
332 hlist_bl_del_init(&dentry->d_hash);
333 __bit_spin_unlock(0,
334 (unsigned long *)&dentry->d_sb->s_anon.first);
335 } else {
336 struct dcache_hash_bucket *b;
337 b = d_hash(dentry->d_parent, dentry->d_name.hash);
338 spin_lock_bucket(b);
339 /*
340 * We may not actually need to put DCACHE_UNHASHED
341 * manipulations under the hash lock, but follow
342 * the principle of least surprise.
343 */
344 dentry->d_flags |= DCACHE_UNHASHED;
345 hlist_bl_del_rcu(&dentry->d_hash);
346 spin_unlock_bucket(b);
347 dentry_rcuwalk_barrier(dentry);
348 }
349 }
350}
351EXPORT_SYMBOL(__d_drop);
352
353void d_drop(struct dentry *dentry)
354{
355 spin_lock(&dentry->d_lock);
356 __d_drop(dentry);
357 spin_unlock(&dentry->d_lock);
358}
359EXPORT_SYMBOL(d_drop);
360
361/*
362 * Finish off a dentry we've decided to kill.
363 * dentry->d_lock must be held, returns with it unlocked.
364 * If ref is non-zero, then decrement the refcount too.
365 * Returns dentry requiring refcount drop, or NULL if we're done.
366 */
367static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
368 __releases(dentry->d_lock)
369{
370 struct inode *inode;
371 struct dentry *parent;
372
373 inode = dentry->d_inode;
374 if (inode && !spin_trylock(&inode->i_lock)) {
375relock:
376 spin_unlock(&dentry->d_lock);
377 cpu_relax();
378 return dentry; /* try again with same dentry */
379 }
184 if (IS_ROOT(dentry)) 380 if (IS_ROOT(dentry))
185 parent = NULL; 381 parent = NULL;
186 else 382 else
187 parent = dentry->d_parent; 383 parent = dentry->d_parent;
188 d_free(dentry); 384 if (parent && !spin_trylock(&parent->d_lock)) {
189 return parent; 385 if (inode)
386 spin_unlock(&inode->i_lock);
387 goto relock;
388 }
389
390 if (ref)
391 dentry->d_count--;
392 /* if dentry was on the d_lru list delete it from there */
393 dentry_lru_del(dentry);
394 /* if it was on the hash then remove it */
395 __d_drop(dentry);
396 return d_kill(dentry, parent);
190} 397}
191 398
192/* 399/*
@@ -214,34 +421,26 @@ static struct dentry *d_kill(struct dentry *dentry)
214 * call the dentry unlink method as well as removing it from the queues and 421 * call the dentry unlink method as well as removing it from the queues and
215 * releasing its resources. If the parent dentries were scheduled for release 422 * releasing its resources. If the parent dentries were scheduled for release
216 * they too may now get deleted. 423 * they too may now get deleted.
217 *
218 * no dcache lock, please.
219 */ 424 */
220
221void dput(struct dentry *dentry) 425void dput(struct dentry *dentry)
222{ 426{
223 if (!dentry) 427 if (!dentry)
224 return; 428 return;
225 429
226repeat: 430repeat:
227 if (atomic_read(&dentry->d_count) == 1) 431 if (dentry->d_count == 1)
228 might_sleep(); 432 might_sleep();
229 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
230 return;
231
232 spin_lock(&dentry->d_lock); 433 spin_lock(&dentry->d_lock);
233 if (atomic_read(&dentry->d_count)) { 434 BUG_ON(!dentry->d_count);
435 if (dentry->d_count > 1) {
436 dentry->d_count--;
234 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
235 spin_unlock(&dcache_lock);
236 return; 438 return;
237 } 439 }
238 440
239 /* 441 if (dentry->d_flags & DCACHE_OP_DELETE) {
240 * AV: ->d_delete() is _NOT_ allowed to block now.
241 */
242 if (dentry->d_op && dentry->d_op->d_delete) {
243 if (dentry->d_op->d_delete(dentry)) 442 if (dentry->d_op->d_delete(dentry))
244 goto unhash_it; 443 goto kill_it;
245 } 444 }
246 445
247 /* Unreachable? Get rid of it */ 446 /* Unreachable? Get rid of it */
@@ -252,16 +451,12 @@ repeat:
252 dentry->d_flags |= DCACHE_REFERENCED; 451 dentry->d_flags |= DCACHE_REFERENCED;
253 dentry_lru_add(dentry); 452 dentry_lru_add(dentry);
254 453
255 spin_unlock(&dentry->d_lock); 454 dentry->d_count--;
256 spin_unlock(&dcache_lock); 455 spin_unlock(&dentry->d_lock);
257 return; 456 return;
258 457
259unhash_it:
260 __d_drop(dentry);
261kill_it: 458kill_it:
262 /* if dentry was on the d_lru list delete it from there */ 459 dentry = dentry_kill(dentry, 1);
263 dentry_lru_del(dentry);
264 dentry = d_kill(dentry);
265 if (dentry) 460 if (dentry)
266 goto repeat; 461 goto repeat;
267} 462}
@@ -284,9 +479,9 @@ int d_invalidate(struct dentry * dentry)
284 /* 479 /*
285 * If it's already been dropped, return OK. 480 * If it's already been dropped, return OK.
286 */ 481 */
287 spin_lock(&dcache_lock); 482 spin_lock(&dentry->d_lock);
288 if (d_unhashed(dentry)) { 483 if (d_unhashed(dentry)) {
289 spin_unlock(&dcache_lock); 484 spin_unlock(&dentry->d_lock);
290 return 0; 485 return 0;
291 } 486 }
292 /* 487 /*
@@ -294,9 +489,9 @@ int d_invalidate(struct dentry * dentry)
294 * to get rid of unused child entries. 489 * to get rid of unused child entries.
295 */ 490 */
296 if (!list_empty(&dentry->d_subdirs)) { 491 if (!list_empty(&dentry->d_subdirs)) {
297 spin_unlock(&dcache_lock); 492 spin_unlock(&dentry->d_lock);
298 shrink_dcache_parent(dentry); 493 shrink_dcache_parent(dentry);
299 spin_lock(&dcache_lock); 494 spin_lock(&dentry->d_lock);
300 } 495 }
301 496
302 /* 497 /*
@@ -309,35 +504,61 @@ int d_invalidate(struct dentry * dentry)
309 * we might still populate it if it was a 504 * we might still populate it if it was a
310 * working directory or similar). 505 * working directory or similar).
311 */ 506 */
312 spin_lock(&dentry->d_lock); 507 if (dentry->d_count > 1) {
313 if (atomic_read(&dentry->d_count) > 1) {
314 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { 508 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
315 spin_unlock(&dentry->d_lock); 509 spin_unlock(&dentry->d_lock);
316 spin_unlock(&dcache_lock);
317 return -EBUSY; 510 return -EBUSY;
318 } 511 }
319 } 512 }
320 513
321 __d_drop(dentry); 514 __d_drop(dentry);
322 spin_unlock(&dentry->d_lock); 515 spin_unlock(&dentry->d_lock);
323 spin_unlock(&dcache_lock);
324 return 0; 516 return 0;
325} 517}
326EXPORT_SYMBOL(d_invalidate); 518EXPORT_SYMBOL(d_invalidate);
327 519
328/* This should be called _only_ with dcache_lock held */ 520/* This must be called with d_lock held */
329static inline struct dentry * __dget_locked(struct dentry *dentry) 521static inline void __dget_dlock(struct dentry *dentry)
330{ 522{
331 atomic_inc(&dentry->d_count); 523 dentry->d_count++;
332 dentry_lru_del(dentry);
333 return dentry;
334} 524}
335 525
336struct dentry * dget_locked(struct dentry *dentry) 526static inline void __dget(struct dentry *dentry)
337{ 527{
338 return __dget_locked(dentry); 528 spin_lock(&dentry->d_lock);
529 __dget_dlock(dentry);
530 spin_unlock(&dentry->d_lock);
531}
532
533struct dentry *dget_parent(struct dentry *dentry)
534{
535 struct dentry *ret;
536
537repeat:
538 /*
539 * Don't need rcu_dereference because we re-check it was correct under
540 * the lock.
541 */
542 rcu_read_lock();
543 ret = dentry->d_parent;
544 if (!ret) {
545 rcu_read_unlock();
546 goto out;
547 }
548 spin_lock(&ret->d_lock);
549 if (unlikely(ret != dentry->d_parent)) {
550 spin_unlock(&ret->d_lock);
551 rcu_read_unlock();
552 goto repeat;
553 }
554 rcu_read_unlock();
555 BUG_ON(!ret->d_count);
556 ret->d_count++;
557 spin_unlock(&ret->d_lock);
558out:
559 return ret;
339} 560}
340EXPORT_SYMBOL(dget_locked); 561EXPORT_SYMBOL(dget_parent);
341 562
342/** 563/**
343 * d_find_alias - grab a hashed alias of inode 564 * d_find_alias - grab a hashed alias of inode
@@ -355,42 +576,51 @@ EXPORT_SYMBOL(dget_locked);
355 * any other hashed alias over that one unless @want_discon is set, 576 * any other hashed alias over that one unless @want_discon is set,
356 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. 577 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
357 */ 578 */
358 579static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
359static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
360{ 580{
361 struct list_head *head, *next, *tmp; 581 struct dentry *alias, *discon_alias;
362 struct dentry *alias, *discon_alias=NULL;
363 582
364 head = &inode->i_dentry; 583again:
365 next = inode->i_dentry.next; 584 discon_alias = NULL;
366 while (next != head) { 585 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
367 tmp = next; 586 spin_lock(&alias->d_lock);
368 next = tmp->next;
369 prefetch(next);
370 alias = list_entry(tmp, struct dentry, d_alias);
371 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 587 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
372 if (IS_ROOT(alias) && 588 if (IS_ROOT(alias) &&
373 (alias->d_flags & DCACHE_DISCONNECTED)) 589 (alias->d_flags & DCACHE_DISCONNECTED)) {
374 discon_alias = alias; 590 discon_alias = alias;
375 else if (!want_discon) { 591 } else if (!want_discon) {
376 __dget_locked(alias); 592 __dget_dlock(alias);
593 spin_unlock(&alias->d_lock);
594 return alias;
595 }
596 }
597 spin_unlock(&alias->d_lock);
598 }
599 if (discon_alias) {
600 alias = discon_alias;
601 spin_lock(&alias->d_lock);
602 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
603 if (IS_ROOT(alias) &&
604 (alias->d_flags & DCACHE_DISCONNECTED)) {
605 __dget_dlock(alias);
606 spin_unlock(&alias->d_lock);
377 return alias; 607 return alias;
378 } 608 }
379 } 609 }
610 spin_unlock(&alias->d_lock);
611 goto again;
380 } 612 }
381 if (discon_alias) 613 return NULL;
382 __dget_locked(discon_alias);
383 return discon_alias;
384} 614}
385 615
386struct dentry * d_find_alias(struct inode *inode) 616struct dentry *d_find_alias(struct inode *inode)
387{ 617{
388 struct dentry *de = NULL; 618 struct dentry *de = NULL;
389 619
390 if (!list_empty(&inode->i_dentry)) { 620 if (!list_empty(&inode->i_dentry)) {
391 spin_lock(&dcache_lock); 621 spin_lock(&inode->i_lock);
392 de = __d_find_alias(inode, 0); 622 de = __d_find_alias(inode, 0);
393 spin_unlock(&dcache_lock); 623 spin_unlock(&inode->i_lock);
394 } 624 }
395 return de; 625 return de;
396} 626}
@@ -404,54 +634,61 @@ void d_prune_aliases(struct inode *inode)
404{ 634{
405 struct dentry *dentry; 635 struct dentry *dentry;
406restart: 636restart:
407 spin_lock(&dcache_lock); 637 spin_lock(&inode->i_lock);
408 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 638 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
409 spin_lock(&dentry->d_lock); 639 spin_lock(&dentry->d_lock);
410 if (!atomic_read(&dentry->d_count)) { 640 if (!dentry->d_count) {
411 __dget_locked(dentry); 641 __dget_dlock(dentry);
412 __d_drop(dentry); 642 __d_drop(dentry);
413 spin_unlock(&dentry->d_lock); 643 spin_unlock(&dentry->d_lock);
414 spin_unlock(&dcache_lock); 644 spin_unlock(&inode->i_lock);
415 dput(dentry); 645 dput(dentry);
416 goto restart; 646 goto restart;
417 } 647 }
418 spin_unlock(&dentry->d_lock); 648 spin_unlock(&dentry->d_lock);
419 } 649 }
420 spin_unlock(&dcache_lock); 650 spin_unlock(&inode->i_lock);
421} 651}
422EXPORT_SYMBOL(d_prune_aliases); 652EXPORT_SYMBOL(d_prune_aliases);
423 653
424/* 654/*
425 * Throw away a dentry - free the inode, dput the parent. This requires that 655 * Try to throw away a dentry - free the inode, dput the parent.
426 * the LRU list has already been removed. 656 * Requires dentry->d_lock is held, and dentry->d_count == 0.
657 * Releases dentry->d_lock.
427 * 658 *
428 * Try to prune ancestors as well. This is necessary to prevent 659 * This may fail if locks cannot be acquired no problem, just try again.
429 * quadratic behavior of shrink_dcache_parent(), but is also expected
430 * to be beneficial in reducing dentry cache fragmentation.
431 */ 660 */
432static void prune_one_dentry(struct dentry * dentry) 661static void try_prune_one_dentry(struct dentry *dentry)
433 __releases(dentry->d_lock) 662 __releases(dentry->d_lock)
434 __releases(dcache_lock)
435 __acquires(dcache_lock)
436{ 663{
437 __d_drop(dentry); 664 struct dentry *parent;
438 dentry = d_kill(dentry);
439 665
666 parent = dentry_kill(dentry, 0);
440 /* 667 /*
441 * Prune ancestors. Locking is simpler than in dput(), 668 * If dentry_kill returns NULL, we have nothing more to do.
442 * because dcache_lock needs to be taken anyway. 669 * if it returns the same dentry, trylocks failed. In either
670 * case, just loop again.
671 *
672 * Otherwise, we need to prune ancestors too. This is necessary
673 * to prevent quadratic behavior of shrink_dcache_parent(), but
674 * is also expected to be beneficial in reducing dentry cache
675 * fragmentation.
443 */ 676 */
444 spin_lock(&dcache_lock); 677 if (!parent)
678 return;
679 if (parent == dentry)
680 return;
681
682 /* Prune ancestors. */
683 dentry = parent;
445 while (dentry) { 684 while (dentry) {
446 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) 685 spin_lock(&dentry->d_lock);
686 if (dentry->d_count > 1) {
687 dentry->d_count--;
688 spin_unlock(&dentry->d_lock);
447 return; 689 return;
448 690 }
449 if (dentry->d_op && dentry->d_op->d_delete) 691 dentry = dentry_kill(dentry, 1);
450 dentry->d_op->d_delete(dentry);
451 dentry_lru_del(dentry);
452 __d_drop(dentry);
453 dentry = d_kill(dentry);
454 spin_lock(&dcache_lock);
455 } 692 }
456} 693}
457 694
@@ -459,24 +696,35 @@ static void shrink_dentry_list(struct list_head *list)
459{ 696{
460 struct dentry *dentry; 697 struct dentry *dentry;
461 698
462 while (!list_empty(list)) { 699 rcu_read_lock();
463 dentry = list_entry(list->prev, struct dentry, d_lru); 700 for (;;) {
464 dentry_lru_del(dentry); 701 dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
702 if (&dentry->d_lru == list)
703 break; /* empty */
704 spin_lock(&dentry->d_lock);
705 if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
706 spin_unlock(&dentry->d_lock);
707 continue;
708 }
465 709
466 /* 710 /*
467 * We found an inuse dentry which was not removed from 711 * We found an inuse dentry which was not removed from
468 * the LRU because of laziness during lookup. Do not free 712 * the LRU because of laziness during lookup. Do not free
469 * it - just keep it off the LRU list. 713 * it - just keep it off the LRU list.
470 */ 714 */
471 spin_lock(&dentry->d_lock); 715 if (dentry->d_count) {
472 if (atomic_read(&dentry->d_count)) { 716 dentry_lru_del(dentry);
473 spin_unlock(&dentry->d_lock); 717 spin_unlock(&dentry->d_lock);
474 continue; 718 continue;
475 } 719 }
476 prune_one_dentry(dentry); 720
477 /* dentry->d_lock was dropped in prune_one_dentry() */ 721 rcu_read_unlock();
478 cond_resched_lock(&dcache_lock); 722
723 try_prune_one_dentry(dentry);
724
725 rcu_read_lock();
479 } 726 }
727 rcu_read_unlock();
480} 728}
481 729
482/** 730/**
@@ -495,42 +743,44 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
495 LIST_HEAD(tmp); 743 LIST_HEAD(tmp);
496 int cnt = *count; 744 int cnt = *count;
497 745
498 spin_lock(&dcache_lock); 746relock:
747 spin_lock(&dcache_lru_lock);
499 while (!list_empty(&sb->s_dentry_lru)) { 748 while (!list_empty(&sb->s_dentry_lru)) {
500 dentry = list_entry(sb->s_dentry_lru.prev, 749 dentry = list_entry(sb->s_dentry_lru.prev,
501 struct dentry, d_lru); 750 struct dentry, d_lru);
502 BUG_ON(dentry->d_sb != sb); 751 BUG_ON(dentry->d_sb != sb);
503 752
753 if (!spin_trylock(&dentry->d_lock)) {
754 spin_unlock(&dcache_lru_lock);
755 cpu_relax();
756 goto relock;
757 }
758
504 /* 759 /*
505 * If we are honouring the DCACHE_REFERENCED flag and the 760 * If we are honouring the DCACHE_REFERENCED flag and the
506 * dentry has this flag set, don't free it. Clear the flag 761 * dentry has this flag set, don't free it. Clear the flag
507 * and put it back on the LRU. 762 * and put it back on the LRU.
508 */ 763 */
509 if (flags & DCACHE_REFERENCED) { 764 if (flags & DCACHE_REFERENCED &&
510 spin_lock(&dentry->d_lock); 765 dentry->d_flags & DCACHE_REFERENCED) {
511 if (dentry->d_flags & DCACHE_REFERENCED) { 766 dentry->d_flags &= ~DCACHE_REFERENCED;
512 dentry->d_flags &= ~DCACHE_REFERENCED; 767 list_move(&dentry->d_lru, &referenced);
513 list_move(&dentry->d_lru, &referenced);
514 spin_unlock(&dentry->d_lock);
515 cond_resched_lock(&dcache_lock);
516 continue;
517 }
518 spin_unlock(&dentry->d_lock); 768 spin_unlock(&dentry->d_lock);
769 } else {
770 list_move_tail(&dentry->d_lru, &tmp);
771 spin_unlock(&dentry->d_lock);
772 if (!--cnt)
773 break;
519 } 774 }
520 775 cond_resched_lock(&dcache_lru_lock);
521 list_move_tail(&dentry->d_lru, &tmp);
522 if (!--cnt)
523 break;
524 cond_resched_lock(&dcache_lock);
525 } 776 }
526
527 *count = cnt;
528 shrink_dentry_list(&tmp);
529
530 if (!list_empty(&referenced)) 777 if (!list_empty(&referenced))
531 list_splice(&referenced, &sb->s_dentry_lru); 778 list_splice(&referenced, &sb->s_dentry_lru);
532 spin_unlock(&dcache_lock); 779 spin_unlock(&dcache_lru_lock);
533 780
781 shrink_dentry_list(&tmp);
782
783 *count = cnt;
534} 784}
535 785
536/** 786/**
@@ -546,13 +796,12 @@ static void prune_dcache(int count)
546{ 796{
547 struct super_block *sb, *p = NULL; 797 struct super_block *sb, *p = NULL;
548 int w_count; 798 int w_count;
549 int unused = percpu_counter_sum_positive(&nr_dentry_unused); 799 int unused = dentry_stat.nr_unused;
550 int prune_ratio; 800 int prune_ratio;
551 int pruned; 801 int pruned;
552 802
553 if (unused == 0 || count == 0) 803 if (unused == 0 || count == 0)
554 return; 804 return;
555 spin_lock(&dcache_lock);
556 if (count >= unused) 805 if (count >= unused)
557 prune_ratio = 1; 806 prune_ratio = 1;
558 else 807 else
@@ -589,11 +838,9 @@ static void prune_dcache(int count)
589 if (down_read_trylock(&sb->s_umount)) { 838 if (down_read_trylock(&sb->s_umount)) {
590 if ((sb->s_root != NULL) && 839 if ((sb->s_root != NULL) &&
591 (!list_empty(&sb->s_dentry_lru))) { 840 (!list_empty(&sb->s_dentry_lru))) {
592 spin_unlock(&dcache_lock);
593 __shrink_dcache_sb(sb, &w_count, 841 __shrink_dcache_sb(sb, &w_count,
594 DCACHE_REFERENCED); 842 DCACHE_REFERENCED);
595 pruned -= w_count; 843 pruned -= w_count;
596 spin_lock(&dcache_lock);
597 } 844 }
598 up_read(&sb->s_umount); 845 up_read(&sb->s_umount);
599 } 846 }
@@ -609,7 +856,6 @@ static void prune_dcache(int count)
609 if (p) 856 if (p)
610 __put_super(p); 857 __put_super(p);
611 spin_unlock(&sb_lock); 858 spin_unlock(&sb_lock);
612 spin_unlock(&dcache_lock);
613} 859}
614 860
615/** 861/**
@@ -623,12 +869,14 @@ void shrink_dcache_sb(struct super_block *sb)
623{ 869{
624 LIST_HEAD(tmp); 870 LIST_HEAD(tmp);
625 871
626 spin_lock(&dcache_lock); 872 spin_lock(&dcache_lru_lock);
627 while (!list_empty(&sb->s_dentry_lru)) { 873 while (!list_empty(&sb->s_dentry_lru)) {
628 list_splice_init(&sb->s_dentry_lru, &tmp); 874 list_splice_init(&sb->s_dentry_lru, &tmp);
875 spin_unlock(&dcache_lru_lock);
629 shrink_dentry_list(&tmp); 876 shrink_dentry_list(&tmp);
877 spin_lock(&dcache_lru_lock);
630 } 878 }
631 spin_unlock(&dcache_lock); 879 spin_unlock(&dcache_lru_lock);
632} 880}
633EXPORT_SYMBOL(shrink_dcache_sb); 881EXPORT_SYMBOL(shrink_dcache_sb);
634 882
@@ -645,10 +893,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
645 BUG_ON(!IS_ROOT(dentry)); 893 BUG_ON(!IS_ROOT(dentry));
646 894
647 /* detach this root from the system */ 895 /* detach this root from the system */
648 spin_lock(&dcache_lock); 896 spin_lock(&dentry->d_lock);
649 dentry_lru_del(dentry); 897 dentry_lru_del(dentry);
650 __d_drop(dentry); 898 __d_drop(dentry);
651 spin_unlock(&dcache_lock); 899 spin_unlock(&dentry->d_lock);
652 900
653 for (;;) { 901 for (;;) {
654 /* descend to the first leaf in the current subtree */ 902 /* descend to the first leaf in the current subtree */
@@ -657,14 +905,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
657 905
658 /* this is a branch with children - detach all of them 906 /* this is a branch with children - detach all of them
659 * from the system in one go */ 907 * from the system in one go */
660 spin_lock(&dcache_lock); 908 spin_lock(&dentry->d_lock);
661 list_for_each_entry(loop, &dentry->d_subdirs, 909 list_for_each_entry(loop, &dentry->d_subdirs,
662 d_u.d_child) { 910 d_u.d_child) {
911 spin_lock_nested(&loop->d_lock,
912 DENTRY_D_LOCK_NESTED);
663 dentry_lru_del(loop); 913 dentry_lru_del(loop);
664 __d_drop(loop); 914 __d_drop(loop);
665 cond_resched_lock(&dcache_lock); 915 spin_unlock(&loop->d_lock);
666 } 916 }
667 spin_unlock(&dcache_lock); 917 spin_unlock(&dentry->d_lock);
668 918
669 /* move to the first child */ 919 /* move to the first child */
670 dentry = list_entry(dentry->d_subdirs.next, 920 dentry = list_entry(dentry->d_subdirs.next,
@@ -676,7 +926,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
676 do { 926 do {
677 struct inode *inode; 927 struct inode *inode;
678 928
679 if (atomic_read(&dentry->d_count) != 0) { 929 if (dentry->d_count != 0) {
680 printk(KERN_ERR 930 printk(KERN_ERR
681 "BUG: Dentry %p{i=%lx,n=%s}" 931 "BUG: Dentry %p{i=%lx,n=%s}"
682 " still in use (%d)" 932 " still in use (%d)"
@@ -685,20 +935,23 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
685 dentry->d_inode ? 935 dentry->d_inode ?
686 dentry->d_inode->i_ino : 0UL, 936 dentry->d_inode->i_ino : 0UL,
687 dentry->d_name.name, 937 dentry->d_name.name,
688 atomic_read(&dentry->d_count), 938 dentry->d_count,
689 dentry->d_sb->s_type->name, 939 dentry->d_sb->s_type->name,
690 dentry->d_sb->s_id); 940 dentry->d_sb->s_id);
691 BUG(); 941 BUG();
692 } 942 }
693 943
694 if (IS_ROOT(dentry)) 944 if (IS_ROOT(dentry)) {
695 parent = NULL; 945 parent = NULL;
696 else { 946 list_del(&dentry->d_u.d_child);
947 } else {
697 parent = dentry->d_parent; 948 parent = dentry->d_parent;
698 atomic_dec(&parent->d_count); 949 spin_lock(&parent->d_lock);
950 parent->d_count--;
951 list_del(&dentry->d_u.d_child);
952 spin_unlock(&parent->d_lock);
699 } 953 }
700 954
701 list_del(&dentry->d_u.d_child);
702 detached++; 955 detached++;
703 956
704 inode = dentry->d_inode; 957 inode = dentry->d_inode;
@@ -728,8 +981,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
728 981
729/* 982/*
730 * destroy the dentries attached to a superblock on unmounting 983 * destroy the dentries attached to a superblock on unmounting
731 * - we don't need to use dentry->d_lock, and only need dcache_lock when 984 * - we don't need to use dentry->d_lock because:
732 * removing the dentry from the system lists and hashes because:
733 * - the superblock is detached from all mountings and open files, so the 985 * - the superblock is detached from all mountings and open files, so the
734 * dentry trees will not be rearranged by the VFS 986 * dentry trees will not be rearranged by the VFS
735 * - s_umount is write-locked, so the memory pressure shrinker will ignore 987 * - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -746,11 +998,13 @@ void shrink_dcache_for_umount(struct super_block *sb)
746 998
747 dentry = sb->s_root; 999 dentry = sb->s_root;
748 sb->s_root = NULL; 1000 sb->s_root = NULL;
749 atomic_dec(&dentry->d_count); 1001 spin_lock(&dentry->d_lock);
1002 dentry->d_count--;
1003 spin_unlock(&dentry->d_lock);
750 shrink_dcache_for_umount_subtree(dentry); 1004 shrink_dcache_for_umount_subtree(dentry);
751 1005
752 while (!hlist_empty(&sb->s_anon)) { 1006 while (!hlist_bl_empty(&sb->s_anon)) {
753 dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); 1007 dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
754 shrink_dcache_for_umount_subtree(dentry); 1008 shrink_dcache_for_umount_subtree(dentry);
755 } 1009 }
756} 1010}
@@ -768,15 +1022,20 @@ void shrink_dcache_for_umount(struct super_block *sb)
768 * Return true if the parent or its subdirectories contain 1022 * Return true if the parent or its subdirectories contain
769 * a mount point 1023 * a mount point
770 */ 1024 */
771
772int have_submounts(struct dentry *parent) 1025int have_submounts(struct dentry *parent)
773{ 1026{
774 struct dentry *this_parent = parent; 1027 struct dentry *this_parent;
775 struct list_head *next; 1028 struct list_head *next;
1029 unsigned seq;
1030 int locked = 0;
1031
1032 seq = read_seqbegin(&rename_lock);
1033again:
1034 this_parent = parent;
776 1035
777 spin_lock(&dcache_lock);
778 if (d_mountpoint(parent)) 1036 if (d_mountpoint(parent))
779 goto positive; 1037 goto positive;
1038 spin_lock(&this_parent->d_lock);
780repeat: 1039repeat:
781 next = this_parent->d_subdirs.next; 1040 next = this_parent->d_subdirs.next;
782resume: 1041resume:
@@ -784,27 +1043,65 @@ resume:
784 struct list_head *tmp = next; 1043 struct list_head *tmp = next;
785 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1044 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
786 next = tmp->next; 1045 next = tmp->next;
1046
1047 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
787 /* Have we found a mount point ? */ 1048 /* Have we found a mount point ? */
788 if (d_mountpoint(dentry)) 1049 if (d_mountpoint(dentry)) {
1050 spin_unlock(&dentry->d_lock);
1051 spin_unlock(&this_parent->d_lock);
789 goto positive; 1052 goto positive;
1053 }
790 if (!list_empty(&dentry->d_subdirs)) { 1054 if (!list_empty(&dentry->d_subdirs)) {
1055 spin_unlock(&this_parent->d_lock);
1056 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
791 this_parent = dentry; 1057 this_parent = dentry;
1058 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
792 goto repeat; 1059 goto repeat;
793 } 1060 }
1061 spin_unlock(&dentry->d_lock);
794 } 1062 }
795 /* 1063 /*
796 * All done at this level ... ascend and resume the search. 1064 * All done at this level ... ascend and resume the search.
797 */ 1065 */
798 if (this_parent != parent) { 1066 if (this_parent != parent) {
799 next = this_parent->d_u.d_child.next; 1067 struct dentry *tmp;
800 this_parent = this_parent->d_parent; 1068 struct dentry *child;
1069
1070 tmp = this_parent->d_parent;
1071 rcu_read_lock();
1072 spin_unlock(&this_parent->d_lock);
1073 child = this_parent;
1074 this_parent = tmp;
1075 spin_lock(&this_parent->d_lock);
1076 /* might go back up the wrong parent if we have had a rename
1077 * or deletion */
1078 if (this_parent != child->d_parent ||
1079 (!locked && read_seqretry(&rename_lock, seq))) {
1080 spin_unlock(&this_parent->d_lock);
1081 rcu_read_unlock();
1082 goto rename_retry;
1083 }
1084 rcu_read_unlock();
1085 next = child->d_u.d_child.next;
801 goto resume; 1086 goto resume;
802 } 1087 }
803 spin_unlock(&dcache_lock); 1088 spin_unlock(&this_parent->d_lock);
1089 if (!locked && read_seqretry(&rename_lock, seq))
1090 goto rename_retry;
1091 if (locked)
1092 write_sequnlock(&rename_lock);
804 return 0; /* No mount points found in tree */ 1093 return 0; /* No mount points found in tree */
805positive: 1094positive:
806 spin_unlock(&dcache_lock); 1095 if (!locked && read_seqretry(&rename_lock, seq))
1096 goto rename_retry;
1097 if (locked)
1098 write_sequnlock(&rename_lock);
807 return 1; 1099 return 1;
1100
1101rename_retry:
1102 locked = 1;
1103 write_seqlock(&rename_lock);
1104 goto again;
808} 1105}
809EXPORT_SYMBOL(have_submounts); 1106EXPORT_SYMBOL(have_submounts);
810 1107
@@ -824,11 +1121,16 @@ EXPORT_SYMBOL(have_submounts);
824 */ 1121 */
825static int select_parent(struct dentry * parent) 1122static int select_parent(struct dentry * parent)
826{ 1123{
827 struct dentry *this_parent = parent; 1124 struct dentry *this_parent;
828 struct list_head *next; 1125 struct list_head *next;
1126 unsigned seq;
829 int found = 0; 1127 int found = 0;
1128 int locked = 0;
830 1129
831 spin_lock(&dcache_lock); 1130 seq = read_seqbegin(&rename_lock);
1131again:
1132 this_parent = parent;
1133 spin_lock(&this_parent->d_lock);
832repeat: 1134repeat:
833 next = this_parent->d_subdirs.next; 1135 next = this_parent->d_subdirs.next;
834resume: 1136resume:
@@ -837,11 +1139,13 @@ resume:
837 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1139 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
838 next = tmp->next; 1140 next = tmp->next;
839 1141
1142 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1143
840 /* 1144 /*
841 * move only zero ref count dentries to the end 1145 * move only zero ref count dentries to the end
842 * of the unused list for prune_dcache 1146 * of the unused list for prune_dcache
843 */ 1147 */
844 if (!atomic_read(&dentry->d_count)) { 1148 if (!dentry->d_count) {
845 dentry_lru_move_tail(dentry); 1149 dentry_lru_move_tail(dentry);
846 found++; 1150 found++;
847 } else { 1151 } else {
@@ -853,28 +1157,63 @@ resume:
853 * ensures forward progress). We'll be coming back to find 1157 * ensures forward progress). We'll be coming back to find
854 * the rest. 1158 * the rest.
855 */ 1159 */
856 if (found && need_resched()) 1160 if (found && need_resched()) {
1161 spin_unlock(&dentry->d_lock);
857 goto out; 1162 goto out;
1163 }
858 1164
859 /* 1165 /*
860 * Descend a level if the d_subdirs list is non-empty. 1166 * Descend a level if the d_subdirs list is non-empty.
861 */ 1167 */
862 if (!list_empty(&dentry->d_subdirs)) { 1168 if (!list_empty(&dentry->d_subdirs)) {
1169 spin_unlock(&this_parent->d_lock);
1170 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
863 this_parent = dentry; 1171 this_parent = dentry;
1172 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
864 goto repeat; 1173 goto repeat;
865 } 1174 }
1175
1176 spin_unlock(&dentry->d_lock);
866 } 1177 }
867 /* 1178 /*
868 * All done at this level ... ascend and resume the search. 1179 * All done at this level ... ascend and resume the search.
869 */ 1180 */
870 if (this_parent != parent) { 1181 if (this_parent != parent) {
871 next = this_parent->d_u.d_child.next; 1182 struct dentry *tmp;
872 this_parent = this_parent->d_parent; 1183 struct dentry *child;
1184
1185 tmp = this_parent->d_parent;
1186 rcu_read_lock();
1187 spin_unlock(&this_parent->d_lock);
1188 child = this_parent;
1189 this_parent = tmp;
1190 spin_lock(&this_parent->d_lock);
1191 /* might go back up the wrong parent if we have had a rename
1192 * or deletion */
1193 if (this_parent != child->d_parent ||
1194 (!locked && read_seqretry(&rename_lock, seq))) {
1195 spin_unlock(&this_parent->d_lock);
1196 rcu_read_unlock();
1197 goto rename_retry;
1198 }
1199 rcu_read_unlock();
1200 next = child->d_u.d_child.next;
873 goto resume; 1201 goto resume;
874 } 1202 }
875out: 1203out:
876 spin_unlock(&dcache_lock); 1204 spin_unlock(&this_parent->d_lock);
1205 if (!locked && read_seqretry(&rename_lock, seq))
1206 goto rename_retry;
1207 if (locked)
1208 write_sequnlock(&rename_lock);
877 return found; 1209 return found;
1210
1211rename_retry:
1212 if (found)
1213 return found;
1214 locked = 1;
1215 write_seqlock(&rename_lock);
1216 goto again;
878} 1217}
879 1218
880/** 1219/**
@@ -908,16 +1247,13 @@ EXPORT_SYMBOL(shrink_dcache_parent);
908 */ 1247 */
909static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1248static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
910{ 1249{
911 int nr_unused;
912
913 if (nr) { 1250 if (nr) {
914 if (!(gfp_mask & __GFP_FS)) 1251 if (!(gfp_mask & __GFP_FS))
915 return -1; 1252 return -1;
916 prune_dcache(nr); 1253 prune_dcache(nr);
917 } 1254 }
918 1255
919 nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); 1256 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
920 return (nr_unused / 100) * sysctl_vfs_cache_pressure;
921} 1257}
922 1258
923static struct shrinker dcache_shrinker = { 1259static struct shrinker dcache_shrinker = {
@@ -960,38 +1296,52 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
960 memcpy(dname, name->name, name->len); 1296 memcpy(dname, name->name, name->len);
961 dname[name->len] = 0; 1297 dname[name->len] = 0;
962 1298
963 atomic_set(&dentry->d_count, 1); 1299 dentry->d_count = 1;
964 dentry->d_flags = DCACHE_UNHASHED; 1300 dentry->d_flags = DCACHE_UNHASHED;
965 spin_lock_init(&dentry->d_lock); 1301 spin_lock_init(&dentry->d_lock);
1302 seqcount_init(&dentry->d_seq);
966 dentry->d_inode = NULL; 1303 dentry->d_inode = NULL;
967 dentry->d_parent = NULL; 1304 dentry->d_parent = NULL;
968 dentry->d_sb = NULL; 1305 dentry->d_sb = NULL;
969 dentry->d_op = NULL; 1306 dentry->d_op = NULL;
970 dentry->d_fsdata = NULL; 1307 dentry->d_fsdata = NULL;
971 dentry->d_mounted = 0; 1308 INIT_HLIST_BL_NODE(&dentry->d_hash);
972 INIT_HLIST_NODE(&dentry->d_hash);
973 INIT_LIST_HEAD(&dentry->d_lru); 1309 INIT_LIST_HEAD(&dentry->d_lru);
974 INIT_LIST_HEAD(&dentry->d_subdirs); 1310 INIT_LIST_HEAD(&dentry->d_subdirs);
975 INIT_LIST_HEAD(&dentry->d_alias); 1311 INIT_LIST_HEAD(&dentry->d_alias);
1312 INIT_LIST_HEAD(&dentry->d_u.d_child);
976 1313
977 if (parent) { 1314 if (parent) {
978 dentry->d_parent = dget(parent); 1315 spin_lock(&parent->d_lock);
1316 /*
1317 * don't need child lock because it is not subject
1318 * to concurrency here
1319 */
1320 __dget_dlock(parent);
1321 dentry->d_parent = parent;
979 dentry->d_sb = parent->d_sb; 1322 dentry->d_sb = parent->d_sb;
980 } else {
981 INIT_LIST_HEAD(&dentry->d_u.d_child);
982 }
983
984 spin_lock(&dcache_lock);
985 if (parent)
986 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 1323 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
987 spin_unlock(&dcache_lock); 1324 spin_unlock(&parent->d_lock);
1325 }
988 1326
989 percpu_counter_inc(&nr_dentry); 1327 this_cpu_inc(nr_dentry);
990 1328
991 return dentry; 1329 return dentry;
992} 1330}
993EXPORT_SYMBOL(d_alloc); 1331EXPORT_SYMBOL(d_alloc);
994 1332
1333struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1334{
1335 struct dentry *dentry = d_alloc(NULL, name);
1336 if (dentry) {
1337 dentry->d_sb = sb;
1338 dentry->d_parent = dentry;
1339 dentry->d_flags |= DCACHE_DISCONNECTED;
1340 }
1341 return dentry;
1342}
1343EXPORT_SYMBOL(d_alloc_pseudo);
1344
995struct dentry *d_alloc_name(struct dentry *parent, const char *name) 1345struct dentry *d_alloc_name(struct dentry *parent, const char *name)
996{ 1346{
997 struct qstr q; 1347 struct qstr q;
@@ -1003,12 +1353,36 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1003} 1353}
1004EXPORT_SYMBOL(d_alloc_name); 1354EXPORT_SYMBOL(d_alloc_name);
1005 1355
1006/* the caller must hold dcache_lock */ 1356void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1357{
1358 BUG_ON(dentry->d_op);
1359 BUG_ON(dentry->d_flags & (DCACHE_OP_HASH |
1360 DCACHE_OP_COMPARE |
1361 DCACHE_OP_REVALIDATE |
1362 DCACHE_OP_DELETE ));
1363 dentry->d_op = op;
1364 if (!op)
1365 return;
1366 if (op->d_hash)
1367 dentry->d_flags |= DCACHE_OP_HASH;
1368 if (op->d_compare)
1369 dentry->d_flags |= DCACHE_OP_COMPARE;
1370 if (op->d_revalidate)
1371 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1372 if (op->d_delete)
1373 dentry->d_flags |= DCACHE_OP_DELETE;
1374
1375}
1376EXPORT_SYMBOL(d_set_d_op);
1377
1007static void __d_instantiate(struct dentry *dentry, struct inode *inode) 1378static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1008{ 1379{
1380 spin_lock(&dentry->d_lock);
1009 if (inode) 1381 if (inode)
1010 list_add(&dentry->d_alias, &inode->i_dentry); 1382 list_add(&dentry->d_alias, &inode->i_dentry);
1011 dentry->d_inode = inode; 1383 dentry->d_inode = inode;
1384 dentry_rcuwalk_barrier(dentry);
1385 spin_unlock(&dentry->d_lock);
1012 fsnotify_d_instantiate(dentry, inode); 1386 fsnotify_d_instantiate(dentry, inode);
1013} 1387}
1014 1388
@@ -1030,9 +1404,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1030void d_instantiate(struct dentry *entry, struct inode * inode) 1404void d_instantiate(struct dentry *entry, struct inode * inode)
1031{ 1405{
1032 BUG_ON(!list_empty(&entry->d_alias)); 1406 BUG_ON(!list_empty(&entry->d_alias));
1033 spin_lock(&dcache_lock); 1407 if (inode)
1408 spin_lock(&inode->i_lock);
1034 __d_instantiate(entry, inode); 1409 __d_instantiate(entry, inode);
1035 spin_unlock(&dcache_lock); 1410 if (inode)
1411 spin_unlock(&inode->i_lock);
1036 security_d_instantiate(entry, inode); 1412 security_d_instantiate(entry, inode);
1037} 1413}
1038EXPORT_SYMBOL(d_instantiate); 1414EXPORT_SYMBOL(d_instantiate);
@@ -1069,15 +1445,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1069 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1445 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1070 struct qstr *qstr = &alias->d_name; 1446 struct qstr *qstr = &alias->d_name;
1071 1447
1448 /*
1449 * Don't need alias->d_lock here, because aliases with
1450 * d_parent == entry->d_parent are not subject to name or
1451 * parent changes, because the parent inode i_mutex is held.
1452 */
1072 if (qstr->hash != hash) 1453 if (qstr->hash != hash)
1073 continue; 1454 continue;
1074 if (alias->d_parent != entry->d_parent) 1455 if (alias->d_parent != entry->d_parent)
1075 continue; 1456 continue;
1076 if (qstr->len != len) 1457 if (dentry_cmp(qstr->name, qstr->len, name, len))
1077 continue; 1458 continue;
1078 if (memcmp(qstr->name, name, len)) 1459 __dget(alias);
1079 continue;
1080 dget_locked(alias);
1081 return alias; 1460 return alias;
1082 } 1461 }
1083 1462
@@ -1091,9 +1470,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1091 1470
1092 BUG_ON(!list_empty(&entry->d_alias)); 1471 BUG_ON(!list_empty(&entry->d_alias));
1093 1472
1094 spin_lock(&dcache_lock); 1473 if (inode)
1474 spin_lock(&inode->i_lock);
1095 result = __d_instantiate_unique(entry, inode); 1475 result = __d_instantiate_unique(entry, inode);
1096 spin_unlock(&dcache_lock); 1476 if (inode)
1477 spin_unlock(&inode->i_lock);
1097 1478
1098 if (!result) { 1479 if (!result) {
1099 security_d_instantiate(entry, inode); 1480 security_d_instantiate(entry, inode);
@@ -1134,14 +1515,6 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1134} 1515}
1135EXPORT_SYMBOL(d_alloc_root); 1516EXPORT_SYMBOL(d_alloc_root);
1136 1517
1137static inline struct hlist_head *d_hash(struct dentry *parent,
1138 unsigned long hash)
1139{
1140 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1141 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1142 return dentry_hashtable + (hash & D_HASHMASK);
1143}
1144
1145/** 1518/**
1146 * d_obtain_alias - find or allocate a dentry for a given inode 1519 * d_obtain_alias - find or allocate a dentry for a given inode
1147 * @inode: inode to allocate the dentry for 1520 * @inode: inode to allocate the dentry for
@@ -1182,10 +1555,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
1182 } 1555 }
1183 tmp->d_parent = tmp; /* make sure dput doesn't croak */ 1556 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1184 1557
1185 spin_lock(&dcache_lock); 1558
1559 spin_lock(&inode->i_lock);
1186 res = __d_find_alias(inode, 0); 1560 res = __d_find_alias(inode, 0);
1187 if (res) { 1561 if (res) {
1188 spin_unlock(&dcache_lock); 1562 spin_unlock(&inode->i_lock);
1189 dput(tmp); 1563 dput(tmp);
1190 goto out_iput; 1564 goto out_iput;
1191 } 1565 }
@@ -1195,12 +1569,14 @@ struct dentry *d_obtain_alias(struct inode *inode)
1195 tmp->d_sb = inode->i_sb; 1569 tmp->d_sb = inode->i_sb;
1196 tmp->d_inode = inode; 1570 tmp->d_inode = inode;
1197 tmp->d_flags |= DCACHE_DISCONNECTED; 1571 tmp->d_flags |= DCACHE_DISCONNECTED;
1198 tmp->d_flags &= ~DCACHE_UNHASHED;
1199 list_add(&tmp->d_alias, &inode->i_dentry); 1572 list_add(&tmp->d_alias, &inode->i_dentry);
1200 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); 1573 bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
1574 tmp->d_flags &= ~DCACHE_UNHASHED;
1575 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1576 __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
1201 spin_unlock(&tmp->d_lock); 1577 spin_unlock(&tmp->d_lock);
1578 spin_unlock(&inode->i_lock);
1202 1579
1203 spin_unlock(&dcache_lock);
1204 return tmp; 1580 return tmp;
1205 1581
1206 out_iput: 1582 out_iput:
@@ -1230,18 +1606,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1230 struct dentry *new = NULL; 1606 struct dentry *new = NULL;
1231 1607
1232 if (inode && S_ISDIR(inode->i_mode)) { 1608 if (inode && S_ISDIR(inode->i_mode)) {
1233 spin_lock(&dcache_lock); 1609 spin_lock(&inode->i_lock);
1234 new = __d_find_alias(inode, 1); 1610 new = __d_find_alias(inode, 1);
1235 if (new) { 1611 if (new) {
1236 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1612 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1237 spin_unlock(&dcache_lock); 1613 spin_unlock(&inode->i_lock);
1238 security_d_instantiate(new, inode); 1614 security_d_instantiate(new, inode);
1239 d_move(new, dentry); 1615 d_move(new, dentry);
1240 iput(inode); 1616 iput(inode);
1241 } else { 1617 } else {
1242 /* already taking dcache_lock, so d_add() by hand */ 1618 /* already taking inode->i_lock, so d_add() by hand */
1243 __d_instantiate(dentry, inode); 1619 __d_instantiate(dentry, inode);
1244 spin_unlock(&dcache_lock); 1620 spin_unlock(&inode->i_lock);
1245 security_d_instantiate(dentry, inode); 1621 security_d_instantiate(dentry, inode);
1246 d_rehash(dentry); 1622 d_rehash(dentry);
1247 } 1623 }
@@ -1314,10 +1690,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1314 * Negative dentry: instantiate it unless the inode is a directory and 1690 * Negative dentry: instantiate it unless the inode is a directory and
1315 * already has a dentry. 1691 * already has a dentry.
1316 */ 1692 */
1317 spin_lock(&dcache_lock); 1693 spin_lock(&inode->i_lock);
1318 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { 1694 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
1319 __d_instantiate(found, inode); 1695 __d_instantiate(found, inode);
1320 spin_unlock(&dcache_lock); 1696 spin_unlock(&inode->i_lock);
1321 security_d_instantiate(found, inode); 1697 security_d_instantiate(found, inode);
1322 return found; 1698 return found;
1323 } 1699 }
@@ -1327,8 +1703,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1327 * reference to it, move it in place and use it. 1703 * reference to it, move it in place and use it.
1328 */ 1704 */
1329 new = list_entry(inode->i_dentry.next, struct dentry, d_alias); 1705 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1330 dget_locked(new); 1706 __dget(new);
1331 spin_unlock(&dcache_lock); 1707 spin_unlock(&inode->i_lock);
1332 security_d_instantiate(found, inode); 1708 security_d_instantiate(found, inode);
1333 d_move(new, found); 1709 d_move(new, found);
1334 iput(inode); 1710 iput(inode);
@@ -1342,6 +1718,112 @@ err_out:
1342EXPORT_SYMBOL(d_add_ci); 1718EXPORT_SYMBOL(d_add_ci);
1343 1719
1344/** 1720/**
1721 * __d_lookup_rcu - search for a dentry (racy, store-free)
1722 * @parent: parent dentry
1723 * @name: qstr of name we wish to find
1724 * @seq: returns d_seq value at the point where the dentry was found
1725 * @inode: returns dentry->d_inode when the inode was found valid.
1726 * Returns: dentry, or NULL
1727 *
1728 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1729 * resolution (store-free path walking) design described in
1730 * Documentation/filesystems/path-lookup.txt.
1731 *
1732 * This is not to be used outside core vfs.
1733 *
1734 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1735 * held, and rcu_read_lock held. The returned dentry must not be stored into
1736 * without taking d_lock and checking d_seq sequence count against @seq
1737 * returned here.
1738 *
1739 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1740 * function.
1741 *
1742 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1743 * the returned dentry, so long as its parent's seqlock is checked after the
1744 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1745 * is formed, giving integrity down the path walk.
1746 */
1747struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1748 unsigned *seq, struct inode **inode)
1749{
1750 unsigned int len = name->len;
1751 unsigned int hash = name->hash;
1752 const unsigned char *str = name->name;
1753 struct dcache_hash_bucket *b = d_hash(parent, hash);
1754 struct hlist_bl_node *node;
1755 struct dentry *dentry;
1756
1757 /*
1758 * Note: There is significant duplication with __d_lookup which is
1759 * required to prevent single threaded performance regressions
1760 * especially on architectures where smp_rmb (in seqcounts) are costly.
1761 * Keep the two functions in sync.
1762 */
1763
1764 /*
1765 * The hash list is protected using RCU.
1766 *
1767 * Carefully use d_seq when comparing a candidate dentry, to avoid
1768 * races with d_move().
1769 *
1770 * It is possible that concurrent renames can mess up our list
1771 * walk here and result in missing our dentry, resulting in the
1772 * false-negative result. d_lookup() protects against concurrent
1773 * renames using rename_lock seqlock.
1774 *
1775 * See Documentation/filesystems/path-lookup.txt for more details.
1776 */
1777 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
1778 struct inode *i;
1779 const char *tname;
1780 int tlen;
1781
1782 if (dentry->d_name.hash != hash)
1783 continue;
1784
1785seqretry:
1786 *seq = read_seqcount_begin(&dentry->d_seq);
1787 if (dentry->d_parent != parent)
1788 continue;
1789 if (d_unhashed(dentry))
1790 continue;
1791 tlen = dentry->d_name.len;
1792 tname = dentry->d_name.name;
1793 i = dentry->d_inode;
1794 prefetch(tname);
1795 if (i)
1796 prefetch(i);
1797 /*
1798 * This seqcount check is required to ensure name and
1799 * len are loaded atomically, so as not to walk off the
1800 * edge of memory when walking. If we could load this
1801 * atomically some other way, we could drop this check.
1802 */
1803 if (read_seqcount_retry(&dentry->d_seq, *seq))
1804 goto seqretry;
1805 if (parent->d_flags & DCACHE_OP_COMPARE) {
1806 if (parent->d_op->d_compare(parent, *inode,
1807 dentry, i,
1808 tlen, tname, name))
1809 continue;
1810 } else {
1811 if (dentry_cmp(tname, tlen, str, len))
1812 continue;
1813 }
1814 /*
1815 * No extra seqcount check is required after the name
1816 * compare. The caller must perform a seqcount check in
1817 * order to do anything useful with the returned dentry
1818 * anyway.
1819 */
1820 *inode = i;
1821 return dentry;
1822 }
1823 return NULL;
1824}
1825
1826/**
1345 * d_lookup - search for a dentry 1827 * d_lookup - search for a dentry
1346 * @parent: parent dentry 1828 * @parent: parent dentry
1347 * @name: qstr of name we wish to find 1829 * @name: qstr of name we wish to find
@@ -1352,10 +1834,10 @@ EXPORT_SYMBOL(d_add_ci);
1352 * dentry is returned. The caller must use dput to free the entry when it has 1834 * dentry is returned. The caller must use dput to free the entry when it has
1353 * finished using it. %NULL is returned if the dentry does not exist. 1835 * finished using it. %NULL is returned if the dentry does not exist.
1354 */ 1836 */
1355struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1837struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1356{ 1838{
1357 struct dentry * dentry = NULL; 1839 struct dentry *dentry;
1358 unsigned long seq; 1840 unsigned seq;
1359 1841
1360 do { 1842 do {
1361 seq = read_seqbegin(&rename_lock); 1843 seq = read_seqbegin(&rename_lock);
@@ -1367,7 +1849,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1367} 1849}
1368EXPORT_SYMBOL(d_lookup); 1850EXPORT_SYMBOL(d_lookup);
1369 1851
1370/* 1852/**
1371 * __d_lookup - search for a dentry (racy) 1853 * __d_lookup - search for a dentry (racy)
1372 * @parent: parent dentry 1854 * @parent: parent dentry
1373 * @name: qstr of name we wish to find 1855 * @name: qstr of name we wish to find
@@ -1382,17 +1864,24 @@ EXPORT_SYMBOL(d_lookup);
1382 * 1864 *
1383 * __d_lookup callers must be commented. 1865 * __d_lookup callers must be commented.
1384 */ 1866 */
1385struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1867struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1386{ 1868{
1387 unsigned int len = name->len; 1869 unsigned int len = name->len;
1388 unsigned int hash = name->hash; 1870 unsigned int hash = name->hash;
1389 const unsigned char *str = name->name; 1871 const unsigned char *str = name->name;
1390 struct hlist_head *head = d_hash(parent,hash); 1872 struct dcache_hash_bucket *b = d_hash(parent, hash);
1873 struct hlist_bl_node *node;
1391 struct dentry *found = NULL; 1874 struct dentry *found = NULL;
1392 struct hlist_node *node;
1393 struct dentry *dentry; 1875 struct dentry *dentry;
1394 1876
1395 /* 1877 /*
1878 * Note: There is significant duplication with __d_lookup_rcu which is
1879 * required to prevent single threaded performance regressions
1880 * especially on architectures where smp_rmb (in seqcounts) are costly.
1881 * Keep the two functions in sync.
1882 */
1883
1884 /*
1396 * The hash list is protected using RCU. 1885 * The hash list is protected using RCU.
1397 * 1886 *
1398 * Take d_lock when comparing a candidate dentry, to avoid races 1887 * Take d_lock when comparing a candidate dentry, to avoid races
@@ -1407,25 +1896,16 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1407 */ 1896 */
1408 rcu_read_lock(); 1897 rcu_read_lock();
1409 1898
1410 hlist_for_each_entry_rcu(dentry, node, head, d_hash) { 1899 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
1411 struct qstr *qstr; 1900 const char *tname;
1901 int tlen;
1412 1902
1413 if (dentry->d_name.hash != hash) 1903 if (dentry->d_name.hash != hash)
1414 continue; 1904 continue;
1415 if (dentry->d_parent != parent)
1416 continue;
1417 1905
1418 spin_lock(&dentry->d_lock); 1906 spin_lock(&dentry->d_lock);
1419
1420 /*
1421 * Recheck the dentry after taking the lock - d_move may have
1422 * changed things. Don't bother checking the hash because
1423 * we're about to compare the whole name anyway.
1424 */
1425 if (dentry->d_parent != parent) 1907 if (dentry->d_parent != parent)
1426 goto next; 1908 goto next;
1427
1428 /* non-existing due to RCU? */
1429 if (d_unhashed(dentry)) 1909 if (d_unhashed(dentry))
1430 goto next; 1910 goto next;
1431 1911
@@ -1433,18 +1913,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1433 * It is safe to compare names since d_move() cannot 1913 * It is safe to compare names since d_move() cannot
1434 * change the qstr (protected by d_lock). 1914 * change the qstr (protected by d_lock).
1435 */ 1915 */
1436 qstr = &dentry->d_name; 1916 tlen = dentry->d_name.len;
1437 if (parent->d_op && parent->d_op->d_compare) { 1917 tname = dentry->d_name.name;
1438 if (parent->d_op->d_compare(parent, qstr, name)) 1918 if (parent->d_flags & DCACHE_OP_COMPARE) {
1919 if (parent->d_op->d_compare(parent, parent->d_inode,
1920 dentry, dentry->d_inode,
1921 tlen, tname, name))
1439 goto next; 1922 goto next;
1440 } else { 1923 } else {
1441 if (qstr->len != len) 1924 if (dentry_cmp(tname, tlen, str, len))
1442 goto next;
1443 if (memcmp(qstr->name, str, len))
1444 goto next; 1925 goto next;
1445 } 1926 }
1446 1927
1447 atomic_inc(&dentry->d_count); 1928 dentry->d_count++;
1448 found = dentry; 1929 found = dentry;
1449 spin_unlock(&dentry->d_lock); 1930 spin_unlock(&dentry->d_lock);
1450 break; 1931 break;
@@ -1473,8 +1954,8 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1473 * routine may choose to leave the hash value unchanged. 1954 * routine may choose to leave the hash value unchanged.
1474 */ 1955 */
1475 name->hash = full_name_hash(name->name, name->len); 1956 name->hash = full_name_hash(name->name, name->len);
1476 if (dir->d_op && dir->d_op->d_hash) { 1957 if (dir->d_flags & DCACHE_OP_HASH) {
1477 if (dir->d_op->d_hash(dir, name) < 0) 1958 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1478 goto out; 1959 goto out;
1479 } 1960 }
1480 dentry = d_lookup(dir, name); 1961 dentry = d_lookup(dir, name);
@@ -1483,34 +1964,32 @@ out:
1483} 1964}
1484 1965
1485/** 1966/**
1486 * d_validate - verify dentry provided from insecure source 1967 * d_validate - verify dentry provided from insecure source (deprecated)
1487 * @dentry: The dentry alleged to be valid child of @dparent 1968 * @dentry: The dentry alleged to be valid child of @dparent
1488 * @dparent: The parent dentry (known to be valid) 1969 * @dparent: The parent dentry (known to be valid)
1489 * 1970 *
1490 * An insecure source has sent us a dentry, here we verify it and dget() it. 1971 * An insecure source has sent us a dentry, here we verify it and dget() it.
1491 * This is used by ncpfs in its readdir implementation. 1972 * This is used by ncpfs in its readdir implementation.
1492 * Zero is returned if the dentry is invalid. 1973 * Zero is returned if the dentry is invalid.
1974 *
1975 * This function is slow for big directories, and deprecated, do not use it.
1493 */ 1976 */
1494int d_validate(struct dentry *dentry, struct dentry *parent) 1977int d_validate(struct dentry *dentry, struct dentry *dparent)
1495{ 1978{
1496 struct hlist_head *head = d_hash(parent, dentry->d_name.hash); 1979 struct dentry *child;
1497 struct hlist_node *node;
1498 struct dentry *d;
1499
1500 /* Check whether the ptr might be valid at all.. */
1501 if (!kmem_ptr_validate(dentry_cache, dentry))
1502 return 0;
1503 if (dentry->d_parent != parent)
1504 return 0;
1505 1980
1506 rcu_read_lock(); 1981 spin_lock(&dparent->d_lock);
1507 hlist_for_each_entry_rcu(d, node, head, d_hash) { 1982 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1508 if (d == dentry) { 1983 if (dentry == child) {
1509 dget(dentry); 1984 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1985 __dget_dlock(dentry);
1986 spin_unlock(&dentry->d_lock);
1987 spin_unlock(&dparent->d_lock);
1510 return 1; 1988 return 1;
1511 } 1989 }
1512 } 1990 }
1513 rcu_read_unlock(); 1991 spin_unlock(&dparent->d_lock);
1992
1514 return 0; 1993 return 0;
1515} 1994}
1516EXPORT_SYMBOL(d_validate); 1995EXPORT_SYMBOL(d_validate);
@@ -1538,16 +2017,23 @@ EXPORT_SYMBOL(d_validate);
1538 2017
1539void d_delete(struct dentry * dentry) 2018void d_delete(struct dentry * dentry)
1540{ 2019{
2020 struct inode *inode;
1541 int isdir = 0; 2021 int isdir = 0;
1542 /* 2022 /*
1543 * Are we the only user? 2023 * Are we the only user?
1544 */ 2024 */
1545 spin_lock(&dcache_lock); 2025again:
1546 spin_lock(&dentry->d_lock); 2026 spin_lock(&dentry->d_lock);
1547 isdir = S_ISDIR(dentry->d_inode->i_mode); 2027 inode = dentry->d_inode;
1548 if (atomic_read(&dentry->d_count) == 1) { 2028 isdir = S_ISDIR(inode->i_mode);
2029 if (dentry->d_count == 1) {
2030 if (inode && !spin_trylock(&inode->i_lock)) {
2031 spin_unlock(&dentry->d_lock);
2032 cpu_relax();
2033 goto again;
2034 }
1549 dentry->d_flags &= ~DCACHE_CANT_MOUNT; 2035 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
1550 dentry_iput(dentry); 2036 dentry_unlink_inode(dentry);
1551 fsnotify_nameremove(dentry, isdir); 2037 fsnotify_nameremove(dentry, isdir);
1552 return; 2038 return;
1553 } 2039 }
@@ -1556,17 +2042,18 @@ void d_delete(struct dentry * dentry)
1556 __d_drop(dentry); 2042 __d_drop(dentry);
1557 2043
1558 spin_unlock(&dentry->d_lock); 2044 spin_unlock(&dentry->d_lock);
1559 spin_unlock(&dcache_lock);
1560 2045
1561 fsnotify_nameremove(dentry, isdir); 2046 fsnotify_nameremove(dentry, isdir);
1562} 2047}
1563EXPORT_SYMBOL(d_delete); 2048EXPORT_SYMBOL(d_delete);
1564 2049
1565static void __d_rehash(struct dentry * entry, struct hlist_head *list) 2050static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
1566{ 2051{
1567 2052 BUG_ON(!d_unhashed(entry));
2053 spin_lock_bucket(b);
1568 entry->d_flags &= ~DCACHE_UNHASHED; 2054 entry->d_flags &= ~DCACHE_UNHASHED;
1569 hlist_add_head_rcu(&entry->d_hash, list); 2055 hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
2056 spin_unlock_bucket(b);
1570} 2057}
1571 2058
1572static void _d_rehash(struct dentry * entry) 2059static void _d_rehash(struct dentry * entry)
@@ -1583,25 +2070,39 @@ static void _d_rehash(struct dentry * entry)
1583 2070
1584void d_rehash(struct dentry * entry) 2071void d_rehash(struct dentry * entry)
1585{ 2072{
1586 spin_lock(&dcache_lock);
1587 spin_lock(&entry->d_lock); 2073 spin_lock(&entry->d_lock);
1588 _d_rehash(entry); 2074 _d_rehash(entry);
1589 spin_unlock(&entry->d_lock); 2075 spin_unlock(&entry->d_lock);
1590 spin_unlock(&dcache_lock);
1591} 2076}
1592EXPORT_SYMBOL(d_rehash); 2077EXPORT_SYMBOL(d_rehash);
1593 2078
1594/* 2079/**
1595 * When switching names, the actual string doesn't strictly have to 2080 * dentry_update_name_case - update case insensitive dentry with a new name
1596 * be preserved in the target - because we're dropping the target 2081 * @dentry: dentry to be updated
1597 * anyway. As such, we can just do a simple memcpy() to copy over 2082 * @name: new name
1598 * the new name before we switch.
1599 * 2083 *
1600 * Note that we have to be a lot more careful about getting the hash 2084 * Update a case insensitive dentry with new case of name.
1601 * switched - we have to switch the hash value properly even if it 2085 *
1602 * then no longer matches the actual (corrupted) string of the target. 2086 * dentry must have been returned by d_lookup with name @name. Old and new
1603 * The hash value has to match the hash queue that the dentry is on.. 2087 * name lengths must match (ie. no d_compare which allows mismatched name
2088 * lengths).
2089 *
2090 * Parent inode i_mutex must be held over d_lookup and into this call (to
2091 * keep renames and concurrent inserts, and readdir(2) away).
1604 */ 2092 */
2093void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2094{
2095 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
2096 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2097
2098 spin_lock(&dentry->d_lock);
2099 write_seqcount_begin(&dentry->d_seq);
2100 memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2101 write_seqcount_end(&dentry->d_seq);
2102 spin_unlock(&dentry->d_lock);
2103}
2104EXPORT_SYMBOL(dentry_update_name_case);
2105
1605static void switch_names(struct dentry *dentry, struct dentry *target) 2106static void switch_names(struct dentry *dentry, struct dentry *target)
1606{ 2107{
1607 if (dname_external(target)) { 2108 if (dname_external(target)) {
@@ -1643,54 +2144,84 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1643 swap(dentry->d_name.len, target->d_name.len); 2144 swap(dentry->d_name.len, target->d_name.len);
1644} 2145}
1645 2146
2147static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2148{
2149 /*
2150 * XXXX: do we really need to take target->d_lock?
2151 */
2152 if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2153 spin_lock(&target->d_parent->d_lock);
2154 else {
2155 if (d_ancestor(dentry->d_parent, target->d_parent)) {
2156 spin_lock(&dentry->d_parent->d_lock);
2157 spin_lock_nested(&target->d_parent->d_lock,
2158 DENTRY_D_LOCK_NESTED);
2159 } else {
2160 spin_lock(&target->d_parent->d_lock);
2161 spin_lock_nested(&dentry->d_parent->d_lock,
2162 DENTRY_D_LOCK_NESTED);
2163 }
2164 }
2165 if (target < dentry) {
2166 spin_lock_nested(&target->d_lock, 2);
2167 spin_lock_nested(&dentry->d_lock, 3);
2168 } else {
2169 spin_lock_nested(&dentry->d_lock, 2);
2170 spin_lock_nested(&target->d_lock, 3);
2171 }
2172}
2173
2174static void dentry_unlock_parents_for_move(struct dentry *dentry,
2175 struct dentry *target)
2176{
2177 if (target->d_parent != dentry->d_parent)
2178 spin_unlock(&dentry->d_parent->d_lock);
2179 if (target->d_parent != target)
2180 spin_unlock(&target->d_parent->d_lock);
2181}
2182
1646/* 2183/*
1647 * We cannibalize "target" when moving dentry on top of it, 2184 * When switching names, the actual string doesn't strictly have to
1648 * because it's going to be thrown away anyway. We could be more 2185 * be preserved in the target - because we're dropping the target
1649 * polite about it, though. 2186 * anyway. As such, we can just do a simple memcpy() to copy over
1650 * 2187 * the new name before we switch.
1651 * This forceful removal will result in ugly /proc output if 2188 *
1652 * somebody holds a file open that got deleted due to a rename. 2189 * Note that we have to be a lot more careful about getting the hash
1653 * We could be nicer about the deleted file, and let it show 2190 * switched - we have to switch the hash value properly even if it
1654 * up under the name it had before it was deleted rather than 2191 * then no longer matches the actual (corrupted) string of the target.
1655 * under the original name of the file that was moved on top of it. 2192 * The hash value has to match the hash queue that the dentry is on..
1656 */ 2193 */
1657
1658/* 2194/*
1659 * d_move_locked - move a dentry 2195 * d_move - move a dentry
1660 * @dentry: entry to move 2196 * @dentry: entry to move
1661 * @target: new dentry 2197 * @target: new dentry
1662 * 2198 *
1663 * Update the dcache to reflect the move of a file name. Negative 2199 * Update the dcache to reflect the move of a file name. Negative
1664 * dcache entries should not be moved in this way. 2200 * dcache entries should not be moved in this way.
1665 */ 2201 */
1666static void d_move_locked(struct dentry * dentry, struct dentry * target) 2202void d_move(struct dentry * dentry, struct dentry * target)
1667{ 2203{
1668 struct hlist_head *list;
1669
1670 if (!dentry->d_inode) 2204 if (!dentry->d_inode)
1671 printk(KERN_WARNING "VFS: moving negative dcache entry\n"); 2205 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1672 2206
2207 BUG_ON(d_ancestor(dentry, target));
2208 BUG_ON(d_ancestor(target, dentry));
2209
1673 write_seqlock(&rename_lock); 2210 write_seqlock(&rename_lock);
1674 /*
1675 * XXXX: do we really need to take target->d_lock?
1676 */
1677 if (target < dentry) {
1678 spin_lock(&target->d_lock);
1679 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1680 } else {
1681 spin_lock(&dentry->d_lock);
1682 spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
1683 }
1684 2211
1685 /* Move the dentry to the target hash queue, if on different bucket */ 2212 dentry_lock_for_move(dentry, target);
1686 if (d_unhashed(dentry))
1687 goto already_unhashed;
1688 2213
1689 hlist_del_rcu(&dentry->d_hash); 2214 write_seqcount_begin(&dentry->d_seq);
2215 write_seqcount_begin(&target->d_seq);
1690 2216
1691already_unhashed: 2217 /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
1692 list = d_hash(target->d_parent, target->d_name.hash); 2218
1693 __d_rehash(dentry, list); 2219 /*
2220 * Move the dentry to the target hash queue. Don't bother checking
2221 * for the same hash queue because of how unlikely it is.
2222 */
2223 __d_drop(dentry);
2224 __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
1694 2225
1695 /* Unhash the target: dput() will then get rid of it */ 2226 /* Unhash the target: dput() will then get rid of it */
1696 __d_drop(target); 2227 __d_drop(target);
@@ -1715,27 +2246,16 @@ already_unhashed:
1715 } 2246 }
1716 2247
1717 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 2248 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2249
2250 write_seqcount_end(&target->d_seq);
2251 write_seqcount_end(&dentry->d_seq);
2252
2253 dentry_unlock_parents_for_move(dentry, target);
1718 spin_unlock(&target->d_lock); 2254 spin_unlock(&target->d_lock);
1719 fsnotify_d_move(dentry); 2255 fsnotify_d_move(dentry);
1720 spin_unlock(&dentry->d_lock); 2256 spin_unlock(&dentry->d_lock);
1721 write_sequnlock(&rename_lock); 2257 write_sequnlock(&rename_lock);
1722} 2258}
1723
1724/**
1725 * d_move - move a dentry
1726 * @dentry: entry to move
1727 * @target: new dentry
1728 *
1729 * Update the dcache to reflect the move of a file name. Negative
1730 * dcache entries should not be moved in this way.
1731 */
1732
1733void d_move(struct dentry * dentry, struct dentry * target)
1734{
1735 spin_lock(&dcache_lock);
1736 d_move_locked(dentry, target);
1737 spin_unlock(&dcache_lock);
1738}
1739EXPORT_SYMBOL(d_move); 2259EXPORT_SYMBOL(d_move);
1740 2260
1741/** 2261/**
@@ -1761,13 +2281,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1761 * This helper attempts to cope with remotely renamed directories 2281 * This helper attempts to cope with remotely renamed directories
1762 * 2282 *
1763 * It assumes that the caller is already holding 2283 * It assumes that the caller is already holding
1764 * dentry->d_parent->d_inode->i_mutex and the dcache_lock 2284 * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
1765 * 2285 *
1766 * Note: If ever the locking in lock_rename() changes, then please 2286 * Note: If ever the locking in lock_rename() changes, then please
1767 * remember to update this too... 2287 * remember to update this too...
1768 */ 2288 */
1769static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) 2289static struct dentry *__d_unalias(struct inode *inode,
1770 __releases(dcache_lock) 2290 struct dentry *dentry, struct dentry *alias)
1771{ 2291{
1772 struct mutex *m1 = NULL, *m2 = NULL; 2292 struct mutex *m1 = NULL, *m2 = NULL;
1773 struct dentry *ret; 2293 struct dentry *ret;
@@ -1790,10 +2310,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1790 goto out_err; 2310 goto out_err;
1791 m2 = &alias->d_parent->d_inode->i_mutex; 2311 m2 = &alias->d_parent->d_inode->i_mutex;
1792out_unalias: 2312out_unalias:
1793 d_move_locked(alias, dentry); 2313 d_move(alias, dentry);
1794 ret = alias; 2314 ret = alias;
1795out_err: 2315out_err:
1796 spin_unlock(&dcache_lock); 2316 spin_unlock(&inode->i_lock);
1797 if (m2) 2317 if (m2)
1798 mutex_unlock(m2); 2318 mutex_unlock(m2);
1799 if (m1) 2319 if (m1)
@@ -1804,17 +2324,23 @@ out_err:
1804/* 2324/*
1805 * Prepare an anonymous dentry for life in the superblock's dentry tree as a 2325 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1806 * named dentry in place of the dentry to be replaced. 2326 * named dentry in place of the dentry to be replaced.
2327 * returns with anon->d_lock held!
1807 */ 2328 */
1808static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) 2329static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1809{ 2330{
1810 struct dentry *dparent, *aparent; 2331 struct dentry *dparent, *aparent;
1811 2332
1812 switch_names(dentry, anon); 2333 dentry_lock_for_move(anon, dentry);
1813 swap(dentry->d_name.hash, anon->d_name.hash); 2334
2335 write_seqcount_begin(&dentry->d_seq);
2336 write_seqcount_begin(&anon->d_seq);
1814 2337
1815 dparent = dentry->d_parent; 2338 dparent = dentry->d_parent;
1816 aparent = anon->d_parent; 2339 aparent = anon->d_parent;
1817 2340
2341 switch_names(dentry, anon);
2342 swap(dentry->d_name.hash, anon->d_name.hash);
2343
1818 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2344 dentry->d_parent = (aparent == anon) ? dentry : aparent;
1819 list_del(&dentry->d_u.d_child); 2345 list_del(&dentry->d_u.d_child);
1820 if (!IS_ROOT(dentry)) 2346 if (!IS_ROOT(dentry))
@@ -1829,6 +2355,13 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1829 else 2355 else
1830 INIT_LIST_HEAD(&anon->d_u.d_child); 2356 INIT_LIST_HEAD(&anon->d_u.d_child);
1831 2357
2358 write_seqcount_end(&dentry->d_seq);
2359 write_seqcount_end(&anon->d_seq);
2360
2361 dentry_unlock_parents_for_move(anon, dentry);
2362 spin_unlock(&dentry->d_lock);
2363
2364 /* anon->d_lock still locked, returns locked */
1832 anon->d_flags &= ~DCACHE_DISCONNECTED; 2365 anon->d_flags &= ~DCACHE_DISCONNECTED;
1833} 2366}
1834 2367
@@ -1846,14 +2379,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1846 2379
1847 BUG_ON(!d_unhashed(dentry)); 2380 BUG_ON(!d_unhashed(dentry));
1848 2381
1849 spin_lock(&dcache_lock);
1850
1851 if (!inode) { 2382 if (!inode) {
1852 actual = dentry; 2383 actual = dentry;
1853 __d_instantiate(dentry, NULL); 2384 __d_instantiate(dentry, NULL);
1854 goto found_lock; 2385 d_rehash(actual);
2386 goto out_nolock;
1855 } 2387 }
1856 2388
2389 spin_lock(&inode->i_lock);
2390
1857 if (S_ISDIR(inode->i_mode)) { 2391 if (S_ISDIR(inode->i_mode)) {
1858 struct dentry *alias; 2392 struct dentry *alias;
1859 2393
@@ -1864,13 +2398,12 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1864 /* Is this an anonymous mountpoint that we could splice 2398 /* Is this an anonymous mountpoint that we could splice
1865 * into our tree? */ 2399 * into our tree? */
1866 if (IS_ROOT(alias)) { 2400 if (IS_ROOT(alias)) {
1867 spin_lock(&alias->d_lock);
1868 __d_materialise_dentry(dentry, alias); 2401 __d_materialise_dentry(dentry, alias);
1869 __d_drop(alias); 2402 __d_drop(alias);
1870 goto found; 2403 goto found;
1871 } 2404 }
1872 /* Nope, but we must(!) avoid directory aliasing */ 2405 /* Nope, but we must(!) avoid directory aliasing */
1873 actual = __d_unalias(dentry, alias); 2406 actual = __d_unalias(inode, dentry, alias);
1874 if (IS_ERR(actual)) 2407 if (IS_ERR(actual))
1875 dput(alias); 2408 dput(alias);
1876 goto out_nolock; 2409 goto out_nolock;
@@ -1881,15 +2414,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1881 actual = __d_instantiate_unique(dentry, inode); 2414 actual = __d_instantiate_unique(dentry, inode);
1882 if (!actual) 2415 if (!actual)
1883 actual = dentry; 2416 actual = dentry;
1884 else if (unlikely(!d_unhashed(actual))) 2417 else
1885 goto shouldnt_be_hashed; 2418 BUG_ON(!d_unhashed(actual));
1886 2419
1887found_lock:
1888 spin_lock(&actual->d_lock); 2420 spin_lock(&actual->d_lock);
1889found: 2421found:
1890 _d_rehash(actual); 2422 _d_rehash(actual);
1891 spin_unlock(&actual->d_lock); 2423 spin_unlock(&actual->d_lock);
1892 spin_unlock(&dcache_lock); 2424 spin_unlock(&inode->i_lock);
1893out_nolock: 2425out_nolock:
1894 if (actual == dentry) { 2426 if (actual == dentry) {
1895 security_d_instantiate(dentry, inode); 2427 security_d_instantiate(dentry, inode);
@@ -1898,10 +2430,6 @@ out_nolock:
1898 2430
1899 iput(inode); 2431 iput(inode);
1900 return actual; 2432 return actual;
1901
1902shouldnt_be_hashed:
1903 spin_unlock(&dcache_lock);
1904 BUG();
1905} 2433}
1906EXPORT_SYMBOL_GPL(d_materialise_unique); 2434EXPORT_SYMBOL_GPL(d_materialise_unique);
1907 2435
@@ -1928,7 +2456,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1928 * @buffer: pointer to the end of the buffer 2456 * @buffer: pointer to the end of the buffer
1929 * @buflen: pointer to buffer length 2457 * @buflen: pointer to buffer length
1930 * 2458 *
1931 * Caller holds the dcache_lock. 2459 * Caller holds the rename_lock.
1932 * 2460 *
1933 * If path is not reachable from the supplied root, then the value of 2461 * If path is not reachable from the supplied root, then the value of
1934 * root is changed (without modifying refcounts). 2462 * root is changed (without modifying refcounts).
@@ -1956,7 +2484,9 @@ static int prepend_path(const struct path *path, struct path *root,
1956 } 2484 }
1957 parent = dentry->d_parent; 2485 parent = dentry->d_parent;
1958 prefetch(parent); 2486 prefetch(parent);
2487 spin_lock(&dentry->d_lock);
1959 error = prepend_name(buffer, buflen, &dentry->d_name); 2488 error = prepend_name(buffer, buflen, &dentry->d_name);
2489 spin_unlock(&dentry->d_lock);
1960 if (!error) 2490 if (!error)
1961 error = prepend(buffer, buflen, "/", 1); 2491 error = prepend(buffer, buflen, "/", 1);
1962 if (error) 2492 if (error)
@@ -2012,9 +2542,9 @@ char *__d_path(const struct path *path, struct path *root,
2012 int error; 2542 int error;
2013 2543
2014 prepend(&res, &buflen, "\0", 1); 2544 prepend(&res, &buflen, "\0", 1);
2015 spin_lock(&dcache_lock); 2545 write_seqlock(&rename_lock);
2016 error = prepend_path(path, root, &res, &buflen); 2546 error = prepend_path(path, root, &res, &buflen);
2017 spin_unlock(&dcache_lock); 2547 write_sequnlock(&rename_lock);
2018 2548
2019 if (error) 2549 if (error)
2020 return ERR_PTR(error); 2550 return ERR_PTR(error);
@@ -2076,12 +2606,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
2076 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2606 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2077 2607
2078 get_fs_root(current->fs, &root); 2608 get_fs_root(current->fs, &root);
2079 spin_lock(&dcache_lock); 2609 write_seqlock(&rename_lock);
2080 tmp = root; 2610 tmp = root;
2081 error = path_with_deleted(path, &tmp, &res, &buflen); 2611 error = path_with_deleted(path, &tmp, &res, &buflen);
2082 if (error) 2612 if (error)
2083 res = ERR_PTR(error); 2613 res = ERR_PTR(error);
2084 spin_unlock(&dcache_lock); 2614 write_sequnlock(&rename_lock);
2085 path_put(&root); 2615 path_put(&root);
2086 return res; 2616 return res;
2087} 2617}
@@ -2107,12 +2637,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2107 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2637 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2108 2638
2109 get_fs_root(current->fs, &root); 2639 get_fs_root(current->fs, &root);
2110 spin_lock(&dcache_lock); 2640 write_seqlock(&rename_lock);
2111 tmp = root; 2641 tmp = root;
2112 error = path_with_deleted(path, &tmp, &res, &buflen); 2642 error = path_with_deleted(path, &tmp, &res, &buflen);
2113 if (!error && !path_equal(&tmp, &root)) 2643 if (!error && !path_equal(&tmp, &root))
2114 error = prepend_unreachable(&res, &buflen); 2644 error = prepend_unreachable(&res, &buflen);
2115 spin_unlock(&dcache_lock); 2645 write_sequnlock(&rename_lock);
2116 path_put(&root); 2646 path_put(&root);
2117 if (error) 2647 if (error)
2118 res = ERR_PTR(error); 2648 res = ERR_PTR(error);
@@ -2144,7 +2674,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2144/* 2674/*
2145 * Write full pathname from the root of the filesystem into the buffer. 2675 * Write full pathname from the root of the filesystem into the buffer.
2146 */ 2676 */
2147char *__dentry_path(struct dentry *dentry, char *buf, int buflen) 2677static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2148{ 2678{
2149 char *end = buf + buflen; 2679 char *end = buf + buflen;
2150 char *retval; 2680 char *retval;
@@ -2158,10 +2688,13 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2158 2688
2159 while (!IS_ROOT(dentry)) { 2689 while (!IS_ROOT(dentry)) {
2160 struct dentry *parent = dentry->d_parent; 2690 struct dentry *parent = dentry->d_parent;
2691 int error;
2161 2692
2162 prefetch(parent); 2693 prefetch(parent);
2163 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || 2694 spin_lock(&dentry->d_lock);
2164 (prepend(&end, &buflen, "/", 1) != 0)) 2695 error = prepend_name(&end, &buflen, &dentry->d_name);
2696 spin_unlock(&dentry->d_lock);
2697 if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2165 goto Elong; 2698 goto Elong;
2166 2699
2167 retval = end; 2700 retval = end;
@@ -2171,14 +2704,25 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2171Elong: 2704Elong:
2172 return ERR_PTR(-ENAMETOOLONG); 2705 return ERR_PTR(-ENAMETOOLONG);
2173} 2706}
2174EXPORT_SYMBOL(__dentry_path); 2707
2708char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2709{
2710 char *retval;
2711
2712 write_seqlock(&rename_lock);
2713 retval = __dentry_path(dentry, buf, buflen);
2714 write_sequnlock(&rename_lock);
2715
2716 return retval;
2717}
2718EXPORT_SYMBOL(dentry_path_raw);
2175 2719
2176char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2720char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2177{ 2721{
2178 char *p = NULL; 2722 char *p = NULL;
2179 char *retval; 2723 char *retval;
2180 2724
2181 spin_lock(&dcache_lock); 2725 write_seqlock(&rename_lock);
2182 if (d_unlinked(dentry)) { 2726 if (d_unlinked(dentry)) {
2183 p = buf + buflen; 2727 p = buf + buflen;
2184 if (prepend(&p, &buflen, "//deleted", 10) != 0) 2728 if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2186,12 +2730,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2186 buflen++; 2730 buflen++;
2187 } 2731 }
2188 retval = __dentry_path(dentry, buf, buflen); 2732 retval = __dentry_path(dentry, buf, buflen);
2189 spin_unlock(&dcache_lock); 2733 write_sequnlock(&rename_lock);
2190 if (!IS_ERR(retval) && p) 2734 if (!IS_ERR(retval) && p)
2191 *p = '/'; /* restore '/' overriden with '\0' */ 2735 *p = '/'; /* restore '/' overriden with '\0' */
2192 return retval; 2736 return retval;
2193Elong: 2737Elong:
2194 spin_unlock(&dcache_lock);
2195 return ERR_PTR(-ENAMETOOLONG); 2738 return ERR_PTR(-ENAMETOOLONG);
2196} 2739}
2197 2740
@@ -2225,7 +2768,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2225 get_fs_root_and_pwd(current->fs, &root, &pwd); 2768 get_fs_root_and_pwd(current->fs, &root, &pwd);
2226 2769
2227 error = -ENOENT; 2770 error = -ENOENT;
2228 spin_lock(&dcache_lock); 2771 write_seqlock(&rename_lock);
2229 if (!d_unlinked(pwd.dentry)) { 2772 if (!d_unlinked(pwd.dentry)) {
2230 unsigned long len; 2773 unsigned long len;
2231 struct path tmp = root; 2774 struct path tmp = root;
@@ -2234,7 +2777,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2234 2777
2235 prepend(&cwd, &buflen, "\0", 1); 2778 prepend(&cwd, &buflen, "\0", 1);
2236 error = prepend_path(&pwd, &tmp, &cwd, &buflen); 2779 error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2237 spin_unlock(&dcache_lock); 2780 write_sequnlock(&rename_lock);
2238 2781
2239 if (error) 2782 if (error)
2240 goto out; 2783 goto out;
@@ -2253,8 +2796,9 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2253 if (copy_to_user(buf, cwd, len)) 2796 if (copy_to_user(buf, cwd, len))
2254 error = -EFAULT; 2797 error = -EFAULT;
2255 } 2798 }
2256 } else 2799 } else {
2257 spin_unlock(&dcache_lock); 2800 write_sequnlock(&rename_lock);
2801 }
2258 2802
2259out: 2803out:
2260 path_put(&pwd); 2804 path_put(&pwd);
@@ -2282,25 +2826,25 @@ out:
2282int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 2826int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2283{ 2827{
2284 int result; 2828 int result;
2285 unsigned long seq; 2829 unsigned seq;
2286 2830
2287 if (new_dentry == old_dentry) 2831 if (new_dentry == old_dentry)
2288 return 1; 2832 return 1;
2289 2833
2290 /*
2291 * Need rcu_readlock to protect against the d_parent trashing
2292 * due to d_move
2293 */
2294 rcu_read_lock();
2295 do { 2834 do {
2296 /* for restarting inner loop in case of seq retry */ 2835 /* for restarting inner loop in case of seq retry */
2297 seq = read_seqbegin(&rename_lock); 2836 seq = read_seqbegin(&rename_lock);
2837 /*
2838 * Need rcu_readlock to protect against the d_parent trashing
2839 * due to d_move
2840 */
2841 rcu_read_lock();
2298 if (d_ancestor(old_dentry, new_dentry)) 2842 if (d_ancestor(old_dentry, new_dentry))
2299 result = 1; 2843 result = 1;
2300 else 2844 else
2301 result = 0; 2845 result = 0;
2846 rcu_read_unlock();
2302 } while (read_seqretry(&rename_lock, seq)); 2847 } while (read_seqretry(&rename_lock, seq));
2303 rcu_read_unlock();
2304 2848
2305 return result; 2849 return result;
2306} 2850}
@@ -2332,10 +2876,15 @@ EXPORT_SYMBOL(path_is_under);
2332 2876
2333void d_genocide(struct dentry *root) 2877void d_genocide(struct dentry *root)
2334{ 2878{
2335 struct dentry *this_parent = root; 2879 struct dentry *this_parent;
2336 struct list_head *next; 2880 struct list_head *next;
2881 unsigned seq;
2882 int locked = 0;
2337 2883
2338 spin_lock(&dcache_lock); 2884 seq = read_seqbegin(&rename_lock);
2885again:
2886 this_parent = root;
2887 spin_lock(&this_parent->d_lock);
2339repeat: 2888repeat:
2340 next = this_parent->d_subdirs.next; 2889 next = this_parent->d_subdirs.next;
2341resume: 2890resume:
@@ -2343,21 +2892,62 @@ resume:
2343 struct list_head *tmp = next; 2892 struct list_head *tmp = next;
2344 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 2893 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2345 next = tmp->next; 2894 next = tmp->next;
2346 if (d_unhashed(dentry)||!dentry->d_inode) 2895
2896 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2897 if (d_unhashed(dentry) || !dentry->d_inode) {
2898 spin_unlock(&dentry->d_lock);
2347 continue; 2899 continue;
2900 }
2348 if (!list_empty(&dentry->d_subdirs)) { 2901 if (!list_empty(&dentry->d_subdirs)) {
2902 spin_unlock(&this_parent->d_lock);
2903 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2349 this_parent = dentry; 2904 this_parent = dentry;
2905 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2350 goto repeat; 2906 goto repeat;
2351 } 2907 }
2352 atomic_dec(&dentry->d_count); 2908 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2909 dentry->d_flags |= DCACHE_GENOCIDE;
2910 dentry->d_count--;
2911 }
2912 spin_unlock(&dentry->d_lock);
2353 } 2913 }
2354 if (this_parent != root) { 2914 if (this_parent != root) {
2355 next = this_parent->d_u.d_child.next; 2915 struct dentry *tmp;
2356 atomic_dec(&this_parent->d_count); 2916 struct dentry *child;
2357 this_parent = this_parent->d_parent; 2917
2918 tmp = this_parent->d_parent;
2919 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2920 this_parent->d_flags |= DCACHE_GENOCIDE;
2921 this_parent->d_count--;
2922 }
2923 rcu_read_lock();
2924 spin_unlock(&this_parent->d_lock);
2925 child = this_parent;
2926 this_parent = tmp;
2927 spin_lock(&this_parent->d_lock);
2928 /* might go back up the wrong parent if we have had a rename
2929 * or deletion */
2930 if (this_parent != child->d_parent ||
2931 (!locked && read_seqretry(&rename_lock, seq))) {
2932 spin_unlock(&this_parent->d_lock);
2933 rcu_read_unlock();
2934 goto rename_retry;
2935 }
2936 rcu_read_unlock();
2937 next = child->d_u.d_child.next;
2358 goto resume; 2938 goto resume;
2359 } 2939 }
2360 spin_unlock(&dcache_lock); 2940 spin_unlock(&this_parent->d_lock);
2941 if (!locked && read_seqretry(&rename_lock, seq))
2942 goto rename_retry;
2943 if (locked)
2944 write_sequnlock(&rename_lock);
2945 return;
2946
2947rename_retry:
2948 locked = 1;
2949 write_seqlock(&rename_lock);
2950 goto again;
2361} 2951}
2362 2952
2363/** 2953/**
@@ -2411,7 +3001,7 @@ static void __init dcache_init_early(void)
2411 3001
2412 dentry_hashtable = 3002 dentry_hashtable =
2413 alloc_large_system_hash("Dentry cache", 3003 alloc_large_system_hash("Dentry cache",
2414 sizeof(struct hlist_head), 3004 sizeof(struct dcache_hash_bucket),
2415 dhash_entries, 3005 dhash_entries,
2416 13, 3006 13,
2417 HASH_EARLY, 3007 HASH_EARLY,
@@ -2420,16 +3010,13 @@ static void __init dcache_init_early(void)
2420 0); 3010 0);
2421 3011
2422 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3012 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2423 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3013 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
2424} 3014}
2425 3015
2426static void __init dcache_init(void) 3016static void __init dcache_init(void)
2427{ 3017{
2428 int loop; 3018 int loop;
2429 3019
2430 percpu_counter_init(&nr_dentry, 0);
2431 percpu_counter_init(&nr_dentry_unused, 0);
2432
2433 /* 3020 /*
2434 * A constructor could be added for stable state like the lists, 3021 * A constructor could be added for stable state like the lists,
2435 * but it is probably not worth it because of the cache nature 3022 * but it is probably not worth it because of the cache nature
@@ -2446,7 +3033,7 @@ static void __init dcache_init(void)
2446 3033
2447 dentry_hashtable = 3034 dentry_hashtable =
2448 alloc_large_system_hash("Dentry cache", 3035 alloc_large_system_hash("Dentry cache",
2449 sizeof(struct hlist_head), 3036 sizeof(struct dcache_hash_bucket),
2450 dhash_entries, 3037 dhash_entries,
2451 13, 3038 13,
2452 0, 3039 0,
@@ -2455,7 +3042,7 @@ static void __init dcache_init(void)
2455 0); 3042 0);
2456 3043
2457 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3044 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2458 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3045 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
2459} 3046}
2460 3047
2461/* SLAB cache for __getname() consumers */ 3048/* SLAB cache for __getname() consumers */
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 906e803f7f79..6fc4f319b550 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,12 +44,17 @@
44 */ 44 */
45static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) 45static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
46{ 46{
47 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 47 struct dentry *lower_dentry;
48 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 48 struct vfsmount *lower_mnt;
49 struct dentry *dentry_save; 49 struct dentry *dentry_save;
50 struct vfsmount *vfsmount_save; 50 struct vfsmount *vfsmount_save;
51 int rc = 1; 51 int rc = 1;
52 52
53 if (nd->flags & LOOKUP_RCU)
54 return -ECHILD;
55
56 lower_dentry = ecryptfs_dentry_to_lower(dentry);
57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
53 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
54 goto out; 59 goto out;
55 dentry_save = nd->path.dentry; 60 dentry_save = nd->path.dentry;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 9d1a22d62765..337352a94751 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -260,7 +260,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
260 ecryptfs_dentry->d_parent)); 260 ecryptfs_dentry->d_parent));
261 lower_inode = lower_dentry->d_inode; 261 lower_inode = lower_dentry->d_inode;
262 fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); 262 fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
263 BUG_ON(!atomic_read(&lower_dentry->d_count)); 263 BUG_ON(!lower_dentry->d_count);
264 ecryptfs_set_dentry_private(ecryptfs_dentry, 264 ecryptfs_set_dentry_private(ecryptfs_dentry,
265 kmem_cache_alloc(ecryptfs_dentry_info_cache, 265 kmem_cache_alloc(ecryptfs_dentry_info_cache,
266 GFP_KERNEL)); 266 GFP_KERNEL));
@@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
441 struct qstr lower_name; 441 struct qstr lower_name;
442 int rc = 0; 442 int rc = 0;
443 443
444 ecryptfs_dentry->d_op = &ecryptfs_dops; 444 d_set_d_op(ecryptfs_dentry, &ecryptfs_dops);
445 if ((ecryptfs_dentry->d_name.len == 1 445 if ((ecryptfs_dentry->d_name.len == 1
446 && !strcmp(ecryptfs_dentry->d_name.name, ".")) 446 && !strcmp(ecryptfs_dentry->d_name.name, "."))
447 || (ecryptfs_dentry->d_name.len == 2 447 || (ecryptfs_dentry->d_name.len == 2
@@ -454,7 +454,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
454 lower_name.hash = ecryptfs_dentry->d_name.hash; 454 lower_name.hash = ecryptfs_dentry->d_name.hash;
455 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 455 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
456 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 456 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
457 &lower_name); 457 lower_dir_dentry->d_inode, &lower_name);
458 if (rc < 0) 458 if (rc < 0)
459 goto out_d_drop; 459 goto out_d_drop;
460 } 460 }
@@ -489,7 +489,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
489 lower_name.hash = full_name_hash(lower_name.name, lower_name.len); 489 lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
490 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 490 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
491 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 491 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
492 &lower_name); 492 lower_dir_dentry->d_inode, &lower_name);
493 if (rc < 0) 493 if (rc < 0)
494 goto out_d_drop; 494 goto out_d_drop;
495 } 495 }
@@ -980,8 +980,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
980} 980}
981 981
982static int 982static int
983ecryptfs_permission(struct inode *inode, int mask) 983ecryptfs_permission(struct inode *inode, int mask, unsigned int flags)
984{ 984{
985 if (flags & IPERM_FLAG_RCU)
986 return -ECHILD;
985 return inode_permission(ecryptfs_inode_to_lower(inode), mask); 987 return inode_permission(ecryptfs_inode_to_lower(inode), mask);
986} 988}
987 989
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a9dbd62518e6..351038675376 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
189 if (special_file(lower_inode->i_mode)) 189 if (special_file(lower_inode->i_mode))
190 init_special_inode(inode, lower_inode->i_mode, 190 init_special_inode(inode, lower_inode->i_mode,
191 lower_inode->i_rdev); 191 lower_inode->i_rdev);
192 dentry->d_op = &ecryptfs_dops; 192 d_set_d_op(dentry, &ecryptfs_dops);
193 fsstack_copy_attr_all(inode, lower_inode); 193 fsstack_copy_attr_all(inode, lower_inode);
194 /* This size will be overwritten for real files w/ headers and 194 /* This size will be overwritten for real files w/ headers and
195 * other metadata */ 195 * other metadata */
@@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
594 deactivate_locked_super(s); 594 deactivate_locked_super(s);
595 goto out; 595 goto out;
596 } 596 }
597 s->s_root->d_op = &ecryptfs_dops; 597 d_set_d_op(s->s_root, &ecryptfs_dops);
598 s->s_root->d_sb = s; 598 s->s_root->d_sb = s;
599 s->s_root->d_parent = s->s_root; 599 s->s_root->d_parent = s->s_root;
600 600
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 2720178b7718..3042fe123a34 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -62,6 +62,16 @@ out:
62 return inode; 62 return inode;
63} 63}
64 64
65static void ecryptfs_i_callback(struct rcu_head *head)
66{
67 struct inode *inode = container_of(head, struct inode, i_rcu);
68 struct ecryptfs_inode_info *inode_info;
69 inode_info = ecryptfs_inode_to_private(inode);
70
71 INIT_LIST_HEAD(&inode->i_dentry);
72 kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
73}
74
65/** 75/**
66 * ecryptfs_destroy_inode 76 * ecryptfs_destroy_inode
67 * @inode: The ecryptfs inode 77 * @inode: The ecryptfs inode
@@ -88,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
88 } 98 }
89 } 99 }
90 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 100 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
91 kmem_cache_free(ecryptfs_inode_info_cache, inode_info); 101 call_rcu(&inode->i_rcu, ecryptfs_i_callback);
92} 102}
93 103
94/** 104/**
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 5073a07652cc..0f31acb0131c 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
65 return &ei->vfs_inode; 65 return &ei->vfs_inode;
66} 66}
67 67
68static void efs_destroy_inode(struct inode *inode) 68static void efs_i_callback(struct rcu_head *head)
69{ 69{
70 struct inode *inode = container_of(head, struct inode, i_rcu);
71 INIT_LIST_HEAD(&inode->i_dentry);
70 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); 72 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
71} 73}
72 74
75static void efs_destroy_inode(struct inode *inode)
76{
77 call_rcu(&inode->i_rcu, efs_i_callback);
78}
79
73static void init_once(void *foo) 80static void init_once(void *foo)
74{ 81{
75 struct efs_inode_info *ei = (struct efs_inode_info *) foo; 82 struct efs_inode_info *ei = (struct efs_inode_info *) foo;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 79c3ae6e0456..8c6c4669b381 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
150 return &oi->vfs_inode; 150 return &oi->vfs_inode;
151} 151}
152 152
153static void exofs_i_callback(struct rcu_head *head)
154{
155 struct inode *inode = container_of(head, struct inode, i_rcu);
156 INIT_LIST_HEAD(&inode->i_dentry);
157 kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
158}
159
153/* 160/*
154 * Remove an inode from the cache 161 * Remove an inode from the cache
155 */ 162 */
156static void exofs_destroy_inode(struct inode *inode) 163static void exofs_destroy_inode(struct inode *inode)
157{ 164{
158 kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); 165 call_rcu(&inode->i_rcu, exofs_i_callback);
159} 166}
160 167
161/* 168/*
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 51b304056f10..4b6825740dd5 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -43,24 +43,26 @@ find_acceptable_alias(struct dentry *result,
43 void *context) 43 void *context)
44{ 44{
45 struct dentry *dentry, *toput = NULL; 45 struct dentry *dentry, *toput = NULL;
46 struct inode *inode;
46 47
47 if (acceptable(context, result)) 48 if (acceptable(context, result))
48 return result; 49 return result;
49 50
50 spin_lock(&dcache_lock); 51 inode = result->d_inode;
51 list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { 52 spin_lock(&inode->i_lock);
52 dget_locked(dentry); 53 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
53 spin_unlock(&dcache_lock); 54 dget(dentry);
55 spin_unlock(&inode->i_lock);
54 if (toput) 56 if (toput)
55 dput(toput); 57 dput(toput);
56 if (dentry != result && acceptable(context, dentry)) { 58 if (dentry != result && acceptable(context, dentry)) {
57 dput(result); 59 dput(result);
58 return dentry; 60 return dentry;
59 } 61 }
60 spin_lock(&dcache_lock); 62 spin_lock(&inode->i_lock);
61 toput = dentry; 63 toput = dentry;
62 } 64 }
63 spin_unlock(&dcache_lock); 65 spin_unlock(&inode->i_lock);
64 66
65 if (toput) 67 if (toput)
66 dput(toput); 68 dput(toput);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 2bcc0431bada..7b4180554a62 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -232,10 +232,17 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
232} 232}
233 233
234int 234int
235ext2_check_acl(struct inode *inode, int mask) 235ext2_check_acl(struct inode *inode, int mask, unsigned int flags)
236{ 236{
237 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); 237 struct posix_acl *acl;
238
239 if (flags & IPERM_FLAG_RCU) {
240 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
241 return -ECHILD;
242 return -EAGAIN;
243 }
238 244
245 acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
239 if (IS_ERR(acl)) 246 if (IS_ERR(acl))
240 return PTR_ERR(acl); 247 return PTR_ERR(acl);
241 if (acl) { 248 if (acl) {
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 3ff6cbb9ac44..c939b7b12099 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size)
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext2_check_acl (struct inode *, int); 57extern int ext2_check_acl (struct inode *, int, unsigned int);
58extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
59extern int ext2_init_acl (struct inode *, struct inode *); 59extern int ext2_init_acl (struct inode *, struct inode *);
60 60
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d89e0b6a2d78..e0c6380ff992 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
161 return &ei->vfs_inode; 161 return &ei->vfs_inode;
162} 162}
163 163
164static void ext2_destroy_inode(struct inode *inode) 164static void ext2_i_callback(struct rcu_head *head)
165{ 165{
166 struct inode *inode = container_of(head, struct inode, i_rcu);
167 INIT_LIST_HEAD(&inode->i_dentry);
166 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); 168 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
167} 169}
168 170
171static void ext2_destroy_inode(struct inode *inode)
172{
173 call_rcu(&inode->i_rcu, ext2_i_callback);
174}
175
169static void init_once(void *foo) 176static void init_once(void *foo)
170{ 177{
171 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; 178 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 8a11fe212183..e4fa49e6c539 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -240,10 +240,17 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
240} 240}
241 241
242int 242int
243ext3_check_acl(struct inode *inode, int mask) 243ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
244{ 244{
245 struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); 245 struct posix_acl *acl;
246
247 if (flags & IPERM_FLAG_RCU) {
248 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
249 return -ECHILD;
250 return -EAGAIN;
251 }
246 252
253 acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
247 if (IS_ERR(acl)) 254 if (IS_ERR(acl))
248 return PTR_ERR(acl); 255 return PTR_ERR(acl);
249 if (acl) { 256 if (acl) {
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 597334626de9..5faf8048e906 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size)
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext3_check_acl (struct inode *, int); 57extern int ext3_check_acl (struct inode *, int, unsigned int);
58extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index acf8695fa8f0..77ce1616f725 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -479,6 +479,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
479 return &ei->vfs_inode; 479 return &ei->vfs_inode;
480} 480}
481 481
482static void ext3_i_callback(struct rcu_head *head)
483{
484 struct inode *inode = container_of(head, struct inode, i_rcu);
485 INIT_LIST_HEAD(&inode->i_dentry);
486 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
487}
488
482static void ext3_destroy_inode(struct inode *inode) 489static void ext3_destroy_inode(struct inode *inode)
483{ 490{
484 if (!list_empty(&(EXT3_I(inode)->i_orphan))) { 491 if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -489,7 +496,7 @@ static void ext3_destroy_inode(struct inode *inode)
489 false); 496 false);
490 dump_stack(); 497 dump_stack();
491 } 498 }
492 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 499 call_rcu(&inode->i_rcu, ext3_i_callback);
493} 500}
494 501
495static void init_once(void *foo) 502static void init_once(void *foo)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 5e2ed4504ead..e0270d1f8d82 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,10 +238,17 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
238} 238}
239 239
240int 240int
241ext4_check_acl(struct inode *inode, int mask) 241ext4_check_acl(struct inode *inode, int mask, unsigned int flags)
242{ 242{
243 struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); 243 struct posix_acl *acl;
244
245 if (flags & IPERM_FLAG_RCU) {
246 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
247 return -ECHILD;
248 return -EAGAIN;
249 }
244 250
251 acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
245 if (IS_ERR(acl)) 252 if (IS_ERR(acl))
246 return PTR_ERR(acl); 253 return PTR_ERR(acl);
247 if (acl) { 254 if (acl) {
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9d843d5deac4..dec821168fd4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext4_check_acl(struct inode *, int); 57extern int ext4_check_acl(struct inode *, int, unsigned int);
58extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); 59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb15c9c0be74..cd37f9d5e447 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -841,6 +841,13 @@ static int ext4_drop_inode(struct inode *inode)
841 return drop; 841 return drop;
842} 842}
843 843
844static void ext4_i_callback(struct rcu_head *head)
845{
846 struct inode *inode = container_of(head, struct inode, i_rcu);
847 INIT_LIST_HEAD(&inode->i_dentry);
848 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
849}
850
844static void ext4_destroy_inode(struct inode *inode) 851static void ext4_destroy_inode(struct inode *inode)
845{ 852{
846 ext4_ioend_wait(inode); 853 ext4_ioend_wait(inode);
@@ -853,7 +860,7 @@ static void ext4_destroy_inode(struct inode *inode)
853 true); 860 true);
854 dump_stack(); 861 dump_stack();
855 } 862 }
856 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 863 call_rcu(&inode->i_rcu, ext4_i_callback);
857} 864}
858 865
859static void init_once(void *foo) 866static void init_once(void *foo)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ad6998a92c30..206351af7c58 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
514 return &ei->vfs_inode; 514 return &ei->vfs_inode;
515} 515}
516 516
517static void fat_destroy_inode(struct inode *inode) 517static void fat_i_callback(struct rcu_head *head)
518{ 518{
519 struct inode *inode = container_of(head, struct inode, i_rcu);
520 INIT_LIST_HEAD(&inode->i_dentry);
519 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); 521 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
520} 522}
521 523
524static void fat_destroy_inode(struct inode *inode)
525{
526 call_rcu(&inode->i_rcu, fat_i_callback);
527}
528
522static void init_once(void *foo) 529static void init_once(void *foo)
523{ 530{
524 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; 531 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
@@ -743,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
743 */ 750 */
744 result = d_obtain_alias(inode); 751 result = d_obtain_alias(inode);
745 if (!IS_ERR(result)) 752 if (!IS_ERR(result))
746 result->d_op = sb->s_root->d_op; 753 d_set_d_op(result, sb->s_root->d_op);
747 return result; 754 return result;
748} 755}
749 756
@@ -793,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child)
793 800
794 parent = d_obtain_alias(inode); 801 parent = d_obtain_alias(inode);
795 if (!IS_ERR(parent)) 802 if (!IS_ERR(parent))
796 parent->d_op = sb->s_root->d_op; 803 d_set_d_op(parent, sb->s_root->d_op);
797out: 804out:
798 unlock_super(sb); 805 unlock_super(sb);
799 806
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3345aabd1dd7..35ffe43afa4b 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -148,7 +148,8 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len,
148 * that the existing dentry can be used. The msdos fs routines will 148 * that the existing dentry can be used. The msdos fs routines will
149 * return ENOENT or EINVAL as appropriate. 149 * return ENOENT or EINVAL as appropriate.
150 */ 150 */
151static int msdos_hash(struct dentry *dentry, struct qstr *qstr) 151static int msdos_hash(const struct dentry *dentry, const struct inode *inode,
152 struct qstr *qstr)
152{ 153{
153 struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; 154 struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
154 unsigned char msdos_name[MSDOS_NAME]; 155 unsigned char msdos_name[MSDOS_NAME];
@@ -164,16 +165,18 @@ static int msdos_hash(struct dentry *dentry, struct qstr *qstr)
164 * Compare two msdos names. If either of the names are invalid, 165 * Compare two msdos names. If either of the names are invalid,
165 * we fall back to doing the standard name comparison. 166 * we fall back to doing the standard name comparison.
166 */ 167 */
167static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) 168static int msdos_cmp(const struct dentry *parent, const struct inode *pinode,
169 const struct dentry *dentry, const struct inode *inode,
170 unsigned int len, const char *str, const struct qstr *name)
168{ 171{
169 struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; 172 struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options;
170 unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME]; 173 unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME];
171 int error; 174 int error;
172 175
173 error = msdos_format_name(a->name, a->len, a_msdos_name, options); 176 error = msdos_format_name(name->name, name->len, a_msdos_name, options);
174 if (error) 177 if (error)
175 goto old_compare; 178 goto old_compare;
176 error = msdos_format_name(b->name, b->len, b_msdos_name, options); 179 error = msdos_format_name(str, len, b_msdos_name, options);
177 if (error) 180 if (error)
178 goto old_compare; 181 goto old_compare;
179 error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME); 182 error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME);
@@ -182,8 +185,8 @@ out:
182 185
183old_compare: 186old_compare:
184 error = 1; 187 error = 1;
185 if (a->len == b->len) 188 if (name->len == len)
186 error = memcmp(a->name, b->name, a->len); 189 error = memcmp(name->name, str, len);
187 goto out; 190 goto out;
188} 191}
189 192
@@ -224,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
224 } 227 }
225out: 228out:
226 unlock_super(sb); 229 unlock_super(sb);
227 dentry->d_op = &msdos_dentry_operations; 230 d_set_d_op(dentry, &msdos_dentry_operations);
228 dentry = d_splice_alias(inode, dentry); 231 dentry = d_splice_alias(inode, dentry);
229 if (dentry) 232 if (dentry)
230 dentry->d_op = &msdos_dentry_operations; 233 d_set_d_op(dentry, &msdos_dentry_operations);
231 return dentry; 234 return dentry;
232 235
233error: 236error:
@@ -670,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
670 } 673 }
671 674
672 sb->s_flags |= MS_NOATIME; 675 sb->s_flags |= MS_NOATIME;
673 sb->s_root->d_op = &msdos_dentry_operations; 676 d_set_d_op(sb->s_root, &msdos_dentry_operations);
674 unlock_super(sb); 677 unlock_super(sb);
675 return 0; 678 return 0;
676} 679}
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b936703b8924..e3ffc5e12332 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,6 +43,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
45{ 45{
46 if (nd->flags & LOOKUP_RCU)
47 return -ECHILD;
48
46 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
47 if (dentry->d_inode) 50 if (dentry->d_inode)
48 return 1; 51 return 1;
@@ -51,6 +54,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
51 54
52static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
53{ 56{
57 if (nd->flags & LOOKUP_RCU)
58 return -ECHILD;
59
54 /* 60 /*
55 * This is not negative dentry. Always valid. 61 * This is not negative dentry. Always valid.
56 * 62 *
@@ -85,22 +91,26 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
85} 91}
86 92
87/* returns the length of a struct qstr, ignoring trailing dots */ 93/* returns the length of a struct qstr, ignoring trailing dots */
88static unsigned int vfat_striptail_len(struct qstr *qstr) 94static unsigned int __vfat_striptail_len(unsigned int len, const char *name)
89{ 95{
90 unsigned int len = qstr->len; 96 while (len && name[len - 1] == '.')
91
92 while (len && qstr->name[len - 1] == '.')
93 len--; 97 len--;
94 return len; 98 return len;
95} 99}
96 100
101static unsigned int vfat_striptail_len(const struct qstr *qstr)
102{
103 return __vfat_striptail_len(qstr->len, qstr->name);
104}
105
97/* 106/*
98 * Compute the hash for the vfat name corresponding to the dentry. 107 * Compute the hash for the vfat name corresponding to the dentry.
99 * Note: if the name is invalid, we leave the hash code unchanged so 108 * Note: if the name is invalid, we leave the hash code unchanged so
100 * that the existing dentry can be used. The vfat fs routines will 109 * that the existing dentry can be used. The vfat fs routines will
101 * return ENOENT or EINVAL as appropriate. 110 * return ENOENT or EINVAL as appropriate.
102 */ 111 */
103static int vfat_hash(struct dentry *dentry, struct qstr *qstr) 112static int vfat_hash(const struct dentry *dentry, const struct inode *inode,
113 struct qstr *qstr)
104{ 114{
105 qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); 115 qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr));
106 return 0; 116 return 0;
@@ -112,9 +122,10 @@ static int vfat_hash(struct dentry *dentry, struct qstr *qstr)
112 * that the existing dentry can be used. The vfat fs routines will 122 * that the existing dentry can be used. The vfat fs routines will
113 * return ENOENT or EINVAL as appropriate. 123 * return ENOENT or EINVAL as appropriate.
114 */ 124 */
115static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) 125static int vfat_hashi(const struct dentry *dentry, const struct inode *inode,
126 struct qstr *qstr)
116{ 127{
117 struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; 128 struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
118 const unsigned char *name; 129 const unsigned char *name;
119 unsigned int len; 130 unsigned int len;
120 unsigned long hash; 131 unsigned long hash;
@@ -133,16 +144,18 @@ static int vfat_hashi(struct dentry *dentry, struct qstr *qstr)
133/* 144/*
134 * Case insensitive compare of two vfat names. 145 * Case insensitive compare of two vfat names.
135 */ 146 */
136static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) 147static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode,
148 const struct dentry *dentry, const struct inode *inode,
149 unsigned int len, const char *str, const struct qstr *name)
137{ 150{
138 struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; 151 struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io;
139 unsigned int alen, blen; 152 unsigned int alen, blen;
140 153
141 /* A filename cannot end in '.' or we treat it like it has none */ 154 /* A filename cannot end in '.' or we treat it like it has none */
142 alen = vfat_striptail_len(a); 155 alen = vfat_striptail_len(name);
143 blen = vfat_striptail_len(b); 156 blen = __vfat_striptail_len(len, str);
144 if (alen == blen) { 157 if (alen == blen) {
145 if (nls_strnicmp(t, a->name, b->name, alen) == 0) 158 if (nls_strnicmp(t, name->name, str, alen) == 0)
146 return 0; 159 return 0;
147 } 160 }
148 return 1; 161 return 1;
@@ -151,15 +164,17 @@ static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b)
151/* 164/*
152 * Case sensitive compare of two vfat names. 165 * Case sensitive compare of two vfat names.
153 */ 166 */
154static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) 167static int vfat_cmp(const struct dentry *parent, const struct inode *pinode,
168 const struct dentry *dentry, const struct inode *inode,
169 unsigned int len, const char *str, const struct qstr *name)
155{ 170{
156 unsigned int alen, blen; 171 unsigned int alen, blen;
157 172
158 /* A filename cannot end in '.' or we treat it like it has none */ 173 /* A filename cannot end in '.' or we treat it like it has none */
159 alen = vfat_striptail_len(a); 174 alen = vfat_striptail_len(name);
160 blen = vfat_striptail_len(b); 175 blen = __vfat_striptail_len(len, str);
161 if (alen == blen) { 176 if (alen == blen) {
162 if (strncmp(a->name, b->name, alen) == 0) 177 if (strncmp(name->name, str, alen) == 0)
163 return 0; 178 return 0;
164 } 179 }
165 return 1; 180 return 1;
@@ -757,11 +772,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
757 772
758out: 773out:
759 unlock_super(sb); 774 unlock_super(sb);
760 dentry->d_op = sb->s_root->d_op; 775 d_set_d_op(dentry, sb->s_root->d_op);
761 dentry->d_time = dentry->d_parent->d_inode->i_version; 776 dentry->d_time = dentry->d_parent->d_inode->i_version;
762 dentry = d_splice_alias(inode, dentry); 777 dentry = d_splice_alias(inode, dentry);
763 if (dentry) { 778 if (dentry) {
764 dentry->d_op = sb->s_root->d_op; 779 d_set_d_op(dentry, sb->s_root->d_op);
765 dentry->d_time = dentry->d_parent->d_inode->i_version; 780 dentry->d_time = dentry->d_parent->d_inode->i_version;
766 } 781 }
767 return dentry; 782 return dentry;
@@ -1063,9 +1078,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1063 } 1078 }
1064 1079
1065 if (MSDOS_SB(sb)->options.name_check != 's') 1080 if (MSDOS_SB(sb)->options.name_check != 's')
1066 sb->s_root->d_op = &vfat_ci_dentry_ops; 1081 d_set_d_op(sb->s_root, &vfat_ci_dentry_ops);
1067 else 1082 else
1068 sb->s_root->d_op = &vfat_dentry_ops; 1083 d_set_d_op(sb->s_root, &vfat_dentry_ops);
1069 1084
1070 unlock_super(sb); 1085 unlock_super(sb);
1071 return 0; 1086 return 0;
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492d8eef..751d6b255a12 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -115,6 +115,9 @@ int unregister_filesystem(struct file_system_type * fs)
115 tmp = &(*tmp)->next; 115 tmp = &(*tmp)->next;
116 } 116 }
117 write_unlock(&file_systems_lock); 117 write_unlock(&file_systems_lock);
118
119 synchronize_rcu();
120
118 return -EINVAL; 121 return -EINVAL;
119} 122}
120 123
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 8c04eac5079d..2ba6719ac612 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
337 return ip; 337 return ip;
338} 338}
339 339
340static void vxfs_i_callback(struct rcu_head *head)
341{
342 struct inode *inode = container_of(head, struct inode, i_rcu);
343 INIT_LIST_HEAD(&inode->i_dentry);
344 kmem_cache_free(vxfs_inode_cachep, inode->i_private);
345}
346
340/** 347/**
341 * vxfs_evict_inode - remove inode from main memory 348 * vxfs_evict_inode - remove inode from main memory
342 * @ip: inode to discard. 349 * @ip: inode to discard.
@@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip)
350{ 357{
351 truncate_inode_pages(&ip->i_data, 0); 358 truncate_inode_pages(&ip->i_data, 0);
352 end_writeback(ip); 359 end_writeback(ip);
353 kmem_cache_free(vxfs_inode_cachep, ip->i_private); 360 call_rcu(&ip->i_rcu, vxfs_i_callback);
354} 361}
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index ed45a9cf5f3d..68ca487bedb1 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -14,12 +14,14 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
14 struct path old_root; 14 struct path old_root;
15 15
16 spin_lock(&fs->lock); 16 spin_lock(&fs->lock);
17 write_seqcount_begin(&fs->seq);
17 old_root = fs->root; 18 old_root = fs->root;
18 fs->root = *path; 19 fs->root = *path;
19 path_get(path); 20 path_get_long(path);
21 write_seqcount_end(&fs->seq);
20 spin_unlock(&fs->lock); 22 spin_unlock(&fs->lock);
21 if (old_root.dentry) 23 if (old_root.dentry)
22 path_put(&old_root); 24 path_put_long(&old_root);
23} 25}
24 26
25/* 27/*
@@ -31,13 +33,15 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
31 struct path old_pwd; 33 struct path old_pwd;
32 34
33 spin_lock(&fs->lock); 35 spin_lock(&fs->lock);
36 write_seqcount_begin(&fs->seq);
34 old_pwd = fs->pwd; 37 old_pwd = fs->pwd;
35 fs->pwd = *path; 38 fs->pwd = *path;
36 path_get(path); 39 path_get_long(path);
40 write_seqcount_end(&fs->seq);
37 spin_unlock(&fs->lock); 41 spin_unlock(&fs->lock);
38 42
39 if (old_pwd.dentry) 43 if (old_pwd.dentry)
40 path_put(&old_pwd); 44 path_put_long(&old_pwd);
41} 45}
42 46
43void chroot_fs_refs(struct path *old_root, struct path *new_root) 47void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -52,31 +56,33 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
52 fs = p->fs; 56 fs = p->fs;
53 if (fs) { 57 if (fs) {
54 spin_lock(&fs->lock); 58 spin_lock(&fs->lock);
59 write_seqcount_begin(&fs->seq);
55 if (fs->root.dentry == old_root->dentry 60 if (fs->root.dentry == old_root->dentry
56 && fs->root.mnt == old_root->mnt) { 61 && fs->root.mnt == old_root->mnt) {
57 path_get(new_root); 62 path_get_long(new_root);
58 fs->root = *new_root; 63 fs->root = *new_root;
59 count++; 64 count++;
60 } 65 }
61 if (fs->pwd.dentry == old_root->dentry 66 if (fs->pwd.dentry == old_root->dentry
62 && fs->pwd.mnt == old_root->mnt) { 67 && fs->pwd.mnt == old_root->mnt) {
63 path_get(new_root); 68 path_get_long(new_root);
64 fs->pwd = *new_root; 69 fs->pwd = *new_root;
65 count++; 70 count++;
66 } 71 }
72 write_seqcount_end(&fs->seq);
67 spin_unlock(&fs->lock); 73 spin_unlock(&fs->lock);
68 } 74 }
69 task_unlock(p); 75 task_unlock(p);
70 } while_each_thread(g, p); 76 } while_each_thread(g, p);
71 read_unlock(&tasklist_lock); 77 read_unlock(&tasklist_lock);
72 while (count--) 78 while (count--)
73 path_put(old_root); 79 path_put_long(old_root);
74} 80}
75 81
76void free_fs_struct(struct fs_struct *fs) 82void free_fs_struct(struct fs_struct *fs)
77{ 83{
78 path_put(&fs->root); 84 path_put_long(&fs->root);
79 path_put(&fs->pwd); 85 path_put_long(&fs->pwd);
80 kmem_cache_free(fs_cachep, fs); 86 kmem_cache_free(fs_cachep, fs);
81} 87}
82 88
@@ -88,8 +94,10 @@ void exit_fs(struct task_struct *tsk)
88 int kill; 94 int kill;
89 task_lock(tsk); 95 task_lock(tsk);
90 spin_lock(&fs->lock); 96 spin_lock(&fs->lock);
97 write_seqcount_begin(&fs->seq);
91 tsk->fs = NULL; 98 tsk->fs = NULL;
92 kill = !--fs->users; 99 kill = !--fs->users;
100 write_seqcount_end(&fs->seq);
93 spin_unlock(&fs->lock); 101 spin_unlock(&fs->lock);
94 task_unlock(tsk); 102 task_unlock(tsk);
95 if (kill) 103 if (kill)
@@ -105,8 +113,15 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
105 fs->users = 1; 113 fs->users = 1;
106 fs->in_exec = 0; 114 fs->in_exec = 0;
107 spin_lock_init(&fs->lock); 115 spin_lock_init(&fs->lock);
116 seqcount_init(&fs->seq);
108 fs->umask = old->umask; 117 fs->umask = old->umask;
109 get_fs_root_and_pwd(old, &fs->root, &fs->pwd); 118
119 spin_lock(&old->lock);
120 fs->root = old->root;
121 path_get_long(&fs->root);
122 fs->pwd = old->pwd;
123 path_get_long(&fs->pwd);
124 spin_unlock(&old->lock);
110 } 125 }
111 return fs; 126 return fs;
112} 127}
@@ -144,6 +159,7 @@ EXPORT_SYMBOL(current_umask);
144struct fs_struct init_fs = { 159struct fs_struct init_fs = {
145 .users = 1, 160 .users = 1,
146 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), 161 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
162 .seq = SEQCNT_ZERO,
147 .umask = 0022, 163 .umask = 0022,
148}; 164};
149 165
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c9627c95482d..f738599fd8cd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -156,8 +156,12 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
156 */ 156 */
157static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) 157static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode = entry->d_inode; 159 struct inode *inode;
160 160
161 if (nd->flags & LOOKUP_RCU)
162 return -ECHILD;
163
164 inode = entry->d_inode;
161 if (inode && is_bad_inode(inode)) 165 if (inode && is_bad_inode(inode))
162 return 0; 166 return 0;
163 else if (fuse_dentry_time(entry) < get_jiffies_64()) { 167 else if (fuse_dentry_time(entry) < get_jiffies_64()) {
@@ -347,7 +351,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
347 } 351 }
348 352
349 entry = newent ? newent : entry; 353 entry = newent ? newent : entry;
350 entry->d_op = &fuse_dentry_operations; 354 d_set_d_op(entry, &fuse_dentry_operations);
351 if (outarg_valid) 355 if (outarg_valid)
352 fuse_change_entry_timeout(entry, &outarg); 356 fuse_change_entry_timeout(entry, &outarg);
353 else 357 else
@@ -981,12 +985,15 @@ static int fuse_access(struct inode *inode, int mask)
981 * access request is sent. Execute permission is still checked 985 * access request is sent. Execute permission is still checked
982 * locally based on file mode. 986 * locally based on file mode.
983 */ 987 */
984static int fuse_permission(struct inode *inode, int mask) 988static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
985{ 989{
986 struct fuse_conn *fc = get_fuse_conn(inode); 990 struct fuse_conn *fc = get_fuse_conn(inode);
987 bool refreshed = false; 991 bool refreshed = false;
988 int err = 0; 992 int err = 0;
989 993
994 if (flags & IPERM_FLAG_RCU)
995 return -ECHILD;
996
990 if (!fuse_allow_task(fc, current)) 997 if (!fuse_allow_task(fc, current))
991 return -EACCES; 998 return -EACCES;
992 999
@@ -1001,7 +1008,7 @@ static int fuse_permission(struct inode *inode, int mask)
1001 } 1008 }
1002 1009
1003 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1010 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1004 err = generic_permission(inode, mask, NULL); 1011 err = generic_permission(inode, mask, flags, NULL);
1005 1012
1006 /* If permission is denied, try to refresh file 1013 /* If permission is denied, try to refresh file
1007 attributes. This is also needed, because the root 1014 attributes. This is also needed, because the root
@@ -1009,7 +1016,8 @@ static int fuse_permission(struct inode *inode, int mask)
1009 if (err == -EACCES && !refreshed) { 1016 if (err == -EACCES && !refreshed) {
1010 err = fuse_do_getattr(inode, NULL, NULL); 1017 err = fuse_do_getattr(inode, NULL, NULL);
1011 if (!err) 1018 if (!err)
1012 err = generic_permission(inode, mask, NULL); 1019 err = generic_permission(inode, mask,
1020 flags, NULL);
1013 } 1021 }
1014 1022
1015 /* Note: the opposite of the above test does not 1023 /* Note: the opposite of the above test does not
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cfce3ad86a92..a8b31da19b93 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -99,6 +99,13 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
99 return inode; 99 return inode;
100} 100}
101 101
102static void fuse_i_callback(struct rcu_head *head)
103{
104 struct inode *inode = container_of(head, struct inode, i_rcu);
105 INIT_LIST_HEAD(&inode->i_dentry);
106 kmem_cache_free(fuse_inode_cachep, inode);
107}
108
102static void fuse_destroy_inode(struct inode *inode) 109static void fuse_destroy_inode(struct inode *inode)
103{ 110{
104 struct fuse_inode *fi = get_fuse_inode(inode); 111 struct fuse_inode *fi = get_fuse_inode(inode);
@@ -106,7 +113,7 @@ static void fuse_destroy_inode(struct inode *inode)
106 BUG_ON(!list_empty(&fi->queued_writes)); 113 BUG_ON(!list_empty(&fi->queued_writes));
107 if (fi->forget_req) 114 if (fi->forget_req)
108 fuse_request_free(fi->forget_req); 115 fuse_request_free(fi->forget_req);
109 kmem_cache_free(fuse_inode_cachep, inode); 116 call_rcu(&inode->i_rcu, fuse_i_callback);
110} 117}
111 118
112void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 119void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
@@ -619,7 +626,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
619 626
620 entry = d_obtain_alias(inode); 627 entry = d_obtain_alias(inode);
621 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { 628 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
622 entry->d_op = &fuse_dentry_operations; 629 d_set_d_op(entry, &fuse_dentry_operations);
623 fuse_invalidate_entry_cache(entry); 630 fuse_invalidate_entry_cache(entry);
624 } 631 }
625 632
@@ -721,7 +728,7 @@ static struct dentry *fuse_get_parent(struct dentry *child)
721 728
722 parent = d_obtain_alias(inode); 729 parent = d_obtain_alias(inode);
723 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { 730 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
724 parent->d_op = &fuse_dentry_operations; 731 d_set_d_op(parent, &fuse_dentry_operations);
725 fuse_invalidate_entry_cache(parent); 732 fuse_invalidate_entry_cache(parent);
726 } 733 }
727 734
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 6bc9e3a5a693..06c48a891832 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -190,14 +190,20 @@ generic_acl_chmod(struct inode *inode)
190} 190}
191 191
192int 192int
193generic_check_acl(struct inode *inode, int mask) 193generic_check_acl(struct inode *inode, int mask, unsigned int flags)
194{ 194{
195 struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS); 195 if (flags & IPERM_FLAG_RCU) {
196 196 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
197 if (acl) { 197 return -ECHILD;
198 int error = posix_acl_permission(inode, acl, mask); 198 } else {
199 posix_acl_release(acl); 199 struct posix_acl *acl;
200 return error; 200
201 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
202 if (acl) {
203 int error = posix_acl_permission(inode, acl, mask);
204 posix_acl_release(acl);
205 return error;
206 }
201 } 207 }
202 return -EAGAIN; 208 return -EAGAIN;
203} 209}
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 48171f4c943d..7118f1a780a9 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -75,11 +75,14 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
75 * Returns: errno 75 * Returns: errno
76 */ 76 */
77 77
78int gfs2_check_acl(struct inode *inode, int mask) 78int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
79{ 79{
80 struct posix_acl *acl; 80 struct posix_acl *acl;
81 int error; 81 int error;
82 82
83 if (flags & IPERM_FLAG_RCU)
84 return -ECHILD;
85
83 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); 86 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
84 if (IS_ERR(acl)) 87 if (IS_ERR(acl))
85 return PTR_ERR(acl); 88 return PTR_ERR(acl);
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index b522b0cb39ea..a93907c8159b 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -16,7 +16,7 @@
16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" 16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
17#define GFS2_ACL_MAX_ENTRIES 25 17#define GFS2_ACL_MAX_ENTRIES 25
18 18
19extern int gfs2_check_acl(struct inode *inode, int mask); 19extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int);
20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); 20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); 21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
22extern const struct xattr_handler gfs2_xattr_system_handler; 22extern const struct xattr_handler gfs2_xattr_system_handler;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 6798755b3858..4a456338b873 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -11,6 +11,7 @@
11#include <linux/completion.h> 11#include <linux/completion.h>
12#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include <linux/gfs2_ondisk.h> 13#include <linux/gfs2_ondisk.h>
14#include <linux/namei.h>
14#include <linux/crc32.h> 15#include <linux/crc32.h>
15 16
16#include "gfs2.h" 17#include "gfs2.h"
@@ -34,15 +35,23 @@
34 35
35static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) 36static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
36{ 37{
37 struct dentry *parent = dget_parent(dentry); 38 struct dentry *parent;
38 struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode); 39 struct gfs2_sbd *sdp;
39 struct gfs2_inode *dip = GFS2_I(parent->d_inode); 40 struct gfs2_inode *dip;
40 struct inode *inode = dentry->d_inode; 41 struct inode *inode;
41 struct gfs2_holder d_gh; 42 struct gfs2_holder d_gh;
42 struct gfs2_inode *ip = NULL; 43 struct gfs2_inode *ip = NULL;
43 int error; 44 int error;
44 int had_lock = 0; 45 int had_lock = 0;
45 46
47 if (nd->flags & LOOKUP_RCU)
48 return -ECHILD;
49
50 parent = dget_parent(dentry);
51 sdp = GFS2_SB(parent->d_inode);
52 dip = GFS2_I(parent->d_inode);
53 inode = dentry->d_inode;
54
46 if (inode) { 55 if (inode) {
47 if (is_bad_inode(inode)) 56 if (is_bad_inode(inode))
48 goto invalid; 57 goto invalid;
@@ -100,13 +109,14 @@ fail:
100 return 0; 109 return 0;
101} 110}
102 111
103static int gfs2_dhash(struct dentry *dentry, struct qstr *str) 112static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode,
113 struct qstr *str)
104{ 114{
105 str->hash = gfs2_disk_hash(str->name, str->len); 115 str->hash = gfs2_disk_hash(str->name, str->len);
106 return 0; 116 return 0;
107} 117}
108 118
109static int gfs2_dentry_delete(struct dentry *dentry) 119static int gfs2_dentry_delete(const struct dentry *dentry)
110{ 120{
111 struct gfs2_inode *ginode; 121 struct gfs2_inode *ginode;
112 122
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 5ab3839dfcb9..97012ecff560 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
130 130
131 dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); 131 dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
132 if (!IS_ERR(dentry)) 132 if (!IS_ERR(dentry))
133 dentry->d_op = &gfs2_dops; 133 d_set_d_op(dentry, &gfs2_dops);
134 return dentry; 134 return dentry;
135} 135}
136 136
@@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
158out_inode: 158out_inode:
159 dentry = d_obtain_alias(inode); 159 dentry = d_obtain_alias(inode);
160 if (!IS_ERR(dentry)) 160 if (!IS_ERR(dentry))
161 dentry->d_op = &gfs2_dops; 161 d_set_d_op(dentry, &gfs2_dops);
162 return dentry; 162 return dentry;
163} 163}
164 164
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index aa996471ec5c..fca6689e12e6 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -241,7 +241,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
241 !capable(CAP_LINUX_IMMUTABLE)) 241 !capable(CAP_LINUX_IMMUTABLE))
242 goto out; 242 goto out;
243 if (!IS_IMMUTABLE(inode)) { 243 if (!IS_IMMUTABLE(inode)) {
244 error = gfs2_permission(inode, MAY_WRITE); 244 error = gfs2_permission(inode, MAY_WRITE, 0);
245 if (error) 245 if (error)
246 goto out; 246 goto out;
247 } 247 }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 14e682dbe8bf..2232b3c780bd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -509,7 +509,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
509 } 509 }
510 510
511 if (!is_root) { 511 if (!is_root) {
512 error = gfs2_permission(dir, MAY_EXEC); 512 error = gfs2_permission(dir, MAY_EXEC, 0);
513 if (error) 513 if (error)
514 goto out; 514 goto out;
515 } 515 }
@@ -539,7 +539,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
539{ 539{
540 int error; 540 int error;
541 541
542 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 542 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
543 if (error) 543 if (error)
544 return error; 544 return error;
545 545
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d8499fadcc53..732a183efdb3 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -113,7 +113,7 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
113extern struct inode *gfs2_createi(struct gfs2_holder *ghs, 113extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
114 const struct qstr *name, 114 const struct qstr *name,
115 unsigned int mode, dev_t dev); 115 unsigned int mode, dev_t dev);
116extern int gfs2_permission(struct inode *inode, int mask); 116extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
117extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 117extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
118extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 118extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
119extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 119extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3eb1393f7b81..2aeabd4218cc 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
440 iput(inode); 440 iput(inode);
441 return -ENOMEM; 441 return -ENOMEM;
442 } 442 }
443 dentry->d_op = &gfs2_dops; 443 d_set_d_op(dentry, &gfs2_dops);
444 *dptr = dentry; 444 *dptr = dentry;
445 return 0; 445 return 0;
446} 446}
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1db6b7343229..1501db4f0e6d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
106{ 106{
107 struct inode *inode = NULL; 107 struct inode *inode = NULL;
108 108
109 dentry->d_op = &gfs2_dops; 109 d_set_d_op(dentry, &gfs2_dops);
110 110
111 inode = gfs2_lookupi(dir, &dentry->d_name, 0); 111 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
112 if (inode && IS_ERR(inode)) 112 if (inode && IS_ERR(inode))
@@ -166,7 +166,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
166 if (error) 166 if (error)
167 goto out_child; 167 goto out_child;
168 168
169 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); 169 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
170 if (error) 170 if (error)
171 goto out_gunlock; 171 goto out_gunlock;
172 172
@@ -289,7 +289,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
289 if (IS_APPEND(&dip->i_inode)) 289 if (IS_APPEND(&dip->i_inode))
290 return -EPERM; 290 return -EPERM;
291 291
292 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 292 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
293 if (error) 293 if (error)
294 return error; 294 return error;
295 295
@@ -822,7 +822,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
822 } 822 }
823 } 823 }
824 } else { 824 } else {
825 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC); 825 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
826 if (error) 826 if (error)
827 goto out_gunlock; 827 goto out_gunlock;
828 828
@@ -857,7 +857,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
857 /* Check out the dir to be renamed */ 857 /* Check out the dir to be renamed */
858 858
859 if (dir_rename) { 859 if (dir_rename) {
860 error = gfs2_permission(odentry->d_inode, MAY_WRITE); 860 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
861 if (error) 861 if (error)
862 goto out_gunlock; 862 goto out_gunlock;
863 } 863 }
@@ -1041,13 +1041,17 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1041 * Returns: errno 1041 * Returns: errno
1042 */ 1042 */
1043 1043
1044int gfs2_permission(struct inode *inode, int mask) 1044int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1045{ 1045{
1046 struct gfs2_inode *ip = GFS2_I(inode); 1046 struct gfs2_inode *ip;
1047 struct gfs2_holder i_gh; 1047 struct gfs2_holder i_gh;
1048 int error; 1048 int error;
1049 int unlock = 0; 1049 int unlock = 0;
1050 1050
1051 if (flags & IPERM_FLAG_RCU)
1052 return -ECHILD;
1053
1054 ip = GFS2_I(inode);
1051 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1055 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1056 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error) 1057 if (error)
@@ -1058,7 +1062,7 @@ int gfs2_permission(struct inode *inode, int mask)
1058 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) 1062 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1059 error = -EACCES; 1063 error = -EACCES;
1060 else 1064 else
1061 error = generic_permission(inode, mask, gfs2_check_acl); 1065 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1062 if (unlock) 1066 if (unlock)
1063 gfs2_glock_dq_uninit(&i_gh); 1067 gfs2_glock_dq_uninit(&i_gh);
1064 1068
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2b2c4997430b..16c2ecac7eb7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
1405 return &ip->i_inode; 1405 return &ip->i_inode;
1406} 1406}
1407 1407
1408static void gfs2_destroy_inode(struct inode *inode) 1408static void gfs2_i_callback(struct rcu_head *head)
1409{ 1409{
1410 struct inode *inode = container_of(head, struct inode, i_rcu);
1411 INIT_LIST_HEAD(&inode->i_dentry);
1410 kmem_cache_free(gfs2_inode_cachep, inode); 1412 kmem_cache_free(gfs2_inode_cachep, inode);
1411} 1413}
1412 1414
1415static void gfs2_destroy_inode(struct inode *inode)
1416{
1417 call_rcu(&inode->i_rcu, gfs2_i_callback);
1418}
1419
1413const struct super_operations gfs2_super_ops = { 1420const struct super_operations gfs2_super_ops = {
1414 .alloc_inode = gfs2_alloc_inode, 1421 .alloc_inode = gfs2_alloc_inode,
1415 .destroy_inode = gfs2_destroy_inode, 1422 .destroy_inode = gfs2_destroy_inode,
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 2b3b8611b41b..ea4aefe7c652 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
25 struct inode *inode = NULL; 25 struct inode *inode = NULL;
26 int res; 26 int res;
27 27
28 dentry->d_op = &hfs_dentry_operations; 28 d_set_d_op(dentry, &hfs_dentry_operations);
29 29
30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); 30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
31 hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); 31 hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index c8cffb81e849..ad97c2d58287 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -213,10 +213,14 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *);
213/* string.c */ 213/* string.c */
214extern const struct dentry_operations hfs_dentry_operations; 214extern const struct dentry_operations hfs_dentry_operations;
215 215
216extern int hfs_hash_dentry(struct dentry *, struct qstr *); 216extern int hfs_hash_dentry(const struct dentry *, const struct inode *,
217 struct qstr *);
217extern int hfs_strcmp(const unsigned char *, unsigned int, 218extern int hfs_strcmp(const unsigned char *, unsigned int,
218 const unsigned char *, unsigned int); 219 const unsigned char *, unsigned int);
219extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 220extern int hfs_compare_dentry(const struct dentry *parent,
221 const struct inode *pinode,
222 const struct dentry *dentry, const struct inode *inode,
223 unsigned int len, const char *str, const struct qstr *name);
220 224
221/* trans.c */ 225/* trans.c */
222extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); 226extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
diff --git a/fs/hfs/string.c b/fs/hfs/string.c
index 927a5af79428..495a976a3cc9 100644
--- a/fs/hfs/string.c
+++ b/fs/hfs/string.c
@@ -51,7 +51,8 @@ static unsigned char caseorder[256] = {
51/* 51/*
52 * Hash a string to an integer in a case-independent way 52 * Hash a string to an integer in a case-independent way
53 */ 53 */
54int hfs_hash_dentry(struct dentry *dentry, struct qstr *this) 54int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
55 struct qstr *this)
55{ 56{
56 const unsigned char *name = this->name; 57 const unsigned char *name = this->name;
57 unsigned int hash, len = this->len; 58 unsigned int hash, len = this->len;
@@ -92,21 +93,21 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1,
92 * Test for equality of two strings in the HFS filename character ordering. 93 * Test for equality of two strings in the HFS filename character ordering.
93 * return 1 on failure and 0 on success 94 * return 1 on failure and 0 on success
94 */ 95 */
95int hfs_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) 96int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
97 const struct dentry *dentry, const struct inode *inode,
98 unsigned int len, const char *str, const struct qstr *name)
96{ 99{
97 const unsigned char *n1, *n2; 100 const unsigned char *n1, *n2;
98 int len;
99 101
100 len = s1->len;
101 if (len >= HFS_NAMELEN) { 102 if (len >= HFS_NAMELEN) {
102 if (s2->len < HFS_NAMELEN) 103 if (name->len < HFS_NAMELEN)
103 return 1; 104 return 1;
104 len = HFS_NAMELEN; 105 len = HFS_NAMELEN;
105 } else if (len != s2->len) 106 } else if (len != name->len)
106 return 1; 107 return 1;
107 108
108 n1 = s1->name; 109 n1 = str;
109 n2 = s2->name; 110 n2 = name->name;
110 while (len--) { 111 while (len--) {
111 if (caseorder[*n1++] != caseorder[*n2++]) 112 if (caseorder[*n1++] != caseorder[*n2++])
112 return 1; 113 return 1;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4824c27cebb8..0bef62aa4f42 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
167 return i ? &i->vfs_inode : NULL; 167 return i ? &i->vfs_inode : NULL;
168} 168}
169 169
170static void hfs_destroy_inode(struct inode *inode) 170static void hfs_i_callback(struct rcu_head *head)
171{ 171{
172 struct inode *inode = container_of(head, struct inode, i_rcu);
173 INIT_LIST_HEAD(&inode->i_dentry);
172 kmem_cache_free(hfs_inode_cachep, HFS_I(inode)); 174 kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
173} 175}
174 176
177static void hfs_destroy_inode(struct inode *inode)
178{
179 call_rcu(&inode->i_rcu, hfs_i_callback);
180}
181
175static const struct super_operations hfs_super_operations = { 182static const struct super_operations hfs_super_operations = {
176 .alloc_inode = hfs_alloc_inode, 183 .alloc_inode = hfs_alloc_inode,
177 .destroy_inode = hfs_destroy_inode, 184 .destroy_inode = hfs_destroy_inode,
@@ -427,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
427 if (!sb->s_root) 434 if (!sb->s_root)
428 goto bail_iput; 435 goto bail_iput;
429 436
430 sb->s_root->d_op = &hfs_dentry_operations; 437 d_set_d_op(sb->s_root, &hfs_dentry_operations);
431 438
432 /* everything's okay */ 439 /* everything's okay */
433 return 0; 440 return 0;
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 7478f5c219aa..19cf291eb91f 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -8,15 +8,20 @@
8 * This file contains the code to do various system dependent things. 8 * This file contains the code to do various system dependent things.
9 */ 9 */
10 10
11#include <linux/namei.h>
11#include "hfs_fs.h" 12#include "hfs_fs.h"
12 13
13/* dentry case-handling: just lowercase everything */ 14/* dentry case-handling: just lowercase everything */
14 15
15static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) 16static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd)
16{ 17{
17 struct inode *inode = dentry->d_inode; 18 struct inode *inode;
18 int diff; 19 int diff;
19 20
21 if (nd->flags & LOOKUP_RCU)
22 return -ECHILD;
23
24 inode = dentry->d_inode;
20 if(!inode) 25 if(!inode)
21 return 1; 26 return 1;
22 27
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 9d59c0571f59..ccab87145f7a 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
37 37
38 sb = dir->i_sb; 38 sb = dir->i_sb;
39 39
40 dentry->d_op = &hfsplus_dentry_operations; 40 d_set_d_op(dentry, &hfsplus_dentry_operations);
41 dentry->d_fsdata = NULL; 41 dentry->d_fsdata = NULL;
42 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 42 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
43 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); 43 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index cb3653efb57a..a5308f491e3e 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -379,8 +379,12 @@ int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unist
379int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); 379int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
380int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); 380int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *);
381int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); 381int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int);
382int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); 382int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
383int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2); 383 struct qstr *str);
384int hfsplus_compare_dentry(const struct dentry *parent,
385 const struct inode *pinode,
386 const struct dentry *dentry, const struct inode *inode,
387 unsigned int len, const char *str, const struct qstr *name);
384 388
385/* wrapper.c */ 389/* wrapper.c */
386int hfsplus_read_wrapper(struct super_block *); 390int hfsplus_read_wrapper(struct super_block *);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 52cc746d3ba3..ddf712e4700e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -419,7 +419,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
419 err = -ENOMEM; 419 err = -ENOMEM;
420 goto cleanup; 420 goto cleanup;
421 } 421 }
422 sb->s_root->d_op = &hfsplus_dentry_operations; 422 d_set_d_op(sb->s_root, &hfsplus_dentry_operations);
423 423
424 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 424 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
425 str.name = HFSP_HIDDENDIR_NAME; 425 str.name = HFSP_HIDDENDIR_NAME;
@@ -488,11 +488,19 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
488 return i ? &i->vfs_inode : NULL; 488 return i ? &i->vfs_inode : NULL;
489} 489}
490 490
491static void hfsplus_destroy_inode(struct inode *inode) 491static void hfsplus_i_callback(struct rcu_head *head)
492{ 492{
493 struct inode *inode = container_of(head, struct inode, i_rcu);
494
495 INIT_LIST_HEAD(&inode->i_dentry);
493 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); 496 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
494} 497}
495 498
499static void hfsplus_destroy_inode(struct inode *inode)
500{
501 call_rcu(&inode->i_rcu, hfsplus_i_callback);
502}
503
496#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) 504#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
497 505
498static struct dentry *hfsplus_mount(struct file_system_type *fs_type, 506static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index b66d67de882c..d800aa0f2c80 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -320,7 +320,8 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
320 * Composed unicode characters are decomposed and case-folding is performed 320 * Composed unicode characters are decomposed and case-folding is performed
321 * if the appropriate bits are (un)set on the superblock. 321 * if the appropriate bits are (un)set on the superblock.
322 */ 322 */
323int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) 323int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
324 struct qstr *str)
324{ 325{
325 struct super_block *sb = dentry->d_sb; 326 struct super_block *sb = dentry->d_sb;
326 const char *astr; 327 const char *astr;
@@ -363,9 +364,12 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
363 * Composed unicode characters are decomposed and case-folding is performed 364 * Composed unicode characters are decomposed and case-folding is performed
364 * if the appropriate bits are (un)set on the superblock. 365 * if the appropriate bits are (un)set on the superblock.
365 */ 366 */
366int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) 367int hfsplus_compare_dentry(const struct dentry *parent,
368 const struct inode *pinode,
369 const struct dentry *dentry, const struct inode *inode,
370 unsigned int len, const char *str, const struct qstr *name)
367{ 371{
368 struct super_block *sb = dentry->d_sb; 372 struct super_block *sb = parent->d_sb;
369 int casefold, decompose, size; 373 int casefold, decompose, size;
370 int dsize1, dsize2, len1, len2; 374 int dsize1, dsize2, len1, len2;
371 const u16 *dstr1, *dstr2; 375 const u16 *dstr1, *dstr2;
@@ -375,10 +379,10 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *
375 379
376 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 380 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
377 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 381 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
378 astr1 = s1->name; 382 astr1 = str;
379 len1 = s1->len; 383 len1 = len;
380 astr2 = s2->name; 384 astr2 = name->name;
381 len2 = s2->len; 385 len2 = name->len;
382 dsize1 = dsize2 = 0; 386 dsize1 = dsize2 = 0;
383 dstr1 = dstr2 = NULL; 387 dstr1 = dstr2 = NULL;
384 388
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2c0f148a49e6..d3244d949a4e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -32,7 +32,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
32 32
33#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) 33#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
34 34
35static int hostfs_d_delete(struct dentry *dentry) 35static int hostfs_d_delete(const struct dentry *dentry)
36{ 36{
37 return 1; 37 return 1;
38} 38}
@@ -92,12 +92,10 @@ __uml_setup("hostfs=", hostfs_args,
92 92
93static char *__dentry_name(struct dentry *dentry, char *name) 93static char *__dentry_name(struct dentry *dentry, char *name)
94{ 94{
95 char *p = __dentry_path(dentry, name, PATH_MAX); 95 char *p = dentry_path_raw(dentry, name, PATH_MAX);
96 char *root; 96 char *root;
97 size_t len; 97 size_t len;
98 98
99 spin_unlock(&dcache_lock);
100
101 root = dentry->d_sb->s_fs_info; 99 root = dentry->d_sb->s_fs_info;
102 len = strlen(root); 100 len = strlen(root);
103 if (IS_ERR(p)) { 101 if (IS_ERR(p)) {
@@ -123,25 +121,23 @@ static char *dentry_name(struct dentry *dentry)
123 if (!name) 121 if (!name)
124 return NULL; 122 return NULL;
125 123
126 spin_lock(&dcache_lock);
127 return __dentry_name(dentry, name); /* will unlock */ 124 return __dentry_name(dentry, name); /* will unlock */
128} 125}
129 126
130static char *inode_name(struct inode *ino) 127static char *inode_name(struct inode *ino)
131{ 128{
132 struct dentry *dentry; 129 struct dentry *dentry;
133 char *name = __getname(); 130 char *name;
134 if (!name)
135 return NULL;
136 131
137 spin_lock(&dcache_lock); 132 dentry = d_find_alias(ino);
138 if (list_empty(&ino->i_dentry)) { 133 if (!dentry)
139 spin_unlock(&dcache_lock);
140 __putname(name);
141 return NULL; 134 return NULL;
142 } 135
143 dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias); 136 name = dentry_name(dentry);
144 return __dentry_name(dentry, name); /* will unlock */ 137
138 dput(dentry);
139
140 return name;
145} 141}
146 142
147static char *follow_link(char *link) 143static char *follow_link(char *link)
@@ -251,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode)
251 } 247 }
252} 248}
253 249
254static void hostfs_destroy_inode(struct inode *inode) 250static void hostfs_i_callback(struct rcu_head *head)
255{ 251{
252 struct inode *inode = container_of(head, struct inode, i_rcu);
253 INIT_LIST_HEAD(&inode->i_dentry);
256 kfree(HOSTFS_I(inode)); 254 kfree(HOSTFS_I(inode));
257} 255}
258 256
257static void hostfs_destroy_inode(struct inode *inode)
258{
259 call_rcu(&inode->i_rcu, hostfs_i_callback);
260}
261
259static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 262static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
260{ 263{
261 const char *root_path = vfs->mnt_sb->s_fs_info; 264 const char *root_path = vfs->mnt_sb->s_fs_info;
@@ -609,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
609 goto out_put; 612 goto out_put;
610 613
611 d_add(dentry, inode); 614 d_add(dentry, inode);
612 dentry->d_op = &hostfs_dentry_ops; 615 d_set_d_op(dentry, &hostfs_dentry_ops);
613 return NULL; 616 return NULL;
614 617
615 out_put: 618 out_put:
@@ -746,11 +749,14 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
746 return err; 749 return err;
747} 750}
748 751
749int hostfs_permission(struct inode *ino, int desired) 752int hostfs_permission(struct inode *ino, int desired, unsigned int flags)
750{ 753{
751 char *name; 754 char *name;
752 int r = 0, w = 0, x = 0, err; 755 int r = 0, w = 0, x = 0, err;
753 756
757 if (flags & IPERM_FLAG_RCU)
758 return -ECHILD;
759
754 if (desired & MAY_READ) r = 1; 760 if (desired & MAY_READ) r = 1;
755 if (desired & MAY_WRITE) w = 1; 761 if (desired & MAY_WRITE) w = 1;
756 if (desired & MAY_EXEC) x = 1; 762 if (desired & MAY_EXEC) x = 1;
@@ -765,7 +771,7 @@ int hostfs_permission(struct inode *ino, int desired)
765 err = access_file(name, r, w, x); 771 err = access_file(name, r, w, x);
766 __putname(name); 772 __putname(name);
767 if (!err) 773 if (!err)
768 err = generic_permission(ino, desired, NULL); 774 err = generic_permission(ino, desired, flags, NULL);
769 return err; 775 return err;
770} 776}
771 777
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 67d9d36b3d5f..32c13a94e1e9 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -12,7 +12,8 @@
12 * Note: the dentry argument is the parent dentry. 12 * Note: the dentry argument is the parent dentry.
13 */ 13 */
14 14
15static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr) 15static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
16 struct qstr *qstr)
16{ 17{
17 unsigned long hash; 18 unsigned long hash;
18 int i; 19 int i;
@@ -34,19 +35,25 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
34 return 0; 35 return 0;
35} 36}
36 37
37static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 38static int hpfs_compare_dentry(const struct dentry *parent,
39 const struct inode *pinode,
40 const struct dentry *dentry, const struct inode *inode,
41 unsigned int len, const char *str, const struct qstr *name)
38{ 42{
39 unsigned al=a->len; 43 unsigned al = len;
40 unsigned bl=b->len; 44 unsigned bl = name->len;
41 hpfs_adjust_length(a->name, &al); 45
46 hpfs_adjust_length(str, &al);
42 /*hpfs_adjust_length(b->name, &bl);*/ 47 /*hpfs_adjust_length(b->name, &bl);*/
43 /* 'a' is the qstr of an already existing dentry, so the name 48
44 * must be valid. 'b' must be validated first. 49 /*
50 * 'str' is the nane of an already existing dentry, so the name
51 * must be valid. 'name' must be validated first.
45 */ 52 */
46 53
47 if (hpfs_chk_name(b->name, &bl)) 54 if (hpfs_chk_name(name->name, &bl))
48 return 1; 55 return 1;
49 if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0)) 56 if (hpfs_compare_names(parent->d_sb, str, al, name->name, bl, 0))
50 return 1; 57 return 1;
51 return 0; 58 return 0;
52} 59}
@@ -58,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = {
58 65
59void hpfs_set_dentry_operations(struct dentry *dentry) 66void hpfs_set_dentry_operations(struct dentry *dentry)
60{ 67{
61 dentry->d_op = &hpfs_dentry_operations; 68 d_set_d_op(dentry, &hpfs_dentry_operations);
62} 69}
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 11c2b4080f65..f4ad9e31ddc4 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -419,7 +419,7 @@ again:
419 unlock_kernel(); 419 unlock_kernel();
420 return -ENOSPC; 420 return -ENOSPC;
421 } 421 }
422 if (generic_permission(inode, MAY_WRITE, NULL) || 422 if (generic_permission(inode, MAY_WRITE, 0, NULL) ||
423 !S_ISREG(inode->i_mode) || 423 !S_ISREG(inode->i_mode) ||
424 get_write_access(inode)) { 424 get_write_access(inode)) {
425 d_rehash(dentry); 425 d_rehash(dentry);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 6c5f01597c3a..49935ba78db8 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
177 return &ei->vfs_inode; 177 return &ei->vfs_inode;
178} 178}
179 179
180static void hpfs_destroy_inode(struct inode *inode) 180static void hpfs_i_callback(struct rcu_head *head)
181{ 181{
182 struct inode *inode = container_of(head, struct inode, i_rcu);
183 INIT_LIST_HEAD(&inode->i_dentry);
182 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); 184 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
183} 185}
184 186
187static void hpfs_destroy_inode(struct inode *inode)
188{
189 call_rcu(&inode->i_rcu, hpfs_i_callback);
190}
191
185static void init_once(void *foo) 192static void init_once(void *foo)
186{ 193{
187 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; 194 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index f702b5f713fc..87ed48e0343d 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino)
632 mntput(ino->i_sb->s_fs_info); 632 mntput(ino->i_sb->s_fs_info);
633} 633}
634 634
635static void hppfs_destroy_inode(struct inode *inode) 635static void hppfs_i_callback(struct rcu_head *head)
636{ 636{
637 struct inode *inode = container_of(head, struct inode, i_rcu);
638 INIT_LIST_HEAD(&inode->i_dentry);
637 kfree(HPPFS_I(inode)); 639 kfree(HPPFS_I(inode));
638} 640}
639 641
642static void hppfs_destroy_inode(struct inode *inode)
643{
644 call_rcu(&inode->i_rcu, hppfs_i_callback);
645}
646
640static const struct super_operations hppfs_sbops = { 647static const struct super_operations hppfs_sbops = {
641 .alloc_inode = hppfs_alloc_inode, 648 .alloc_inode = hppfs_alloc_inode,
642 .destroy_inode = hppfs_destroy_inode, 649 .destroy_inode = hppfs_destroy_inode,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a5fe68189eed..9885082b470f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
663 return &p->vfs_inode; 663 return &p->vfs_inode;
664} 664}
665 665
666static void hugetlbfs_i_callback(struct rcu_head *head)
667{
668 struct inode *inode = container_of(head, struct inode, i_rcu);
669 INIT_LIST_HEAD(&inode->i_dentry);
670 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
671}
672
666static void hugetlbfs_destroy_inode(struct inode *inode) 673static void hugetlbfs_destroy_inode(struct inode *inode)
667{ 674{
668 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); 675 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
669 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 676 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
670 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 677 call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
671} 678}
672 679
673static const struct address_space_operations hugetlbfs_aops = { 680static const struct address_space_operations hugetlbfs_aops = {
diff --git a/fs/inode.c b/fs/inode.c
index ae2727ab0c3a..da85e56378f3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -102,26 +102,29 @@ static DECLARE_RWSEM(iprune_sem);
102 */ 102 */
103struct inodes_stat_t inodes_stat; 103struct inodes_stat_t inodes_stat;
104 104
105static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; 105static DEFINE_PER_CPU(unsigned int, nr_inodes);
106static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
107 106
108static struct kmem_cache *inode_cachep __read_mostly; 107static struct kmem_cache *inode_cachep __read_mostly;
109 108
110static inline int get_nr_inodes(void) 109static int get_nr_inodes(void)
111{ 110{
112 return percpu_counter_sum_positive(&nr_inodes); 111 int i;
112 int sum = 0;
113 for_each_possible_cpu(i)
114 sum += per_cpu(nr_inodes, i);
115 return sum < 0 ? 0 : sum;
113} 116}
114 117
115static inline int get_nr_inodes_unused(void) 118static inline int get_nr_inodes_unused(void)
116{ 119{
117 return percpu_counter_sum_positive(&nr_inodes_unused); 120 return inodes_stat.nr_unused;
118} 121}
119 122
120int get_nr_dirty_inodes(void) 123int get_nr_dirty_inodes(void)
121{ 124{
125 /* not actually dirty inodes, but a wild approximation */
122 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); 126 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
123 return nr_dirty > 0 ? nr_dirty : 0; 127 return nr_dirty > 0 ? nr_dirty : 0;
124
125} 128}
126 129
127/* 130/*
@@ -132,7 +135,6 @@ int proc_nr_inodes(ctl_table *table, int write,
132 void __user *buffer, size_t *lenp, loff_t *ppos) 135 void __user *buffer, size_t *lenp, loff_t *ppos)
133{ 136{
134 inodes_stat.nr_inodes = get_nr_inodes(); 137 inodes_stat.nr_inodes = get_nr_inodes();
135 inodes_stat.nr_unused = get_nr_inodes_unused();
136 return proc_dointvec(table, write, buffer, lenp, ppos); 138 return proc_dointvec(table, write, buffer, lenp, ppos);
137} 139}
138#endif 140#endif
@@ -224,7 +226,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
224 inode->i_fsnotify_mask = 0; 226 inode->i_fsnotify_mask = 0;
225#endif 227#endif
226 228
227 percpu_counter_inc(&nr_inodes); 229 this_cpu_inc(nr_inodes);
228 230
229 return 0; 231 return 0;
230out: 232out:
@@ -255,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb)
255 return inode; 257 return inode;
256} 258}
257 259
260void free_inode_nonrcu(struct inode *inode)
261{
262 kmem_cache_free(inode_cachep, inode);
263}
264EXPORT_SYMBOL(free_inode_nonrcu);
265
258void __destroy_inode(struct inode *inode) 266void __destroy_inode(struct inode *inode)
259{ 267{
260 BUG_ON(inode_has_buffers(inode)); 268 BUG_ON(inode_has_buffers(inode));
@@ -266,10 +274,17 @@ void __destroy_inode(struct inode *inode)
266 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) 274 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
267 posix_acl_release(inode->i_default_acl); 275 posix_acl_release(inode->i_default_acl);
268#endif 276#endif
269 percpu_counter_dec(&nr_inodes); 277 this_cpu_dec(nr_inodes);
270} 278}
271EXPORT_SYMBOL(__destroy_inode); 279EXPORT_SYMBOL(__destroy_inode);
272 280
281static void i_callback(struct rcu_head *head)
282{
283 struct inode *inode = container_of(head, struct inode, i_rcu);
284 INIT_LIST_HEAD(&inode->i_dentry);
285 kmem_cache_free(inode_cachep, inode);
286}
287
273static void destroy_inode(struct inode *inode) 288static void destroy_inode(struct inode *inode)
274{ 289{
275 BUG_ON(!list_empty(&inode->i_lru)); 290 BUG_ON(!list_empty(&inode->i_lru));
@@ -277,7 +292,7 @@ static void destroy_inode(struct inode *inode)
277 if (inode->i_sb->s_op->destroy_inode) 292 if (inode->i_sb->s_op->destroy_inode)
278 inode->i_sb->s_op->destroy_inode(inode); 293 inode->i_sb->s_op->destroy_inode(inode);
279 else 294 else
280 kmem_cache_free(inode_cachep, (inode)); 295 call_rcu(&inode->i_rcu, i_callback);
281} 296}
282 297
283/* 298/*
@@ -335,7 +350,7 @@ static void inode_lru_list_add(struct inode *inode)
335{ 350{
336 if (list_empty(&inode->i_lru)) { 351 if (list_empty(&inode->i_lru)) {
337 list_add(&inode->i_lru, &inode_lru); 352 list_add(&inode->i_lru, &inode_lru);
338 percpu_counter_inc(&nr_inodes_unused); 353 inodes_stat.nr_unused++;
339 } 354 }
340} 355}
341 356
@@ -343,7 +358,7 @@ static void inode_lru_list_del(struct inode *inode)
343{ 358{
344 if (!list_empty(&inode->i_lru)) { 359 if (!list_empty(&inode->i_lru)) {
345 list_del_init(&inode->i_lru); 360 list_del_init(&inode->i_lru);
346 percpu_counter_dec(&nr_inodes_unused); 361 inodes_stat.nr_unused--;
347 } 362 }
348} 363}
349 364
@@ -430,6 +445,7 @@ void end_writeback(struct inode *inode)
430 BUG_ON(!(inode->i_state & I_FREEING)); 445 BUG_ON(!(inode->i_state & I_FREEING));
431 BUG_ON(inode->i_state & I_CLEAR); 446 BUG_ON(inode->i_state & I_CLEAR);
432 inode_sync_wait(inode); 447 inode_sync_wait(inode);
448 /* don't need i_lock here, no concurrent mods to i_state */
433 inode->i_state = I_FREEING | I_CLEAR; 449 inode->i_state = I_FREEING | I_CLEAR;
434} 450}
435EXPORT_SYMBOL(end_writeback); 451EXPORT_SYMBOL(end_writeback);
@@ -513,7 +529,7 @@ void evict_inodes(struct super_block *sb)
513 list_move(&inode->i_lru, &dispose); 529 list_move(&inode->i_lru, &dispose);
514 list_del_init(&inode->i_wb_list); 530 list_del_init(&inode->i_wb_list);
515 if (!(inode->i_state & (I_DIRTY | I_SYNC))) 531 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
516 percpu_counter_dec(&nr_inodes_unused); 532 inodes_stat.nr_unused--;
517 } 533 }
518 spin_unlock(&inode_lock); 534 spin_unlock(&inode_lock);
519 535
@@ -554,7 +570,7 @@ int invalidate_inodes(struct super_block *sb)
554 list_move(&inode->i_lru, &dispose); 570 list_move(&inode->i_lru, &dispose);
555 list_del_init(&inode->i_wb_list); 571 list_del_init(&inode->i_wb_list);
556 if (!(inode->i_state & (I_DIRTY | I_SYNC))) 572 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
557 percpu_counter_dec(&nr_inodes_unused); 573 inodes_stat.nr_unused--;
558 } 574 }
559 spin_unlock(&inode_lock); 575 spin_unlock(&inode_lock);
560 576
@@ -616,7 +632,7 @@ static void prune_icache(int nr_to_scan)
616 if (atomic_read(&inode->i_count) || 632 if (atomic_read(&inode->i_count) ||
617 (inode->i_state & ~I_REFERENCED)) { 633 (inode->i_state & ~I_REFERENCED)) {
618 list_del_init(&inode->i_lru); 634 list_del_init(&inode->i_lru);
619 percpu_counter_dec(&nr_inodes_unused); 635 inodes_stat.nr_unused--;
620 continue; 636 continue;
621 } 637 }
622 638
@@ -650,7 +666,7 @@ static void prune_icache(int nr_to_scan)
650 */ 666 */
651 list_move(&inode->i_lru, &freeable); 667 list_move(&inode->i_lru, &freeable);
652 list_del_init(&inode->i_wb_list); 668 list_del_init(&inode->i_wb_list);
653 percpu_counter_dec(&nr_inodes_unused); 669 inodes_stat.nr_unused--;
654 } 670 }
655 if (current_is_kswapd()) 671 if (current_is_kswapd())
656 __count_vm_events(KSWAPD_INODESTEAL, reap); 672 __count_vm_events(KSWAPD_INODESTEAL, reap);
@@ -1648,8 +1664,6 @@ void __init inode_init(void)
1648 SLAB_MEM_SPREAD), 1664 SLAB_MEM_SPREAD),
1649 init_once); 1665 init_once);
1650 register_shrinker(&icache_shrinker); 1666 register_shrinker(&icache_shrinker);
1651 percpu_counter_init(&nr_inodes, 0);
1652 percpu_counter_init(&nr_inodes_unused, 0);
1653 1667
1654 /* Hash may have been set up in inode_init_early */ 1668 /* Hash may have been set up in inode_init_early */
1655 if (!hashdist) 1669 if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index e43b9a4dbf4e..9687c2ee2735 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **);
63 63
64extern void free_vfsmnt(struct vfsmount *); 64extern void free_vfsmnt(struct vfsmount *);
65extern struct vfsmount *alloc_vfsmnt(const char *); 65extern struct vfsmount *alloc_vfsmnt(const char *);
66extern unsigned int mnt_get_count(struct vfsmount *mnt);
66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 67extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
67extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, 68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
68 struct vfsmount *); 69 struct vfsmount *);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bfdeb82a53be..844a7903c72f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -26,16 +26,32 @@
26 26
27#define BEQUIET 27#define BEQUIET
28 28
29static int isofs_hashi(struct dentry *parent, struct qstr *qstr); 29static int isofs_hashi(const struct dentry *parent, const struct inode *inode,
30static int isofs_hash(struct dentry *parent, struct qstr *qstr); 30 struct qstr *qstr);
31static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); 31static int isofs_hash(const struct dentry *parent, const struct inode *inode,
32static int isofs_dentry_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b); 32 struct qstr *qstr);
33static int isofs_dentry_cmpi(const struct dentry *parent,
34 const struct inode *pinode,
35 const struct dentry *dentry, const struct inode *inode,
36 unsigned int len, const char *str, const struct qstr *name);
37static int isofs_dentry_cmp(const struct dentry *parent,
38 const struct inode *pinode,
39 const struct dentry *dentry, const struct inode *inode,
40 unsigned int len, const char *str, const struct qstr *name);
33 41
34#ifdef CONFIG_JOLIET 42#ifdef CONFIG_JOLIET
35static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr); 43static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode,
36static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr); 44 struct qstr *qstr);
37static int isofs_dentry_cmpi_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); 45static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode,
38static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); 46 struct qstr *qstr);
47static int isofs_dentry_cmpi_ms(const struct dentry *parent,
48 const struct inode *pinode,
49 const struct dentry *dentry, const struct inode *inode,
50 unsigned int len, const char *str, const struct qstr *name);
51static int isofs_dentry_cmp_ms(const struct dentry *parent,
52 const struct inode *pinode,
53 const struct dentry *dentry, const struct inode *inode,
54 unsigned int len, const char *str, const struct qstr *name);
39#endif 55#endif
40 56
41static void isofs_put_super(struct super_block *sb) 57static void isofs_put_super(struct super_block *sb)
@@ -65,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
65 return &ei->vfs_inode; 81 return &ei->vfs_inode;
66} 82}
67 83
68static void isofs_destroy_inode(struct inode *inode) 84static void isofs_i_callback(struct rcu_head *head)
69{ 85{
86 struct inode *inode = container_of(head, struct inode, i_rcu);
87 INIT_LIST_HEAD(&inode->i_dentry);
70 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); 88 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
71} 89}
72 90
91static void isofs_destroy_inode(struct inode *inode)
92{
93 call_rcu(&inode->i_rcu, isofs_i_callback);
94}
95
73static void init_once(void *foo) 96static void init_once(void *foo)
74{ 97{
75 struct iso_inode_info *ei = foo; 98 struct iso_inode_info *ei = foo;
@@ -160,7 +183,7 @@ struct iso9660_options{
160 * Compute the hash for the isofs name corresponding to the dentry. 183 * Compute the hash for the isofs name corresponding to the dentry.
161 */ 184 */
162static int 185static int
163isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms) 186isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
164{ 187{
165 const char *name; 188 const char *name;
166 int len; 189 int len;
@@ -181,7 +204,7 @@ isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms)
181 * Compute the hash for the isofs name corresponding to the dentry. 204 * Compute the hash for the isofs name corresponding to the dentry.
182 */ 205 */
183static int 206static int
184isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) 207isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms)
185{ 208{
186 const char *name; 209 const char *name;
187 int len; 210 int len;
@@ -206,100 +229,94 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
206} 229}
207 230
208/* 231/*
209 * Case insensitive compare of two isofs names. 232 * Compare of two isofs names.
210 */
211static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a,
212 struct qstr *b, int ms)
213{
214 int alen, blen;
215
216 /* A filename cannot end in '.' or we treat it like it has none */
217 alen = a->len;
218 blen = b->len;
219 if (ms) {
220 while (alen && a->name[alen-1] == '.')
221 alen--;
222 while (blen && b->name[blen-1] == '.')
223 blen--;
224 }
225 if (alen == blen) {
226 if (strnicmp(a->name, b->name, alen) == 0)
227 return 0;
228 }
229 return 1;
230}
231
232/*
233 * Case sensitive compare of two isofs names.
234 */ 233 */
235static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a, 234static int isofs_dentry_cmp_common(
236 struct qstr *b, int ms) 235 unsigned int len, const char *str,
236 const struct qstr *name, int ms, int ci)
237{ 237{
238 int alen, blen; 238 int alen, blen;
239 239
240 /* A filename cannot end in '.' or we treat it like it has none */ 240 /* A filename cannot end in '.' or we treat it like it has none */
241 alen = a->len; 241 alen = name->len;
242 blen = b->len; 242 blen = len;
243 if (ms) { 243 if (ms) {
244 while (alen && a->name[alen-1] == '.') 244 while (alen && name->name[alen-1] == '.')
245 alen--; 245 alen--;
246 while (blen && b->name[blen-1] == '.') 246 while (blen && str[blen-1] == '.')
247 blen--; 247 blen--;
248 } 248 }
249 if (alen == blen) { 249 if (alen == blen) {
250 if (strncmp(a->name, b->name, alen) == 0) 250 if (ci) {
251 return 0; 251 if (strnicmp(name->name, str, alen) == 0)
252 return 0;
253 } else {
254 if (strncmp(name->name, str, alen) == 0)
255 return 0;
256 }
252 } 257 }
253 return 1; 258 return 1;
254} 259}
255 260
256static int 261static int
257isofs_hash(struct dentry *dentry, struct qstr *qstr) 262isofs_hash(const struct dentry *dentry, const struct inode *inode,
263 struct qstr *qstr)
258{ 264{
259 return isofs_hash_common(dentry, qstr, 0); 265 return isofs_hash_common(dentry, qstr, 0);
260} 266}
261 267
262static int 268static int
263isofs_hashi(struct dentry *dentry, struct qstr *qstr) 269isofs_hashi(const struct dentry *dentry, const struct inode *inode,
270 struct qstr *qstr)
264{ 271{
265 return isofs_hashi_common(dentry, qstr, 0); 272 return isofs_hashi_common(dentry, qstr, 0);
266} 273}
267 274
268static int 275static int
269isofs_dentry_cmp(struct dentry *dentry,struct qstr *a,struct qstr *b) 276isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode,
277 const struct dentry *dentry, const struct inode *inode,
278 unsigned int len, const char *str, const struct qstr *name)
270{ 279{
271 return isofs_dentry_cmp_common(dentry, a, b, 0); 280 return isofs_dentry_cmp_common(len, str, name, 0, 0);
272} 281}
273 282
274static int 283static int
275isofs_dentry_cmpi(struct dentry *dentry,struct qstr *a,struct qstr *b) 284isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode,
285 const struct dentry *dentry, const struct inode *inode,
286 unsigned int len, const char *str, const struct qstr *name)
276{ 287{
277 return isofs_dentry_cmpi_common(dentry, a, b, 0); 288 return isofs_dentry_cmp_common(len, str, name, 0, 1);
278} 289}
279 290
280#ifdef CONFIG_JOLIET 291#ifdef CONFIG_JOLIET
281static int 292static int
282isofs_hash_ms(struct dentry *dentry, struct qstr *qstr) 293isofs_hash_ms(const struct dentry *dentry, const struct inode *inode,
294 struct qstr *qstr)
283{ 295{
284 return isofs_hash_common(dentry, qstr, 1); 296 return isofs_hash_common(dentry, qstr, 1);
285} 297}
286 298
287static int 299static int
288isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr) 300isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode,
301 struct qstr *qstr)
289{ 302{
290 return isofs_hashi_common(dentry, qstr, 1); 303 return isofs_hashi_common(dentry, qstr, 1);
291} 304}
292 305
293static int 306static int
294isofs_dentry_cmp_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) 307isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode,
308 const struct dentry *dentry, const struct inode *inode,
309 unsigned int len, const char *str, const struct qstr *name)
295{ 310{
296 return isofs_dentry_cmp_common(dentry, a, b, 1); 311 return isofs_dentry_cmp_common(len, str, name, 1, 0);
297} 312}
298 313
299static int 314static int
300isofs_dentry_cmpi_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) 315isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode,
316 const struct dentry *dentry, const struct inode *inode,
317 unsigned int len, const char *str, const struct qstr *name)
301{ 318{
302 return isofs_dentry_cmpi_common(dentry, a, b, 1); 319 return isofs_dentry_cmp_common(len, str, name, 1, 1);
303} 320}
304#endif 321#endif
305 322
@@ -932,7 +949,7 @@ root_found:
932 table += 2; 949 table += 2;
933 if (opt.check == 'r') 950 if (opt.check == 'r')
934 table++; 951 table++;
935 s->s_root->d_op = &isofs_dentry_ops[table]; 952 d_set_d_op(s->s_root, &isofs_dentry_ops[table]);
936 953
937 kfree(opt.iocharset); 954 kfree(opt.iocharset);
938 955
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 0d23abfd4280..679a849c3b27 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -37,7 +37,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
37 37
38 qstr.name = compare; 38 qstr.name = compare;
39 qstr.len = dlen; 39 qstr.len = dlen;
40 return dentry->d_op->d_compare(dentry, &dentry->d_name, &qstr); 40 return dentry->d_op->d_compare(NULL, NULL, NULL, NULL,
41 dentry->d_name.len, dentry->d_name.name, &qstr);
41} 42}
42 43
43/* 44/*
@@ -171,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
171 struct inode *inode; 172 struct inode *inode;
172 struct page *page; 173 struct page *page;
173 174
174 dentry->d_op = dir->i_sb->s_root->d_op; 175 d_set_d_op(dentry, dir->i_sb->s_root->d_op);
175 176
176 page = alloc_page(GFP_USER); 177 page = alloc_page(GFP_USER);
177 if (!page) 178 if (!page)
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 54a92fd02bbd..95b79672150a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -259,11 +259,14 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
259 return rc; 259 return rc;
260} 260}
261 261
262int jffs2_check_acl(struct inode *inode, int mask) 262int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags)
263{ 263{
264 struct posix_acl *acl; 264 struct posix_acl *acl;
265 int rc; 265 int rc;
266 266
267 if (flags & IPERM_FLAG_RCU)
268 return -ECHILD;
269
267 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); 270 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
268 if (IS_ERR(acl)) 271 if (IS_ERR(acl))
269 return PTR_ERR(acl); 272 return PTR_ERR(acl);
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 5e42de8d9541..3119f59253d3 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29extern int jffs2_check_acl(struct inode *, int); 29extern int jffs2_check_acl(struct inode *, int, unsigned int);
30extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index c86041b866a4..853b8e300084 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
40 return &f->vfs_inode; 40 return &f->vfs_inode;
41} 41}
42 42
43static void jffs2_destroy_inode(struct inode *inode) 43static void jffs2_i_callback(struct rcu_head *head)
44{ 44{
45 struct inode *inode = container_of(head, struct inode, i_rcu);
46 INIT_LIST_HEAD(&inode->i_dentry);
45 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); 47 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
46} 48}
47 49
50static void jffs2_destroy_inode(struct inode *inode)
51{
52 call_rcu(&inode->i_rcu, jffs2_i_callback);
53}
54
48static void jffs2_i_init_once(void *foo) 55static void jffs2_i_init_once(void *foo)
49{ 56{
50 struct jffs2_inode_info *f = foo; 57 struct jffs2_inode_info *f = foo;
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 1057a4998e4e..e5de9422fa32 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,10 +114,14 @@ out:
114 return rc; 114 return rc;
115} 115}
116 116
117int jfs_check_acl(struct inode *inode, int mask) 117int jfs_check_acl(struct inode *inode, int mask, unsigned int flags)
118{ 118{
119 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 119 struct posix_acl *acl;
120
121 if (flags & IPERM_FLAG_RCU)
122 return -ECHILD;
120 123
124 acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
121 if (IS_ERR(acl)) 125 if (IS_ERR(acl))
122 return PTR_ERR(acl); 126 return PTR_ERR(acl);
123 if (acl) { 127 if (acl) {
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 54e07559878d..f9285c4900fa 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_check_acl(struct inode *, int); 23int jfs_check_acl(struct inode *, int, unsigned int flags);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_acl_chmod(struct inode *inode); 25int jfs_acl_chmod(struct inode *inode);
26 26
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 231ca4af9bce..4414e3a42264 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/namei.h>
21#include <linux/ctype.h> 22#include <linux/ctype.h>
22#include <linux/quotaops.h> 23#include <linux/quotaops.h>
23#include <linux/exportfs.h> 24#include <linux/exportfs.h>
@@ -1465,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1465 jfs_info("jfs_lookup: name = %s", name); 1466 jfs_info("jfs_lookup: name = %s", name);
1466 1467
1467 if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) 1468 if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
1468 dentry->d_op = &jfs_ci_dentry_operations; 1469 d_set_d_op(dentry, &jfs_ci_dentry_operations);
1469 1470
1470 if ((name[0] == '.') && (len == 1)) 1471 if ((name[0] == '.') && (len == 1))
1471 inum = dip->i_ino; 1472 inum = dip->i_ino;
@@ -1494,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1494 dentry = d_splice_alias(ip, dentry); 1495 dentry = d_splice_alias(ip, dentry);
1495 1496
1496 if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) 1497 if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
1497 dentry->d_op = &jfs_ci_dentry_operations; 1498 d_set_d_op(dentry, &jfs_ci_dentry_operations);
1498 1499
1499 return dentry; 1500 return dentry;
1500} 1501}
@@ -1573,7 +1574,8 @@ const struct file_operations jfs_dir_operations = {
1573 .llseek = generic_file_llseek, 1574 .llseek = generic_file_llseek,
1574}; 1575};
1575 1576
1576static int jfs_ci_hash(struct dentry *dir, struct qstr *this) 1577static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode,
1578 struct qstr *this)
1577{ 1579{
1578 unsigned long hash; 1580 unsigned long hash;
1579 int i; 1581 int i;
@@ -1586,32 +1588,63 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
1586 return 0; 1588 return 0;
1587} 1589}
1588 1590
1589static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b) 1591static int jfs_ci_compare(const struct dentry *parent,
1592 const struct inode *pinode,
1593 const struct dentry *dentry, const struct inode *inode,
1594 unsigned int len, const char *str, const struct qstr *name)
1590{ 1595{
1591 int i, result = 1; 1596 int i, result = 1;
1592 1597
1593 if (a->len != b->len) 1598 if (len != name->len)
1594 goto out; 1599 goto out;
1595 for (i=0; i < a->len; i++) { 1600 for (i=0; i < len; i++) {
1596 if (tolower(a->name[i]) != tolower(b->name[i])) 1601 if (tolower(str[i]) != tolower(name->name[i]))
1597 goto out; 1602 goto out;
1598 } 1603 }
1599 result = 0; 1604 result = 0;
1605out:
1606 return result;
1607}
1600 1608
1609static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1610{
1611 if (nd->flags & LOOKUP_RCU)
1612 return -ECHILD;
1601 /* 1613 /*
1602 * We want creates to preserve case. A negative dentry, a, that 1614 * This is not negative dentry. Always valid.
1603 * has a different case than b may cause a new entry to be created 1615 *
1604 * with the wrong case. Since we can't tell if a comes from a negative 1616 * Note, rename() to existing directory entry will have ->d_inode,
1605 * dentry, we blindly replace it with b. This should be harmless if 1617 * and will use existing name which isn't specified name by user.
1606 * a is not a negative dentry. 1618 *
1619 * We may be able to drop this positive dentry here. But dropping
1620 * positive dentry isn't good idea. So it's unsupported like
1621 * rename("filename", "FILENAME") for now.
1607 */ 1622 */
1608 memcpy((unsigned char *)a->name, b->name, a->len); 1623 if (dentry->d_inode)
1609out: 1624 return 1;
1610 return result; 1625
1626 /*
1627 * This may be nfsd (or something), anyway, we can't see the
1628 * intent of this. So, since this can be for creation, drop it.
1629 */
1630 if (!nd)
1631 return 0;
1632
1633 /*
1634 * Drop the negative dentry, in order to make sure to use the
1635 * case sensitive name which is specified by user if this is
1636 * for creation.
1637 */
1638 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
1639 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1640 return 0;
1641 }
1642 return 1;
1611} 1643}
1612 1644
1613const struct dentry_operations jfs_ci_dentry_operations = 1645const struct dentry_operations jfs_ci_dentry_operations =
1614{ 1646{
1615 .d_hash = jfs_ci_hash, 1647 .d_hash = jfs_ci_hash,
1616 .d_compare = jfs_ci_compare, 1648 .d_compare = jfs_ci_compare,
1649 .d_revalidate = jfs_ci_revalidate,
1617}; 1650};
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0669fc1cc3bf..3150d766e0d4 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
115 return &jfs_inode->vfs_inode; 115 return &jfs_inode->vfs_inode;
116} 116}
117 117
118static void jfs_i_callback(struct rcu_head *head)
119{
120 struct inode *inode = container_of(head, struct inode, i_rcu);
121 struct jfs_inode_info *ji = JFS_IP(inode);
122 INIT_LIST_HEAD(&inode->i_dentry);
123 kmem_cache_free(jfs_inode_cachep, ji);
124}
125
118static void jfs_destroy_inode(struct inode *inode) 126static void jfs_destroy_inode(struct inode *inode)
119{ 127{
120 struct jfs_inode_info *ji = JFS_IP(inode); 128 struct jfs_inode_info *ji = JFS_IP(inode);
@@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode)
128 ji->active_ag = -1; 136 ji->active_ag = -1;
129 } 137 }
130 spin_unlock_irq(&ji->ag_lock); 138 spin_unlock_irq(&ji->ag_lock);
131 kmem_cache_free(jfs_inode_cachep, ji); 139 call_rcu(&inode->i_rcu, jfs_i_callback);
132} 140}
133 141
134static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) 142static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -517,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
517 goto out_no_root; 525 goto out_no_root;
518 526
519 if (sbi->mntflag & JFS_OS2) 527 if (sbi->mntflag & JFS_OS2)
520 sb->s_root->d_op = &jfs_ci_dentry_operations; 528 d_set_d_op(sb->s_root, &jfs_ci_dentry_operations);
521 529
522 /* logical blocks are represented by 40 bits in pxd_t, etc. */ 530 /* logical blocks are represented by 40 bits in pxd_t, etc. */
523 sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; 531 sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
diff --git a/fs/libfs.c b/fs/libfs.c
index a3accdf528ad..889311e3d06b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -16,6 +16,11 @@
16 16
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18 18
19static inline int simple_positive(struct dentry *dentry)
20{
21 return dentry->d_inode && !d_unhashed(dentry);
22}
23
19int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, 24int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
20 struct kstat *stat) 25 struct kstat *stat)
21{ 26{
@@ -37,7 +42,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
37 * Retaining negative dentries for an in-memory filesystem just wastes 42 * Retaining negative dentries for an in-memory filesystem just wastes
38 * memory and lookup time: arrange for them to be deleted immediately. 43 * memory and lookup time: arrange for them to be deleted immediately.
39 */ 44 */
40static int simple_delete_dentry(struct dentry *dentry) 45static int simple_delete_dentry(const struct dentry *dentry)
41{ 46{
42 return 1; 47 return 1;
43} 48}
@@ -54,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
54 59
55 if (dentry->d_name.len > NAME_MAX) 60 if (dentry->d_name.len > NAME_MAX)
56 return ERR_PTR(-ENAMETOOLONG); 61 return ERR_PTR(-ENAMETOOLONG);
57 dentry->d_op = &simple_dentry_operations; 62 d_set_d_op(dentry, &simple_dentry_operations);
58 d_add(dentry, NULL); 63 d_add(dentry, NULL);
59 return NULL; 64 return NULL;
60} 65}
@@ -76,7 +81,8 @@ int dcache_dir_close(struct inode *inode, struct file *file)
76 81
77loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) 82loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
78{ 83{
79 mutex_lock(&file->f_path.dentry->d_inode->i_mutex); 84 struct dentry *dentry = file->f_path.dentry;
85 mutex_lock(&dentry->d_inode->i_mutex);
80 switch (origin) { 86 switch (origin) {
81 case 1: 87 case 1:
82 offset += file->f_pos; 88 offset += file->f_pos;
@@ -84,7 +90,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
84 if (offset >= 0) 90 if (offset >= 0)
85 break; 91 break;
86 default: 92 default:
87 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 93 mutex_unlock(&dentry->d_inode->i_mutex);
88 return -EINVAL; 94 return -EINVAL;
89 } 95 }
90 if (offset != file->f_pos) { 96 if (offset != file->f_pos) {
@@ -94,21 +100,24 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
94 struct dentry *cursor = file->private_data; 100 struct dentry *cursor = file->private_data;
95 loff_t n = file->f_pos - 2; 101 loff_t n = file->f_pos - 2;
96 102
97 spin_lock(&dcache_lock); 103 spin_lock(&dentry->d_lock);
104 /* d_lock not required for cursor */
98 list_del(&cursor->d_u.d_child); 105 list_del(&cursor->d_u.d_child);
99 p = file->f_path.dentry->d_subdirs.next; 106 p = dentry->d_subdirs.next;
100 while (n && p != &file->f_path.dentry->d_subdirs) { 107 while (n && p != &dentry->d_subdirs) {
101 struct dentry *next; 108 struct dentry *next;
102 next = list_entry(p, struct dentry, d_u.d_child); 109 next = list_entry(p, struct dentry, d_u.d_child);
103 if (!d_unhashed(next) && next->d_inode) 110 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
111 if (simple_positive(next))
104 n--; 112 n--;
113 spin_unlock(&next->d_lock);
105 p = p->next; 114 p = p->next;
106 } 115 }
107 list_add_tail(&cursor->d_u.d_child, p); 116 list_add_tail(&cursor->d_u.d_child, p);
108 spin_unlock(&dcache_lock); 117 spin_unlock(&dentry->d_lock);
109 } 118 }
110 } 119 }
111 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 120 mutex_unlock(&dentry->d_inode->i_mutex);
112 return offset; 121 return offset;
113} 122}
114 123
@@ -148,29 +157,35 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
148 i++; 157 i++;
149 /* fallthrough */ 158 /* fallthrough */
150 default: 159 default:
151 spin_lock(&dcache_lock); 160 spin_lock(&dentry->d_lock);
152 if (filp->f_pos == 2) 161 if (filp->f_pos == 2)
153 list_move(q, &dentry->d_subdirs); 162 list_move(q, &dentry->d_subdirs);
154 163
155 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 164 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
156 struct dentry *next; 165 struct dentry *next;
157 next = list_entry(p, struct dentry, d_u.d_child); 166 next = list_entry(p, struct dentry, d_u.d_child);
158 if (d_unhashed(next) || !next->d_inode) 167 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
168 if (!simple_positive(next)) {
169 spin_unlock(&next->d_lock);
159 continue; 170 continue;
171 }
160 172
161 spin_unlock(&dcache_lock); 173 spin_unlock(&next->d_lock);
174 spin_unlock(&dentry->d_lock);
162 if (filldir(dirent, next->d_name.name, 175 if (filldir(dirent, next->d_name.name,
163 next->d_name.len, filp->f_pos, 176 next->d_name.len, filp->f_pos,
164 next->d_inode->i_ino, 177 next->d_inode->i_ino,
165 dt_type(next->d_inode)) < 0) 178 dt_type(next->d_inode)) < 0)
166 return 0; 179 return 0;
167 spin_lock(&dcache_lock); 180 spin_lock(&dentry->d_lock);
181 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
168 /* next is still alive */ 182 /* next is still alive */
169 list_move(q, p); 183 list_move(q, p);
184 spin_unlock(&next->d_lock);
170 p = q; 185 p = q;
171 filp->f_pos++; 186 filp->f_pos++;
172 } 187 }
173 spin_unlock(&dcache_lock); 188 spin_unlock(&dentry->d_lock);
174 } 189 }
175 return 0; 190 return 0;
176} 191}
@@ -259,23 +274,23 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
259 return 0; 274 return 0;
260} 275}
261 276
262static inline int simple_positive(struct dentry *dentry)
263{
264 return dentry->d_inode && !d_unhashed(dentry);
265}
266
267int simple_empty(struct dentry *dentry) 277int simple_empty(struct dentry *dentry)
268{ 278{
269 struct dentry *child; 279 struct dentry *child;
270 int ret = 0; 280 int ret = 0;
271 281
272 spin_lock(&dcache_lock); 282 spin_lock(&dentry->d_lock);
273 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) 283 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
274 if (simple_positive(child)) 284 spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
285 if (simple_positive(child)) {
286 spin_unlock(&child->d_lock);
275 goto out; 287 goto out;
288 }
289 spin_unlock(&child->d_lock);
290 }
276 ret = 1; 291 ret = 1;
277out: 292out:
278 spin_unlock(&dcache_lock); 293 spin_unlock(&dentry->d_lock);
279 return ret; 294 return ret;
280} 295}
281 296
diff --git a/fs/locks.c b/fs/locks.c
index 8729347bcd1a..08415b2a6d36 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1389,7 +1389,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1389 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1389 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1390 goto out; 1390 goto out;
1391 if ((arg == F_WRLCK) 1391 if ((arg == F_WRLCK)
1392 && ((atomic_read(&dentry->d_count) > 1) 1392 && ((dentry->d_count > 1)
1393 || (atomic_read(&inode->i_count) > 1))) 1393 || (atomic_read(&inode->i_count) > 1)))
1394 goto out; 1394 goto out;
1395 } 1395 }
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 409dfd65e9a1..f9ddf0c388c8 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -555,9 +555,11 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
555 return __logfs_create(dir, dentry, inode, target, destlen); 555 return __logfs_create(dir, dentry, inode, target, destlen);
556} 556}
557 557
558static int logfs_permission(struct inode *inode, int mask) 558static int logfs_permission(struct inode *inode, int mask, unsigned int flags)
559{ 559{
560 return generic_permission(inode, mask, NULL); 560 if (flags & IPERM_FLAG_RCU)
561 return -ECHILD;
562 return generic_permission(inode, mask, flags, NULL);
561} 563}
562 564
563static int logfs_link(struct dentry *old_dentry, struct inode *dir, 565static int logfs_link(struct dentry *old_dentry, struct inode *dir,
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index d8c71ece098f..03b8c240aeda 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
141 return __logfs_iget(sb, ino); 141 return __logfs_iget(sb, ino);
142} 142}
143 143
144static void logfs_i_callback(struct rcu_head *head)
145{
146 struct inode *inode = container_of(head, struct inode, i_rcu);
147 INIT_LIST_HEAD(&inode->i_dentry);
148 kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
149}
150
144static void __logfs_destroy_inode(struct inode *inode) 151static void __logfs_destroy_inode(struct inode *inode)
145{ 152{
146 struct logfs_inode *li = logfs_inode(inode); 153 struct logfs_inode *li = logfs_inode(inode);
147 154
148 BUG_ON(li->li_block); 155 BUG_ON(li->li_block);
149 list_del(&li->li_freeing_list); 156 list_del(&li->li_freeing_list);
150 kmem_cache_free(logfs_inode_cache, li); 157 call_rcu(&inode->i_rcu, logfs_i_callback);
151} 158}
152 159
153static void logfs_destroy_inode(struct inode *inode) 160static void logfs_destroy_inode(struct inode *inode)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fb2020858a34..ae0b83f476a6 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
68 return &ei->vfs_inode; 68 return &ei->vfs_inode;
69} 69}
70 70
71static void minix_destroy_inode(struct inode *inode) 71static void minix_i_callback(struct rcu_head *head)
72{ 72{
73 struct inode *inode = container_of(head, struct inode, i_rcu);
74 INIT_LIST_HEAD(&inode->i_dentry);
73 kmem_cache_free(minix_inode_cachep, minix_i(inode)); 75 kmem_cache_free(minix_inode_cachep, minix_i(inode));
74} 76}
75 77
78static void minix_destroy_inode(struct inode *inode)
79{
80 call_rcu(&inode->i_rcu, minix_i_callback);
81}
82
76static void init_once(void *foo) 83static void init_once(void *foo)
77{ 84{
78 struct minix_inode_info *ei = (struct minix_inode_info *) foo; 85 struct minix_inode_info *ei = (struct minix_inode_info *) foo;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index c0d35a3accef..1b9e07728a9f 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
23 struct inode * inode = NULL; 23 struct inode * inode = NULL;
24 ino_t ino; 24 ino_t ino;
25 25
26 dentry->d_op = dir->i_sb->s_root->d_op; 26 d_set_d_op(dentry, dir->i_sb->s_root->d_op);
27 27
28 if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) 28 if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen)
29 return ERR_PTR(-ENAMETOOLONG); 29 return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/namei.c b/fs/namei.c
index 4ff7ca530533..19433cdba011 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
169/* 169/*
170 * This does basic POSIX ACL permission checking 170 * This does basic POSIX ACL permission checking
171 */ 171 */
172static int acl_permission_check(struct inode *inode, int mask, 172static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
173 int (*check_acl)(struct inode *inode, int mask)) 173 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
174{ 174{
175 umode_t mode = inode->i_mode; 175 umode_t mode = inode->i_mode;
176 176
@@ -180,7 +180,7 @@ static int acl_permission_check(struct inode *inode, int mask,
180 mode >>= 6; 180 mode >>= 6;
181 else { 181 else {
182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
183 int error = check_acl(inode, mask); 183 int error = check_acl(inode, mask, flags);
184 if (error != -EAGAIN) 184 if (error != -EAGAIN)
185 return error; 185 return error;
186 } 186 }
@@ -198,25 +198,30 @@ static int acl_permission_check(struct inode *inode, int mask,
198} 198}
199 199
200/** 200/**
201 * generic_permission - check for access rights on a Posix-like filesystem 201 * generic_permission - check for access rights on a Posix-like filesystem
202 * @inode: inode to check access rights for 202 * @inode: inode to check access rights for
203 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 203 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
204 * @check_acl: optional callback to check for Posix ACLs 204 * @check_acl: optional callback to check for Posix ACLs
205 * @flags: IPERM_FLAG_ flags.
205 * 206 *
206 * Used to check for read/write/execute permissions on a file. 207 * Used to check for read/write/execute permissions on a file.
207 * We use "fsuid" for this, letting us set arbitrary permissions 208 * We use "fsuid" for this, letting us set arbitrary permissions
208 * for filesystem access without changing the "normal" uids which 209 * for filesystem access without changing the "normal" uids which
209 * are used for other things.. 210 * are used for other things.
211 *
212 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
213 * request cannot be satisfied (eg. requires blocking or too much complexity).
214 * It would then be called again in ref-walk mode.
210 */ 215 */
211int generic_permission(struct inode *inode, int mask, 216int generic_permission(struct inode *inode, int mask, unsigned int flags,
212 int (*check_acl)(struct inode *inode, int mask)) 217 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
213{ 218{
214 int ret; 219 int ret;
215 220
216 /* 221 /*
217 * Do the basic POSIX ACL permission checks. 222 * Do the basic POSIX ACL permission checks.
218 */ 223 */
219 ret = acl_permission_check(inode, mask, check_acl); 224 ret = acl_permission_check(inode, mask, flags, check_acl);
220 if (ret != -EACCES) 225 if (ret != -EACCES)
221 return ret; 226 return ret;
222 227
@@ -271,9 +276,10 @@ int inode_permission(struct inode *inode, int mask)
271 } 276 }
272 277
273 if (inode->i_op->permission) 278 if (inode->i_op->permission)
274 retval = inode->i_op->permission(inode, mask); 279 retval = inode->i_op->permission(inode, mask, 0);
275 else 280 else
276 retval = generic_permission(inode, mask, inode->i_op->check_acl); 281 retval = generic_permission(inode, mask, 0,
282 inode->i_op->check_acl);
277 283
278 if (retval) 284 if (retval)
279 return retval; 285 return retval;
@@ -362,6 +368,18 @@ void path_get(struct path *path)
362EXPORT_SYMBOL(path_get); 368EXPORT_SYMBOL(path_get);
363 369
364/** 370/**
371 * path_get_long - get a long reference to a path
372 * @path: path to get the reference to
373 *
374 * Given a path increment the reference count to the dentry and the vfsmount.
375 */
376void path_get_long(struct path *path)
377{
378 mntget_long(path->mnt);
379 dget(path->dentry);
380}
381
382/**
365 * path_put - put a reference to a path 383 * path_put - put a reference to a path
366 * @path: path to put the reference to 384 * @path: path to put the reference to
367 * 385 *
@@ -375,6 +393,185 @@ void path_put(struct path *path)
375EXPORT_SYMBOL(path_put); 393EXPORT_SYMBOL(path_put);
376 394
377/** 395/**
396 * path_put_long - put a long reference to a path
397 * @path: path to put the reference to
398 *
399 * Given a path decrement the reference count to the dentry and the vfsmount.
400 */
401void path_put_long(struct path *path)
402{
403 dput(path->dentry);
404 mntput_long(path->mnt);
405}
406
407/**
408 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
409 * @nd: nameidata pathwalk data to drop
410 * @Returns: 0 on success, -ECHILD on failure
411 *
412 * Path walking has 2 modes, rcu-walk and ref-walk (see
413 * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
414 * to drop out of rcu-walk mode and take normal reference counts on dentries
415 * and vfsmounts to transition to ref-walk mode. __drop_rcu* functions take
416 * refcounts at the last known good point before rcu-walk got stuck, so
417 * ref-walk may continue from there. If this is not successful (eg. a seqcount
418 * has changed), then failure is returned and path walk restarts from the
419 * beginning in ref-walk mode.
420 *
421 * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
422 * ref-walk. Must be called from rcu-walk context.
423 */
424static int nameidata_drop_rcu(struct nameidata *nd)
425{
426 struct fs_struct *fs = current->fs;
427 struct dentry *dentry = nd->path.dentry;
428
429 BUG_ON(!(nd->flags & LOOKUP_RCU));
430 if (nd->root.mnt) {
431 spin_lock(&fs->lock);
432 if (nd->root.mnt != fs->root.mnt ||
433 nd->root.dentry != fs->root.dentry)
434 goto err_root;
435 }
436 spin_lock(&dentry->d_lock);
437 if (!__d_rcu_to_refcount(dentry, nd->seq))
438 goto err;
439 BUG_ON(nd->inode != dentry->d_inode);
440 spin_unlock(&dentry->d_lock);
441 if (nd->root.mnt) {
442 path_get(&nd->root);
443 spin_unlock(&fs->lock);
444 }
445 mntget(nd->path.mnt);
446
447 rcu_read_unlock();
448 br_read_unlock(vfsmount_lock);
449 nd->flags &= ~LOOKUP_RCU;
450 return 0;
451err:
452 spin_unlock(&dentry->d_lock);
453err_root:
454 if (nd->root.mnt)
455 spin_unlock(&fs->lock);
456 return -ECHILD;
457}
458
459/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
460static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
461{
462 if (nd->flags & LOOKUP_RCU)
463 return nameidata_drop_rcu(nd);
464 return 0;
465}
466
467/**
468 * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
469 * @nd: nameidata pathwalk data to drop
470 * @dentry: dentry to drop
471 * @Returns: 0 on success, -ECHILD on failure
472 *
473 * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
474 * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
475 * @nd. Must be called from rcu-walk context.
476 */
477static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
478{
479 struct fs_struct *fs = current->fs;
480 struct dentry *parent = nd->path.dentry;
481
482 BUG_ON(!(nd->flags & LOOKUP_RCU));
483 if (nd->root.mnt) {
484 spin_lock(&fs->lock);
485 if (nd->root.mnt != fs->root.mnt ||
486 nd->root.dentry != fs->root.dentry)
487 goto err_root;
488 }
489 spin_lock(&parent->d_lock);
490 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
491 if (!__d_rcu_to_refcount(dentry, nd->seq))
492 goto err;
493 /*
494 * If the sequence check on the child dentry passed, then the child has
495 * not been removed from its parent. This means the parent dentry must
496 * be valid and able to take a reference at this point.
497 */
498 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
499 BUG_ON(!parent->d_count);
500 parent->d_count++;
501 spin_unlock(&dentry->d_lock);
502 spin_unlock(&parent->d_lock);
503 if (nd->root.mnt) {
504 path_get(&nd->root);
505 spin_unlock(&fs->lock);
506 }
507 mntget(nd->path.mnt);
508
509 rcu_read_unlock();
510 br_read_unlock(vfsmount_lock);
511 nd->flags &= ~LOOKUP_RCU;
512 return 0;
513err:
514 spin_unlock(&dentry->d_lock);
515 spin_unlock(&parent->d_lock);
516err_root:
517 if (nd->root.mnt)
518 spin_unlock(&fs->lock);
519 return -ECHILD;
520}
521
522/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
523static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
524{
525 if (nd->flags & LOOKUP_RCU)
526 return nameidata_dentry_drop_rcu(nd, dentry);
527 return 0;
528}
529
530/**
531 * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
532 * @nd: nameidata pathwalk data to drop
533 * @Returns: 0 on success, -ECHILD on failure
534 *
535 * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
536 * nd->path should be the final element of the lookup, so nd->root is discarded.
537 * Must be called from rcu-walk context.
538 */
539static int nameidata_drop_rcu_last(struct nameidata *nd)
540{
541 struct dentry *dentry = nd->path.dentry;
542
543 BUG_ON(!(nd->flags & LOOKUP_RCU));
544 nd->flags &= ~LOOKUP_RCU;
545 nd->root.mnt = NULL;
546 spin_lock(&dentry->d_lock);
547 if (!__d_rcu_to_refcount(dentry, nd->seq))
548 goto err_unlock;
549 BUG_ON(nd->inode != dentry->d_inode);
550 spin_unlock(&dentry->d_lock);
551
552 mntget(nd->path.mnt);
553
554 rcu_read_unlock();
555 br_read_unlock(vfsmount_lock);
556
557 return 0;
558
559err_unlock:
560 spin_unlock(&dentry->d_lock);
561 rcu_read_unlock();
562 br_read_unlock(vfsmount_lock);
563 return -ECHILD;
564}
565
566/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
567static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
568{
569 if (likely(nd->flags & LOOKUP_RCU))
570 return nameidata_drop_rcu_last(nd);
571 return 0;
572}
573
574/**
378 * release_open_intent - free up open intent resources 575 * release_open_intent - free up open intent resources
379 * @nd: pointer to nameidata 576 * @nd: pointer to nameidata
380 */ 577 */
@@ -386,10 +583,26 @@ void release_open_intent(struct nameidata *nd)
386 fput(nd->intent.open.file); 583 fput(nd->intent.open.file);
387} 584}
388 585
586static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
587{
588 int status;
589
590 status = dentry->d_op->d_revalidate(dentry, nd);
591 if (status == -ECHILD) {
592 if (nameidata_dentry_drop_rcu(nd, dentry))
593 return status;
594 status = dentry->d_op->d_revalidate(dentry, nd);
595 }
596
597 return status;
598}
599
389static inline struct dentry * 600static inline struct dentry *
390do_revalidate(struct dentry *dentry, struct nameidata *nd) 601do_revalidate(struct dentry *dentry, struct nameidata *nd)
391{ 602{
392 int status = dentry->d_op->d_revalidate(dentry, nd); 603 int status;
604
605 status = d_revalidate(dentry, nd);
393 if (unlikely(status <= 0)) { 606 if (unlikely(status <= 0)) {
394 /* 607 /*
395 * The dentry failed validation. 608 * The dentry failed validation.
@@ -397,19 +610,36 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
397 * the dentry otherwise d_revalidate is asking us 610 * the dentry otherwise d_revalidate is asking us
398 * to return a fail status. 611 * to return a fail status.
399 */ 612 */
400 if (!status) { 613 if (status < 0) {
614 /* If we're in rcu-walk, we don't have a ref */
615 if (!(nd->flags & LOOKUP_RCU))
616 dput(dentry);
617 dentry = ERR_PTR(status);
618
619 } else {
620 /* Don't d_invalidate in rcu-walk mode */
621 if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
622 return ERR_PTR(-ECHILD);
401 if (!d_invalidate(dentry)) { 623 if (!d_invalidate(dentry)) {
402 dput(dentry); 624 dput(dentry);
403 dentry = NULL; 625 dentry = NULL;
404 } 626 }
405 } else {
406 dput(dentry);
407 dentry = ERR_PTR(status);
408 } 627 }
409 } 628 }
410 return dentry; 629 return dentry;
411} 630}
412 631
632static inline int need_reval_dot(struct dentry *dentry)
633{
634 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
635 return 0;
636
637 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
638 return 0;
639
640 return 1;
641}
642
413/* 643/*
414 * force_reval_path - force revalidation of a dentry 644 * force_reval_path - force revalidation of a dentry
415 * 645 *
@@ -433,13 +663,12 @@ force_reval_path(struct path *path, struct nameidata *nd)
433 663
434 /* 664 /*
435 * only check on filesystems where it's possible for the dentry to 665 * only check on filesystems where it's possible for the dentry to
436 * become stale. It's assumed that if this flag is set then the 666 * become stale.
437 * d_revalidate op will also be defined.
438 */ 667 */
439 if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) 668 if (!need_reval_dot(dentry))
440 return 0; 669 return 0;
441 670
442 status = dentry->d_op->d_revalidate(dentry, nd); 671 status = d_revalidate(dentry, nd);
443 if (status > 0) 672 if (status > 0)
444 return 0; 673 return 0;
445 674
@@ -459,26 +688,27 @@ force_reval_path(struct path *path, struct nameidata *nd)
459 * short-cut DAC fails, then call ->permission() to do more 688 * short-cut DAC fails, then call ->permission() to do more
460 * complete permission check. 689 * complete permission check.
461 */ 690 */
462static int exec_permission(struct inode *inode) 691static inline int exec_permission(struct inode *inode, unsigned int flags)
463{ 692{
464 int ret; 693 int ret;
465 694
466 if (inode->i_op->permission) { 695 if (inode->i_op->permission) {
467 ret = inode->i_op->permission(inode, MAY_EXEC); 696 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
468 if (!ret) 697 } else {
469 goto ok; 698 ret = acl_permission_check(inode, MAY_EXEC, flags,
470 return ret; 699 inode->i_op->check_acl);
471 } 700 }
472 ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); 701 if (likely(!ret))
473 if (!ret)
474 goto ok; 702 goto ok;
703 if (ret == -ECHILD)
704 return ret;
475 705
476 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 706 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
477 goto ok; 707 goto ok;
478 708
479 return ret; 709 return ret;
480ok: 710ok:
481 return security_inode_permission(inode, MAY_EXEC); 711 return security_inode_exec_permission(inode, flags);
482} 712}
483 713
484static __always_inline void set_root(struct nameidata *nd) 714static __always_inline void set_root(struct nameidata *nd)
@@ -489,8 +719,23 @@ static __always_inline void set_root(struct nameidata *nd)
489 719
490static int link_path_walk(const char *, struct nameidata *); 720static int link_path_walk(const char *, struct nameidata *);
491 721
722static __always_inline void set_root_rcu(struct nameidata *nd)
723{
724 if (!nd->root.mnt) {
725 struct fs_struct *fs = current->fs;
726 unsigned seq;
727
728 do {
729 seq = read_seqcount_begin(&fs->seq);
730 nd->root = fs->root;
731 } while (read_seqcount_retry(&fs->seq, seq));
732 }
733}
734
492static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 735static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
493{ 736{
737 int ret;
738
494 if (IS_ERR(link)) 739 if (IS_ERR(link))
495 goto fail; 740 goto fail;
496 741
@@ -500,8 +745,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
500 nd->path = nd->root; 745 nd->path = nd->root;
501 path_get(&nd->root); 746 path_get(&nd->root);
502 } 747 }
748 nd->inode = nd->path.dentry->d_inode;
503 749
504 return link_path_walk(link, nd); 750 ret = link_path_walk(link, nd);
751 return ret;
505fail: 752fail:
506 path_put(&nd->path); 753 path_put(&nd->path);
507 return PTR_ERR(link); 754 return PTR_ERR(link);
@@ -516,11 +763,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
516 763
517static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 764static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
518{ 765{
519 dput(nd->path.dentry); 766 if (!(nd->flags & LOOKUP_RCU)) {
520 if (nd->path.mnt != path->mnt) { 767 dput(nd->path.dentry);
521 mntput(nd->path.mnt); 768 if (nd->path.mnt != path->mnt)
522 nd->path.mnt = path->mnt; 769 mntput(nd->path.mnt);
523 } 770 }
771 nd->path.mnt = path->mnt;
524 nd->path.dentry = path->dentry; 772 nd->path.dentry = path->dentry;
525} 773}
526 774
@@ -535,9 +783,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p)
535 783
536 if (path->mnt != nd->path.mnt) { 784 if (path->mnt != nd->path.mnt) {
537 path_to_nameidata(path, nd); 785 path_to_nameidata(path, nd);
786 nd->inode = nd->path.dentry->d_inode;
538 dget(dentry); 787 dget(dentry);
539 } 788 }
540 mntget(path->mnt); 789 mntget(path->mnt);
790
541 nd->last_type = LAST_BIND; 791 nd->last_type = LAST_BIND;
542 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 792 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
543 error = PTR_ERR(*p); 793 error = PTR_ERR(*p);
@@ -591,6 +841,20 @@ loop:
591 return err; 841 return err;
592} 842}
593 843
844static int follow_up_rcu(struct path *path)
845{
846 struct vfsmount *parent;
847 struct dentry *mountpoint;
848
849 parent = path->mnt->mnt_parent;
850 if (parent == path->mnt)
851 return 0;
852 mountpoint = path->mnt->mnt_mountpoint;
853 path->dentry = mountpoint;
854 path->mnt = parent;
855 return 1;
856}
857
594int follow_up(struct path *path) 858int follow_up(struct path *path)
595{ 859{
596 struct vfsmount *parent; 860 struct vfsmount *parent;
@@ -612,9 +876,24 @@ int follow_up(struct path *path)
612 return 1; 876 return 1;
613} 877}
614 878
615/* no need for dcache_lock, as serialization is taken care in 879/*
616 * namespace.c 880 * serialization is taken care of in namespace.c
617 */ 881 */
882static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
883 struct inode **inode)
884{
885 while (d_mountpoint(path->dentry)) {
886 struct vfsmount *mounted;
887 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
888 if (!mounted)
889 return;
890 path->mnt = mounted;
891 path->dentry = mounted->mnt_root;
892 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
893 *inode = path->dentry->d_inode;
894 }
895}
896
618static int __follow_mount(struct path *path) 897static int __follow_mount(struct path *path)
619{ 898{
620 int res = 0; 899 int res = 0;
@@ -645,9 +924,6 @@ static void follow_mount(struct path *path)
645 } 924 }
646} 925}
647 926
648/* no need for dcache_lock, as serialization is taken care in
649 * namespace.c
650 */
651int follow_down(struct path *path) 927int follow_down(struct path *path)
652{ 928{
653 struct vfsmount *mounted; 929 struct vfsmount *mounted;
@@ -663,7 +939,42 @@ int follow_down(struct path *path)
663 return 0; 939 return 0;
664} 940}
665 941
666static __always_inline void follow_dotdot(struct nameidata *nd) 942static int follow_dotdot_rcu(struct nameidata *nd)
943{
944 struct inode *inode = nd->inode;
945
946 set_root_rcu(nd);
947
948 while(1) {
949 if (nd->path.dentry == nd->root.dentry &&
950 nd->path.mnt == nd->root.mnt) {
951 break;
952 }
953 if (nd->path.dentry != nd->path.mnt->mnt_root) {
954 struct dentry *old = nd->path.dentry;
955 struct dentry *parent = old->d_parent;
956 unsigned seq;
957
958 seq = read_seqcount_begin(&parent->d_seq);
959 if (read_seqcount_retry(&old->d_seq, nd->seq))
960 return -ECHILD;
961 inode = parent->d_inode;
962 nd->path.dentry = parent;
963 nd->seq = seq;
964 break;
965 }
966 if (!follow_up_rcu(&nd->path))
967 break;
968 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
969 inode = nd->path.dentry->d_inode;
970 }
971 __follow_mount_rcu(nd, &nd->path, &inode);
972 nd->inode = inode;
973
974 return 0;
975}
976
977static void follow_dotdot(struct nameidata *nd)
667{ 978{
668 set_root(nd); 979 set_root(nd);
669 980
@@ -684,6 +995,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
684 break; 995 break;
685 } 996 }
686 follow_mount(&nd->path); 997 follow_mount(&nd->path);
998 nd->inode = nd->path.dentry->d_inode;
687} 999}
688 1000
689/* 1001/*
@@ -721,17 +1033,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
721 * It _is_ time-critical. 1033 * It _is_ time-critical.
722 */ 1034 */
723static int do_lookup(struct nameidata *nd, struct qstr *name, 1035static int do_lookup(struct nameidata *nd, struct qstr *name,
724 struct path *path) 1036 struct path *path, struct inode **inode)
725{ 1037{
726 struct vfsmount *mnt = nd->path.mnt; 1038 struct vfsmount *mnt = nd->path.mnt;
727 struct dentry *dentry, *parent; 1039 struct dentry *dentry, *parent = nd->path.dentry;
728 struct inode *dir; 1040 struct inode *dir;
729 /* 1041 /*
730 * See if the low-level filesystem might want 1042 * See if the low-level filesystem might want
731 * to use its own hash.. 1043 * to use its own hash..
732 */ 1044 */
733 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 1045 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
734 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name); 1046 int err = parent->d_op->d_hash(parent, nd->inode, name);
735 if (err < 0) 1047 if (err < 0)
736 return err; 1048 return err;
737 } 1049 }
@@ -741,21 +1053,44 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
741 * of a false negative due to a concurrent rename, we're going to 1053 * of a false negative due to a concurrent rename, we're going to
742 * do the non-racy lookup, below. 1054 * do the non-racy lookup, below.
743 */ 1055 */
744 dentry = __d_lookup(nd->path.dentry, name); 1056 if (nd->flags & LOOKUP_RCU) {
745 if (!dentry) 1057 unsigned seq;
746 goto need_lookup; 1058
1059 *inode = nd->inode;
1060 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1061 if (!dentry) {
1062 if (nameidata_drop_rcu(nd))
1063 return -ECHILD;
1064 goto need_lookup;
1065 }
1066 /* Memory barrier in read_seqcount_begin of child is enough */
1067 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1068 return -ECHILD;
1069
1070 nd->seq = seq;
1071 if (dentry->d_flags & DCACHE_OP_REVALIDATE)
1072 goto need_revalidate;
1073 path->mnt = mnt;
1074 path->dentry = dentry;
1075 __follow_mount_rcu(nd, path, inode);
1076 } else {
1077 dentry = __d_lookup(parent, name);
1078 if (!dentry)
1079 goto need_lookup;
747found: 1080found:
748 if (dentry->d_op && dentry->d_op->d_revalidate) 1081 if (dentry->d_flags & DCACHE_OP_REVALIDATE)
749 goto need_revalidate; 1082 goto need_revalidate;
750done: 1083done:
751 path->mnt = mnt; 1084 path->mnt = mnt;
752 path->dentry = dentry; 1085 path->dentry = dentry;
753 __follow_mount(path); 1086 __follow_mount(path);
1087 *inode = path->dentry->d_inode;
1088 }
754 return 0; 1089 return 0;
755 1090
756need_lookup: 1091need_lookup:
757 parent = nd->path.dentry;
758 dir = parent->d_inode; 1092 dir = parent->d_inode;
1093 BUG_ON(nd->inode != dir);
759 1094
760 mutex_lock(&dir->i_mutex); 1095 mutex_lock(&dir->i_mutex);
761 /* 1096 /*
@@ -817,7 +1152,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
817static int link_path_walk(const char *name, struct nameidata *nd) 1152static int link_path_walk(const char *name, struct nameidata *nd)
818{ 1153{
819 struct path next; 1154 struct path next;
820 struct inode *inode;
821 int err; 1155 int err;
822 unsigned int lookup_flags = nd->flags; 1156 unsigned int lookup_flags = nd->flags;
823 1157
@@ -826,18 +1160,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
826 if (!*name) 1160 if (!*name)
827 goto return_reval; 1161 goto return_reval;
828 1162
829 inode = nd->path.dentry->d_inode;
830 if (nd->depth) 1163 if (nd->depth)
831 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 1164 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
832 1165
833 /* At this point we know we have a real path component. */ 1166 /* At this point we know we have a real path component. */
834 for(;;) { 1167 for(;;) {
1168 struct inode *inode;
835 unsigned long hash; 1169 unsigned long hash;
836 struct qstr this; 1170 struct qstr this;
837 unsigned int c; 1171 unsigned int c;
838 1172
839 nd->flags |= LOOKUP_CONTINUE; 1173 nd->flags |= LOOKUP_CONTINUE;
840 err = exec_permission(inode); 1174 if (nd->flags & LOOKUP_RCU) {
1175 err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1176 if (err == -ECHILD) {
1177 if (nameidata_drop_rcu(nd))
1178 return -ECHILD;
1179 goto exec_again;
1180 }
1181 } else {
1182exec_again:
1183 err = exec_permission(nd->inode, 0);
1184 }
841 if (err) 1185 if (err)
842 break; 1186 break;
843 1187
@@ -868,37 +1212,44 @@ static int link_path_walk(const char *name, struct nameidata *nd)
868 if (this.name[0] == '.') switch (this.len) { 1212 if (this.name[0] == '.') switch (this.len) {
869 default: 1213 default:
870 break; 1214 break;
871 case 2: 1215 case 2:
872 if (this.name[1] != '.') 1216 if (this.name[1] != '.')
873 break; 1217 break;
874 follow_dotdot(nd); 1218 if (nd->flags & LOOKUP_RCU) {
875 inode = nd->path.dentry->d_inode; 1219 if (follow_dotdot_rcu(nd))
1220 return -ECHILD;
1221 } else
1222 follow_dotdot(nd);
876 /* fallthrough */ 1223 /* fallthrough */
877 case 1: 1224 case 1:
878 continue; 1225 continue;
879 } 1226 }
880 /* This does the actual lookups.. */ 1227 /* This does the actual lookups.. */
881 err = do_lookup(nd, &this, &next); 1228 err = do_lookup(nd, &this, &next, &inode);
882 if (err) 1229 if (err)
883 break; 1230 break;
884
885 err = -ENOENT; 1231 err = -ENOENT;
886 inode = next.dentry->d_inode;
887 if (!inode) 1232 if (!inode)
888 goto out_dput; 1233 goto out_dput;
889 1234
890 if (inode->i_op->follow_link) { 1235 if (inode->i_op->follow_link) {
1236 /* We commonly drop rcu-walk here */
1237 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1238 return -ECHILD;
1239 BUG_ON(inode != next.dentry->d_inode);
891 err = do_follow_link(&next, nd); 1240 err = do_follow_link(&next, nd);
892 if (err) 1241 if (err)
893 goto return_err; 1242 goto return_err;
1243 nd->inode = nd->path.dentry->d_inode;
894 err = -ENOENT; 1244 err = -ENOENT;
895 inode = nd->path.dentry->d_inode; 1245 if (!nd->inode)
896 if (!inode)
897 break; 1246 break;
898 } else 1247 } else {
899 path_to_nameidata(&next, nd); 1248 path_to_nameidata(&next, nd);
1249 nd->inode = inode;
1250 }
900 err = -ENOTDIR; 1251 err = -ENOTDIR;
901 if (!inode->i_op->lookup) 1252 if (!nd->inode->i_op->lookup)
902 break; 1253 break;
903 continue; 1254 continue;
904 /* here ends the main loop */ 1255 /* here ends the main loop */
@@ -913,32 +1264,39 @@ last_component:
913 if (this.name[0] == '.') switch (this.len) { 1264 if (this.name[0] == '.') switch (this.len) {
914 default: 1265 default:
915 break; 1266 break;
916 case 2: 1267 case 2:
917 if (this.name[1] != '.') 1268 if (this.name[1] != '.')
918 break; 1269 break;
919 follow_dotdot(nd); 1270 if (nd->flags & LOOKUP_RCU) {
920 inode = nd->path.dentry->d_inode; 1271 if (follow_dotdot_rcu(nd))
1272 return -ECHILD;
1273 } else
1274 follow_dotdot(nd);
921 /* fallthrough */ 1275 /* fallthrough */
922 case 1: 1276 case 1:
923 goto return_reval; 1277 goto return_reval;
924 } 1278 }
925 err = do_lookup(nd, &this, &next); 1279 err = do_lookup(nd, &this, &next, &inode);
926 if (err) 1280 if (err)
927 break; 1281 break;
928 inode = next.dentry->d_inode;
929 if (follow_on_final(inode, lookup_flags)) { 1282 if (follow_on_final(inode, lookup_flags)) {
1283 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1284 return -ECHILD;
1285 BUG_ON(inode != next.dentry->d_inode);
930 err = do_follow_link(&next, nd); 1286 err = do_follow_link(&next, nd);
931 if (err) 1287 if (err)
932 goto return_err; 1288 goto return_err;
933 inode = nd->path.dentry->d_inode; 1289 nd->inode = nd->path.dentry->d_inode;
934 } else 1290 } else {
935 path_to_nameidata(&next, nd); 1291 path_to_nameidata(&next, nd);
1292 nd->inode = inode;
1293 }
936 err = -ENOENT; 1294 err = -ENOENT;
937 if (!inode) 1295 if (!nd->inode)
938 break; 1296 break;
939 if (lookup_flags & LOOKUP_DIRECTORY) { 1297 if (lookup_flags & LOOKUP_DIRECTORY) {
940 err = -ENOTDIR; 1298 err = -ENOTDIR;
941 if (!inode->i_op->lookup) 1299 if (!nd->inode->i_op->lookup)
942 break; 1300 break;
943 } 1301 }
944 goto return_base; 1302 goto return_base;
@@ -958,25 +1316,43 @@ return_reval:
958 * We bypassed the ordinary revalidation routines. 1316 * We bypassed the ordinary revalidation routines.
959 * We may need to check the cached dentry for staleness. 1317 * We may need to check the cached dentry for staleness.
960 */ 1318 */
961 if (nd->path.dentry && nd->path.dentry->d_sb && 1319 if (need_reval_dot(nd->path.dentry)) {
962 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
963 err = -ESTALE;
964 /* Note: we do not d_invalidate() */ 1320 /* Note: we do not d_invalidate() */
965 if (!nd->path.dentry->d_op->d_revalidate( 1321 err = d_revalidate(nd->path.dentry, nd);
966 nd->path.dentry, nd)) 1322 if (!err)
1323 err = -ESTALE;
1324 if (err < 0)
967 break; 1325 break;
968 } 1326 }
969return_base: 1327return_base:
1328 if (nameidata_drop_rcu_last_maybe(nd))
1329 return -ECHILD;
970 return 0; 1330 return 0;
971out_dput: 1331out_dput:
972 path_put_conditional(&next, nd); 1332 if (!(nd->flags & LOOKUP_RCU))
1333 path_put_conditional(&next, nd);
973 break; 1334 break;
974 } 1335 }
975 path_put(&nd->path); 1336 if (!(nd->flags & LOOKUP_RCU))
1337 path_put(&nd->path);
976return_err: 1338return_err:
977 return err; 1339 return err;
978} 1340}
979 1341
1342static inline int path_walk_rcu(const char *name, struct nameidata *nd)
1343{
1344 current->total_link_count = 0;
1345
1346 return link_path_walk(name, nd);
1347}
1348
1349static inline int path_walk_simple(const char *name, struct nameidata *nd)
1350{
1351 current->total_link_count = 0;
1352
1353 return link_path_walk(name, nd);
1354}
1355
980static int path_walk(const char *name, struct nameidata *nd) 1356static int path_walk(const char *name, struct nameidata *nd)
981{ 1357{
982 struct path save = nd->path; 1358 struct path save = nd->path;
@@ -1002,6 +1378,93 @@ static int path_walk(const char *name, struct nameidata *nd)
1002 return result; 1378 return result;
1003} 1379}
1004 1380
1381static void path_finish_rcu(struct nameidata *nd)
1382{
1383 if (nd->flags & LOOKUP_RCU) {
1384 /* RCU dangling. Cancel it. */
1385 nd->flags &= ~LOOKUP_RCU;
1386 nd->root.mnt = NULL;
1387 rcu_read_unlock();
1388 br_read_unlock(vfsmount_lock);
1389 }
1390 if (nd->file)
1391 fput(nd->file);
1392}
1393
1394static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1395{
1396 int retval = 0;
1397 int fput_needed;
1398 struct file *file;
1399
1400 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1401 nd->flags = flags | LOOKUP_RCU;
1402 nd->depth = 0;
1403 nd->root.mnt = NULL;
1404 nd->file = NULL;
1405
1406 if (*name=='/') {
1407 struct fs_struct *fs = current->fs;
1408 unsigned seq;
1409
1410 br_read_lock(vfsmount_lock);
1411 rcu_read_lock();
1412
1413 do {
1414 seq = read_seqcount_begin(&fs->seq);
1415 nd->root = fs->root;
1416 nd->path = nd->root;
1417 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1418 } while (read_seqcount_retry(&fs->seq, seq));
1419
1420 } else if (dfd == AT_FDCWD) {
1421 struct fs_struct *fs = current->fs;
1422 unsigned seq;
1423
1424 br_read_lock(vfsmount_lock);
1425 rcu_read_lock();
1426
1427 do {
1428 seq = read_seqcount_begin(&fs->seq);
1429 nd->path = fs->pwd;
1430 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1431 } while (read_seqcount_retry(&fs->seq, seq));
1432
1433 } else {
1434 struct dentry *dentry;
1435
1436 file = fget_light(dfd, &fput_needed);
1437 retval = -EBADF;
1438 if (!file)
1439 goto out_fail;
1440
1441 dentry = file->f_path.dentry;
1442
1443 retval = -ENOTDIR;
1444 if (!S_ISDIR(dentry->d_inode->i_mode))
1445 goto fput_fail;
1446
1447 retval = file_permission(file, MAY_EXEC);
1448 if (retval)
1449 goto fput_fail;
1450
1451 nd->path = file->f_path;
1452 if (fput_needed)
1453 nd->file = file;
1454
1455 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1456 br_read_lock(vfsmount_lock);
1457 rcu_read_lock();
1458 }
1459 nd->inode = nd->path.dentry->d_inode;
1460 return 0;
1461
1462fput_fail:
1463 fput_light(file, fput_needed);
1464out_fail:
1465 return retval;
1466}
1467
1005static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1468static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1006{ 1469{
1007 int retval = 0; 1470 int retval = 0;
@@ -1042,6 +1505,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
1042 1505
1043 fput_light(file, fput_needed); 1506 fput_light(file, fput_needed);
1044 } 1507 }
1508 nd->inode = nd->path.dentry->d_inode;
1045 return 0; 1509 return 0;
1046 1510
1047fput_fail: 1511fput_fail:
@@ -1054,16 +1518,53 @@ out_fail:
1054static int do_path_lookup(int dfd, const char *name, 1518static int do_path_lookup(int dfd, const char *name,
1055 unsigned int flags, struct nameidata *nd) 1519 unsigned int flags, struct nameidata *nd)
1056{ 1520{
1057 int retval = path_init(dfd, name, flags, nd); 1521 int retval;
1058 if (!retval) 1522
1059 retval = path_walk(name, nd); 1523 /*
1060 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1524 * Path walking is largely split up into 2 different synchronisation
1061 nd->path.dentry->d_inode)) 1525 * schemes, rcu-walk and ref-walk (explained in
1062 audit_inode(name, nd->path.dentry); 1526 * Documentation/filesystems/path-lookup.txt). These share much of the
1527 * path walk code, but some things particularly setup, cleanup, and
1528 * following mounts are sufficiently divergent that functions are
1529 * duplicated. Typically there is a function foo(), and its RCU
1530 * analogue, foo_rcu().
1531 *
1532 * -ECHILD is the error number of choice (just to avoid clashes) that
1533 * is returned if some aspect of an rcu-walk fails. Such an error must
1534 * be handled by restarting a traditional ref-walk (which will always
1535 * be able to complete).
1536 */
1537 retval = path_init_rcu(dfd, name, flags, nd);
1538 if (unlikely(retval))
1539 return retval;
1540 retval = path_walk_rcu(name, nd);
1541 path_finish_rcu(nd);
1063 if (nd->root.mnt) { 1542 if (nd->root.mnt) {
1064 path_put(&nd->root); 1543 path_put(&nd->root);
1065 nd->root.mnt = NULL; 1544 nd->root.mnt = NULL;
1066 } 1545 }
1546
1547 if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
1548 /* slower, locked walk */
1549 if (retval == -ESTALE)
1550 flags |= LOOKUP_REVAL;
1551 retval = path_init(dfd, name, flags, nd);
1552 if (unlikely(retval))
1553 return retval;
1554 retval = path_walk(name, nd);
1555 if (nd->root.mnt) {
1556 path_put(&nd->root);
1557 nd->root.mnt = NULL;
1558 }
1559 }
1560
1561 if (likely(!retval)) {
1562 if (unlikely(!audit_dummy_context())) {
1563 if (nd->path.dentry && nd->inode)
1564 audit_inode(name, nd->path.dentry);
1565 }
1566 }
1567
1067 return retval; 1568 return retval;
1068} 1569}
1069 1570
@@ -1106,10 +1607,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1106 path_get(&nd->path); 1607 path_get(&nd->path);
1107 nd->root = nd->path; 1608 nd->root = nd->path;
1108 path_get(&nd->root); 1609 path_get(&nd->root);
1610 nd->inode = nd->path.dentry->d_inode;
1109 1611
1110 retval = path_walk(name, nd); 1612 retval = path_walk(name, nd);
1111 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1613 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1112 nd->path.dentry->d_inode)) 1614 nd->inode))
1113 audit_inode(name, nd->path.dentry); 1615 audit_inode(name, nd->path.dentry);
1114 1616
1115 path_put(&nd->root); 1617 path_put(&nd->root);
@@ -1125,7 +1627,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1125 struct dentry *dentry; 1627 struct dentry *dentry;
1126 int err; 1628 int err;
1127 1629
1128 err = exec_permission(inode); 1630 err = exec_permission(inode, 0);
1129 if (err) 1631 if (err)
1130 return ERR_PTR(err); 1632 return ERR_PTR(err);
1131 1633
@@ -1133,8 +1635,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
1133 * See if the low-level filesystem might want 1635 * See if the low-level filesystem might want
1134 * to use its own hash.. 1636 * to use its own hash..
1135 */ 1637 */
1136 if (base->d_op && base->d_op->d_hash) { 1638 if (base->d_flags & DCACHE_OP_HASH) {
1137 err = base->d_op->d_hash(base, name); 1639 err = base->d_op->d_hash(base, inode, name);
1138 dentry = ERR_PTR(err); 1640 dentry = ERR_PTR(err);
1139 if (err < 0) 1641 if (err < 0)
1140 goto out; 1642 goto out;
@@ -1147,7 +1649,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1147 */ 1649 */
1148 dentry = d_lookup(base, name); 1650 dentry = d_lookup(base, name);
1149 1651
1150 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 1652 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE))
1151 dentry = do_revalidate(dentry, nd); 1653 dentry = do_revalidate(dentry, nd);
1152 1654
1153 if (!dentry) 1655 if (!dentry)
@@ -1490,6 +1992,7 @@ out_unlock:
1490 mutex_unlock(&dir->d_inode->i_mutex); 1992 mutex_unlock(&dir->d_inode->i_mutex);
1491 dput(nd->path.dentry); 1993 dput(nd->path.dentry);
1492 nd->path.dentry = path->dentry; 1994 nd->path.dentry = path->dentry;
1995
1493 if (error) 1996 if (error)
1494 return error; 1997 return error;
1495 /* Don't check for write permission, don't truncate */ 1998 /* Don't check for write permission, don't truncate */
@@ -1584,6 +2087,9 @@ exit:
1584 return ERR_PTR(error); 2087 return ERR_PTR(error);
1585} 2088}
1586 2089
2090/*
2091 * Handle O_CREAT case for do_filp_open
2092 */
1587static struct file *do_last(struct nameidata *nd, struct path *path, 2093static struct file *do_last(struct nameidata *nd, struct path *path,
1588 int open_flag, int acc_mode, 2094 int open_flag, int acc_mode,
1589 int mode, const char *pathname) 2095 int mode, const char *pathname)
@@ -1597,50 +2103,25 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1597 follow_dotdot(nd); 2103 follow_dotdot(nd);
1598 dir = nd->path.dentry; 2104 dir = nd->path.dentry;
1599 case LAST_DOT: 2105 case LAST_DOT:
1600 if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { 2106 if (need_reval_dot(dir)) {
1601 if (!dir->d_op->d_revalidate(dir, nd)) { 2107 error = d_revalidate(nd->path.dentry, nd);
2108 if (!error)
1602 error = -ESTALE; 2109 error = -ESTALE;
2110 if (error < 0)
1603 goto exit; 2111 goto exit;
1604 }
1605 } 2112 }
1606 /* fallthrough */ 2113 /* fallthrough */
1607 case LAST_ROOT: 2114 case LAST_ROOT:
1608 if (open_flag & O_CREAT) 2115 goto exit;
1609 goto exit;
1610 /* fallthrough */
1611 case LAST_BIND: 2116 case LAST_BIND:
1612 audit_inode(pathname, dir); 2117 audit_inode(pathname, dir);
1613 goto ok; 2118 goto ok;
1614 } 2119 }
1615 2120
1616 /* trailing slashes? */ 2121 /* trailing slashes? */
1617 if (nd->last.name[nd->last.len]) { 2122 if (nd->last.name[nd->last.len])
1618 if (open_flag & O_CREAT) 2123 goto exit;
1619 goto exit;
1620 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1621 }
1622
1623 /* just plain open? */
1624 if (!(open_flag & O_CREAT)) {
1625 error = do_lookup(nd, &nd->last, path);
1626 if (error)
1627 goto exit;
1628 error = -ENOENT;
1629 if (!path->dentry->d_inode)
1630 goto exit_dput;
1631 if (path->dentry->d_inode->i_op->follow_link)
1632 return NULL;
1633 error = -ENOTDIR;
1634 if (nd->flags & LOOKUP_DIRECTORY) {
1635 if (!path->dentry->d_inode->i_op->lookup)
1636 goto exit_dput;
1637 }
1638 path_to_nameidata(path, nd);
1639 audit_inode(pathname, nd->path.dentry);
1640 goto ok;
1641 }
1642 2124
1643 /* OK, it's O_CREAT */
1644 mutex_lock(&dir->d_inode->i_mutex); 2125 mutex_lock(&dir->d_inode->i_mutex);
1645 2126
1646 path->dentry = lookup_hash(nd); 2127 path->dentry = lookup_hash(nd);
@@ -1711,8 +2192,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1711 return NULL; 2192 return NULL;
1712 2193
1713 path_to_nameidata(path, nd); 2194 path_to_nameidata(path, nd);
2195 nd->inode = path->dentry->d_inode;
1714 error = -EISDIR; 2196 error = -EISDIR;
1715 if (S_ISDIR(path->dentry->d_inode->i_mode)) 2197 if (S_ISDIR(nd->inode->i_mode))
1716 goto exit; 2198 goto exit;
1717ok: 2199ok:
1718 filp = finish_open(nd, open_flag, acc_mode); 2200 filp = finish_open(nd, open_flag, acc_mode);
@@ -1743,7 +2225,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1743 struct path path; 2225 struct path path;
1744 int count = 0; 2226 int count = 0;
1745 int flag = open_to_namei_flags(open_flag); 2227 int flag = open_to_namei_flags(open_flag);
1746 int force_reval = 0; 2228 int flags;
1747 2229
1748 if (!(open_flag & O_CREAT)) 2230 if (!(open_flag & O_CREAT))
1749 mode = 0; 2231 mode = 0;
@@ -1772,54 +2254,84 @@ struct file *do_filp_open(int dfd, const char *pathname,
1772 if (open_flag & O_APPEND) 2254 if (open_flag & O_APPEND)
1773 acc_mode |= MAY_APPEND; 2255 acc_mode |= MAY_APPEND;
1774 2256
1775 /* find the parent */ 2257 flags = LOOKUP_OPEN;
1776reval: 2258 if (open_flag & O_CREAT) {
1777 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 2259 flags |= LOOKUP_CREATE;
2260 if (open_flag & O_EXCL)
2261 flags |= LOOKUP_EXCL;
2262 }
2263 if (open_flag & O_DIRECTORY)
2264 flags |= LOOKUP_DIRECTORY;
2265 if (!(open_flag & O_NOFOLLOW))
2266 flags |= LOOKUP_FOLLOW;
2267
2268 filp = get_empty_filp();
2269 if (!filp)
2270 return ERR_PTR(-ENFILE);
2271
2272 filp->f_flags = open_flag;
2273 nd.intent.open.file = filp;
2274 nd.intent.open.flags = flag;
2275 nd.intent.open.create_mode = mode;
2276
2277 if (open_flag & O_CREAT)
2278 goto creat;
2279
2280 /* !O_CREAT, simple open */
2281 error = do_path_lookup(dfd, pathname, flags, &nd);
2282 if (unlikely(error))
2283 goto out_filp;
2284 error = -ELOOP;
2285 if (!(nd.flags & LOOKUP_FOLLOW)) {
2286 if (nd.inode->i_op->follow_link)
2287 goto out_path;
2288 }
2289 error = -ENOTDIR;
2290 if (nd.flags & LOOKUP_DIRECTORY) {
2291 if (!nd.inode->i_op->lookup)
2292 goto out_path;
2293 }
2294 audit_inode(pathname, nd.path.dentry);
2295 filp = finish_open(&nd, open_flag, acc_mode);
2296 return filp;
2297
2298creat:
2299 /* OK, have to create the file. Find the parent. */
2300 error = path_init_rcu(dfd, pathname,
2301 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
1778 if (error) 2302 if (error)
1779 return ERR_PTR(error); 2303 goto out_filp;
1780 if (force_reval) 2304 error = path_walk_rcu(pathname, &nd);
1781 nd.flags |= LOOKUP_REVAL; 2305 path_finish_rcu(&nd);
2306 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2307 /* slower, locked walk */
2308 if (error == -ESTALE) {
2309reval:
2310 flags |= LOOKUP_REVAL;
2311 }
2312 error = path_init(dfd, pathname,
2313 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2314 if (error)
2315 goto out_filp;
1782 2316
1783 current->total_link_count = 0; 2317 error = path_walk_simple(pathname, &nd);
1784 error = link_path_walk(pathname, &nd);
1785 if (error) {
1786 filp = ERR_PTR(error);
1787 goto out;
1788 } 2318 }
1789 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) 2319 if (unlikely(error))
2320 goto out_filp;
2321 if (unlikely(!audit_dummy_context()))
1790 audit_inode(pathname, nd.path.dentry); 2322 audit_inode(pathname, nd.path.dentry);
1791 2323
1792 /* 2324 /*
1793 * We have the parent and last component. 2325 * We have the parent and last component.
1794 */ 2326 */
1795 2327 nd.flags = flags;
1796 error = -ENFILE;
1797 filp = get_empty_filp();
1798 if (filp == NULL)
1799 goto exit_parent;
1800 nd.intent.open.file = filp;
1801 filp->f_flags = open_flag;
1802 nd.intent.open.flags = flag;
1803 nd.intent.open.create_mode = mode;
1804 nd.flags &= ~LOOKUP_PARENT;
1805 nd.flags |= LOOKUP_OPEN;
1806 if (open_flag & O_CREAT) {
1807 nd.flags |= LOOKUP_CREATE;
1808 if (open_flag & O_EXCL)
1809 nd.flags |= LOOKUP_EXCL;
1810 }
1811 if (open_flag & O_DIRECTORY)
1812 nd.flags |= LOOKUP_DIRECTORY;
1813 if (!(open_flag & O_NOFOLLOW))
1814 nd.flags |= LOOKUP_FOLLOW;
1815 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2328 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1816 while (unlikely(!filp)) { /* trailing symlink */ 2329 while (unlikely(!filp)) { /* trailing symlink */
1817 struct path holder; 2330 struct path holder;
1818 struct inode *inode = path.dentry->d_inode;
1819 void *cookie; 2331 void *cookie;
1820 error = -ELOOP; 2332 error = -ELOOP;
1821 /* S_ISDIR part is a temporary automount kludge */ 2333 /* S_ISDIR part is a temporary automount kludge */
1822 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) 2334 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
1823 goto exit_dput; 2335 goto exit_dput;
1824 if (count++ == 32) 2336 if (count++ == 32)
1825 goto exit_dput; 2337 goto exit_dput;
@@ -1840,36 +2352,33 @@ reval:
1840 goto exit_dput; 2352 goto exit_dput;
1841 error = __do_follow_link(&path, &nd, &cookie); 2353 error = __do_follow_link(&path, &nd, &cookie);
1842 if (unlikely(error)) { 2354 if (unlikely(error)) {
2355 if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
2356 nd.inode->i_op->put_link(path.dentry, &nd, cookie);
1843 /* nd.path had been dropped */ 2357 /* nd.path had been dropped */
1844 if (!IS_ERR(cookie) && inode->i_op->put_link) 2358 nd.path = path;
1845 inode->i_op->put_link(path.dentry, &nd, cookie); 2359 goto out_path;
1846 path_put(&path);
1847 release_open_intent(&nd);
1848 filp = ERR_PTR(error);
1849 goto out;
1850 } 2360 }
1851 holder = path; 2361 holder = path;
1852 nd.flags &= ~LOOKUP_PARENT; 2362 nd.flags &= ~LOOKUP_PARENT;
1853 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2363 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1854 if (inode->i_op->put_link) 2364 if (nd.inode->i_op->put_link)
1855 inode->i_op->put_link(holder.dentry, &nd, cookie); 2365 nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
1856 path_put(&holder); 2366 path_put(&holder);
1857 } 2367 }
1858out: 2368out:
1859 if (nd.root.mnt) 2369 if (nd.root.mnt)
1860 path_put(&nd.root); 2370 path_put(&nd.root);
1861 if (filp == ERR_PTR(-ESTALE) && !force_reval) { 2371 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
1862 force_reval = 1;
1863 goto reval; 2372 goto reval;
1864 }
1865 return filp; 2373 return filp;
1866 2374
1867exit_dput: 2375exit_dput:
1868 path_put_conditional(&path, &nd); 2376 path_put_conditional(&path, &nd);
2377out_path:
2378 path_put(&nd.path);
2379out_filp:
1869 if (!IS_ERR(nd.intent.open.file)) 2380 if (!IS_ERR(nd.intent.open.file))
1870 release_open_intent(&nd); 2381 release_open_intent(&nd);
1871exit_parent:
1872 path_put(&nd.path);
1873 filp = ERR_PTR(error); 2382 filp = ERR_PTR(error);
1874 goto out; 2383 goto out;
1875} 2384}
@@ -2130,12 +2639,10 @@ void dentry_unhash(struct dentry *dentry)
2130{ 2639{
2131 dget(dentry); 2640 dget(dentry);
2132 shrink_dcache_parent(dentry); 2641 shrink_dcache_parent(dentry);
2133 spin_lock(&dcache_lock);
2134 spin_lock(&dentry->d_lock); 2642 spin_lock(&dentry->d_lock);
2135 if (atomic_read(&dentry->d_count) == 2) 2643 if (dentry->d_count == 2)
2136 __d_drop(dentry); 2644 __d_drop(dentry);
2137 spin_unlock(&dentry->d_lock); 2645 spin_unlock(&dentry->d_lock);
2138 spin_unlock(&dcache_lock);
2139} 2646}
2140 2647
2141int vfs_rmdir(struct inode *dir, struct dentry *dentry) 2648int vfs_rmdir(struct inode *dir, struct dentry *dentry)
diff --git a/fs/namespace.c b/fs/namespace.c
index 3dbfc072ec70..3ddfd9046c44 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt)
138 mnt->mnt_group_id = 0; 138 mnt->mnt_group_id = 0;
139} 139}
140 140
141/*
142 * vfsmount lock must be held for read
143 */
144static inline void mnt_add_count(struct vfsmount *mnt, int n)
145{
146#ifdef CONFIG_SMP
147 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
148#else
149 preempt_disable();
150 mnt->mnt_count += n;
151 preempt_enable();
152#endif
153}
154
155static inline void mnt_set_count(struct vfsmount *mnt, int n)
156{
157#ifdef CONFIG_SMP
158 this_cpu_write(mnt->mnt_pcp->mnt_count, n);
159#else
160 mnt->mnt_count = n;
161#endif
162}
163
164/*
165 * vfsmount lock must be held for read
166 */
167static inline void mnt_inc_count(struct vfsmount *mnt)
168{
169 mnt_add_count(mnt, 1);
170}
171
172/*
173 * vfsmount lock must be held for read
174 */
175static inline void mnt_dec_count(struct vfsmount *mnt)
176{
177 mnt_add_count(mnt, -1);
178}
179
180/*
181 * vfsmount lock must be held for write
182 */
183unsigned int mnt_get_count(struct vfsmount *mnt)
184{
185#ifdef CONFIG_SMP
186 unsigned int count = atomic_read(&mnt->mnt_longrefs);
187 int cpu;
188
189 for_each_possible_cpu(cpu) {
190 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
191 }
192
193 return count;
194#else
195 return mnt->mnt_count;
196#endif
197}
198
141struct vfsmount *alloc_vfsmnt(const char *name) 199struct vfsmount *alloc_vfsmnt(const char *name)
142{ 200{
143 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 201 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name)
154 goto out_free_id; 212 goto out_free_id;
155 } 213 }
156 214
157 atomic_set(&mnt->mnt_count, 1); 215#ifdef CONFIG_SMP
216 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
217 if (!mnt->mnt_pcp)
218 goto out_free_devname;
219
220 atomic_set(&mnt->mnt_longrefs, 1);
221#else
222 mnt->mnt_count = 1;
223 mnt->mnt_writers = 0;
224#endif
225
158 INIT_LIST_HEAD(&mnt->mnt_hash); 226 INIT_LIST_HEAD(&mnt->mnt_hash);
159 INIT_LIST_HEAD(&mnt->mnt_child); 227 INIT_LIST_HEAD(&mnt->mnt_child);
160 INIT_LIST_HEAD(&mnt->mnt_mounts); 228 INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -166,13 +234,6 @@ struct vfsmount *alloc_vfsmnt(const char *name)
166#ifdef CONFIG_FSNOTIFY 234#ifdef CONFIG_FSNOTIFY
167 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); 235 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
168#endif 236#endif
169#ifdef CONFIG_SMP
170 mnt->mnt_writers = alloc_percpu(int);
171 if (!mnt->mnt_writers)
172 goto out_free_devname;
173#else
174 mnt->mnt_writers = 0;
175#endif
176 } 237 }
177 return mnt; 238 return mnt;
178 239
@@ -216,32 +277,32 @@ int __mnt_is_readonly(struct vfsmount *mnt)
216} 277}
217EXPORT_SYMBOL_GPL(__mnt_is_readonly); 278EXPORT_SYMBOL_GPL(__mnt_is_readonly);
218 279
219static inline void inc_mnt_writers(struct vfsmount *mnt) 280static inline void mnt_inc_writers(struct vfsmount *mnt)
220{ 281{
221#ifdef CONFIG_SMP 282#ifdef CONFIG_SMP
222 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; 283 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
223#else 284#else
224 mnt->mnt_writers++; 285 mnt->mnt_writers++;
225#endif 286#endif
226} 287}
227 288
228static inline void dec_mnt_writers(struct vfsmount *mnt) 289static inline void mnt_dec_writers(struct vfsmount *mnt)
229{ 290{
230#ifdef CONFIG_SMP 291#ifdef CONFIG_SMP
231 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; 292 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
232#else 293#else
233 mnt->mnt_writers--; 294 mnt->mnt_writers--;
234#endif 295#endif
235} 296}
236 297
237static unsigned int count_mnt_writers(struct vfsmount *mnt) 298static unsigned int mnt_get_writers(struct vfsmount *mnt)
238{ 299{
239#ifdef CONFIG_SMP 300#ifdef CONFIG_SMP
240 unsigned int count = 0; 301 unsigned int count = 0;
241 int cpu; 302 int cpu;
242 303
243 for_each_possible_cpu(cpu) { 304 for_each_possible_cpu(cpu) {
244 count += *per_cpu_ptr(mnt->mnt_writers, cpu); 305 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
245 } 306 }
246 307
247 return count; 308 return count;
@@ -273,9 +334,9 @@ int mnt_want_write(struct vfsmount *mnt)
273 int ret = 0; 334 int ret = 0;
274 335
275 preempt_disable(); 336 preempt_disable();
276 inc_mnt_writers(mnt); 337 mnt_inc_writers(mnt);
277 /* 338 /*
278 * The store to inc_mnt_writers must be visible before we pass 339 * The store to mnt_inc_writers must be visible before we pass
279 * MNT_WRITE_HOLD loop below, so that the slowpath can see our 340 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
280 * incremented count after it has set MNT_WRITE_HOLD. 341 * incremented count after it has set MNT_WRITE_HOLD.
281 */ 342 */
@@ -289,7 +350,7 @@ int mnt_want_write(struct vfsmount *mnt)
289 */ 350 */
290 smp_rmb(); 351 smp_rmb();
291 if (__mnt_is_readonly(mnt)) { 352 if (__mnt_is_readonly(mnt)) {
292 dec_mnt_writers(mnt); 353 mnt_dec_writers(mnt);
293 ret = -EROFS; 354 ret = -EROFS;
294 goto out; 355 goto out;
295 } 356 }
@@ -317,7 +378,7 @@ int mnt_clone_write(struct vfsmount *mnt)
317 if (__mnt_is_readonly(mnt)) 378 if (__mnt_is_readonly(mnt))
318 return -EROFS; 379 return -EROFS;
319 preempt_disable(); 380 preempt_disable();
320 inc_mnt_writers(mnt); 381 mnt_inc_writers(mnt);
321 preempt_enable(); 382 preempt_enable();
322 return 0; 383 return 0;
323} 384}
@@ -351,7 +412,7 @@ EXPORT_SYMBOL_GPL(mnt_want_write_file);
351void mnt_drop_write(struct vfsmount *mnt) 412void mnt_drop_write(struct vfsmount *mnt)
352{ 413{
353 preempt_disable(); 414 preempt_disable();
354 dec_mnt_writers(mnt); 415 mnt_dec_writers(mnt);
355 preempt_enable(); 416 preempt_enable();
356} 417}
357EXPORT_SYMBOL_GPL(mnt_drop_write); 418EXPORT_SYMBOL_GPL(mnt_drop_write);
@@ -384,7 +445,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
384 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while 445 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
385 * we're counting up here. 446 * we're counting up here.
386 */ 447 */
387 if (count_mnt_writers(mnt) > 0) 448 if (mnt_get_writers(mnt) > 0)
388 ret = -EBUSY; 449 ret = -EBUSY;
389 else 450 else
390 mnt->mnt_flags |= MNT_READONLY; 451 mnt->mnt_flags |= MNT_READONLY;
@@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt)
418 kfree(mnt->mnt_devname); 479 kfree(mnt->mnt_devname);
419 mnt_free_id(mnt); 480 mnt_free_id(mnt);
420#ifdef CONFIG_SMP 481#ifdef CONFIG_SMP
421 free_percpu(mnt->mnt_writers); 482 free_percpu(mnt->mnt_pcp);
422#endif 483#endif
423 kmem_cache_free(mnt_cache, mnt); 484 kmem_cache_free(mnt_cache, mnt);
424} 485}
@@ -492,6 +553,27 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
492} 553}
493 554
494/* 555/*
556 * Clear dentry's mounted state if it has no remaining mounts.
557 * vfsmount_lock must be held for write.
558 */
559static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
560{
561 unsigned u;
562
563 for (u = 0; u < HASH_SIZE; u++) {
564 struct vfsmount *p;
565
566 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
567 if (p->mnt_mountpoint == dentry)
568 return;
569 }
570 }
571 spin_lock(&dentry->d_lock);
572 dentry->d_flags &= ~DCACHE_MOUNTED;
573 spin_unlock(&dentry->d_lock);
574}
575
576/*
495 * vfsmount lock must be held for write 577 * vfsmount lock must be held for write
496 */ 578 */
497static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 579static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
@@ -502,7 +584,7 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
502 mnt->mnt_mountpoint = mnt->mnt_root; 584 mnt->mnt_mountpoint = mnt->mnt_root;
503 list_del_init(&mnt->mnt_child); 585 list_del_init(&mnt->mnt_child);
504 list_del_init(&mnt->mnt_hash); 586 list_del_init(&mnt->mnt_hash);
505 old_path->dentry->d_mounted--; 587 dentry_reset_mounted(old_path->mnt, old_path->dentry);
506} 588}
507 589
508/* 590/*
@@ -513,7 +595,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
513{ 595{
514 child_mnt->mnt_parent = mntget(mnt); 596 child_mnt->mnt_parent = mntget(mnt);
515 child_mnt->mnt_mountpoint = dget(dentry); 597 child_mnt->mnt_mountpoint = dget(dentry);
516 dentry->d_mounted++; 598 spin_lock(&dentry->d_lock);
599 dentry->d_flags |= DCACHE_MOUNTED;
600 spin_unlock(&dentry->d_lock);
517} 601}
518 602
519/* 603/*
@@ -629,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
629 return NULL; 713 return NULL;
630} 714}
631 715
632static inline void __mntput(struct vfsmount *mnt) 716static inline void mntfree(struct vfsmount *mnt)
633{ 717{
634 struct super_block *sb = mnt->mnt_sb; 718 struct super_block *sb = mnt->mnt_sb;
719
635 /* 720 /*
636 * This probably indicates that somebody messed 721 * This probably indicates that somebody messed
637 * up a mnt_want/drop_write() pair. If this 722 * up a mnt_want/drop_write() pair. If this
@@ -639,38 +724,123 @@ static inline void __mntput(struct vfsmount *mnt)
639 * to make r/w->r/o transitions. 724 * to make r/w->r/o transitions.
640 */ 725 */
641 /* 726 /*
642 * atomic_dec_and_lock() used to deal with ->mnt_count decrements 727 * The locking used to deal with mnt_count decrement provides barriers,
643 * provides barriers, so count_mnt_writers() below is safe. AV 728 * so mnt_get_writers() below is safe.
644 */ 729 */
645 WARN_ON(count_mnt_writers(mnt)); 730 WARN_ON(mnt_get_writers(mnt));
646 fsnotify_vfsmount_delete(mnt); 731 fsnotify_vfsmount_delete(mnt);
647 dput(mnt->mnt_root); 732 dput(mnt->mnt_root);
648 free_vfsmnt(mnt); 733 free_vfsmnt(mnt);
649 deactivate_super(sb); 734 deactivate_super(sb);
650} 735}
651 736
652void mntput_no_expire(struct vfsmount *mnt) 737#ifdef CONFIG_SMP
653{ 738static inline void __mntput(struct vfsmount *mnt, int longrefs)
654repeat: 739{
655 if (atomic_add_unless(&mnt->mnt_count, -1, 1)) 740 if (!longrefs) {
656 return; 741put_again:
742 br_read_lock(vfsmount_lock);
743 if (likely(atomic_read(&mnt->mnt_longrefs))) {
744 mnt_dec_count(mnt);
745 br_read_unlock(vfsmount_lock);
746 return;
747 }
748 br_read_unlock(vfsmount_lock);
749 } else {
750 BUG_ON(!atomic_read(&mnt->mnt_longrefs));
751 if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
752 return;
753 }
754
657 br_write_lock(vfsmount_lock); 755 br_write_lock(vfsmount_lock);
658 if (!atomic_dec_and_test(&mnt->mnt_count)) { 756 if (!longrefs)
757 mnt_dec_count(mnt);
758 else
759 atomic_dec(&mnt->mnt_longrefs);
760 if (mnt_get_count(mnt)) {
659 br_write_unlock(vfsmount_lock); 761 br_write_unlock(vfsmount_lock);
660 return; 762 return;
661 } 763 }
662 if (likely(!mnt->mnt_pinned)) { 764 if (unlikely(mnt->mnt_pinned)) {
765 mnt_add_count(mnt, mnt->mnt_pinned + 1);
766 mnt->mnt_pinned = 0;
663 br_write_unlock(vfsmount_lock); 767 br_write_unlock(vfsmount_lock);
664 __mntput(mnt); 768 acct_auto_close_mnt(mnt);
769 goto put_again;
770 }
771 br_write_unlock(vfsmount_lock);
772 mntfree(mnt);
773}
774#else
775static inline void __mntput(struct vfsmount *mnt, int longrefs)
776{
777put_again:
778 mnt_dec_count(mnt);
779 if (likely(mnt_get_count(mnt)))
665 return; 780 return;
781 br_write_lock(vfsmount_lock);
782 if (unlikely(mnt->mnt_pinned)) {
783 mnt_add_count(mnt, mnt->mnt_pinned + 1);
784 mnt->mnt_pinned = 0;
785 br_write_unlock(vfsmount_lock);
786 acct_auto_close_mnt(mnt);
787 goto put_again;
666 } 788 }
667 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
668 mnt->mnt_pinned = 0;
669 br_write_unlock(vfsmount_lock); 789 br_write_unlock(vfsmount_lock);
670 acct_auto_close_mnt(mnt); 790 mntfree(mnt);
671 goto repeat; 791}
792#endif
793
794static void mntput_no_expire(struct vfsmount *mnt)
795{
796 __mntput(mnt, 0);
797}
798
799void mntput(struct vfsmount *mnt)
800{
801 if (mnt) {
802 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
803 if (unlikely(mnt->mnt_expiry_mark))
804 mnt->mnt_expiry_mark = 0;
805 __mntput(mnt, 0);
806 }
807}
808EXPORT_SYMBOL(mntput);
809
810struct vfsmount *mntget(struct vfsmount *mnt)
811{
812 if (mnt)
813 mnt_inc_count(mnt);
814 return mnt;
672} 815}
673EXPORT_SYMBOL(mntput_no_expire); 816EXPORT_SYMBOL(mntget);
817
818void mntput_long(struct vfsmount *mnt)
819{
820#ifdef CONFIG_SMP
821 if (mnt) {
822 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
823 if (unlikely(mnt->mnt_expiry_mark))
824 mnt->mnt_expiry_mark = 0;
825 __mntput(mnt, 1);
826 }
827#else
828 mntput(mnt);
829#endif
830}
831EXPORT_SYMBOL(mntput_long);
832
833struct vfsmount *mntget_long(struct vfsmount *mnt)
834{
835#ifdef CONFIG_SMP
836 if (mnt)
837 atomic_inc(&mnt->mnt_longrefs);
838 return mnt;
839#else
840 return mntget(mnt);
841#endif
842}
843EXPORT_SYMBOL(mntget_long);
674 844
675void mnt_pin(struct vfsmount *mnt) 845void mnt_pin(struct vfsmount *mnt)
676{ 846{
@@ -678,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt)
678 mnt->mnt_pinned++; 848 mnt->mnt_pinned++;
679 br_write_unlock(vfsmount_lock); 849 br_write_unlock(vfsmount_lock);
680} 850}
681
682EXPORT_SYMBOL(mnt_pin); 851EXPORT_SYMBOL(mnt_pin);
683 852
684void mnt_unpin(struct vfsmount *mnt) 853void mnt_unpin(struct vfsmount *mnt)
685{ 854{
686 br_write_lock(vfsmount_lock); 855 br_write_lock(vfsmount_lock);
687 if (mnt->mnt_pinned) { 856 if (mnt->mnt_pinned) {
688 atomic_inc(&mnt->mnt_count); 857 mnt_inc_count(mnt);
689 mnt->mnt_pinned--; 858 mnt->mnt_pinned--;
690 } 859 }
691 br_write_unlock(vfsmount_lock); 860 br_write_unlock(vfsmount_lock);
692} 861}
693
694EXPORT_SYMBOL(mnt_unpin); 862EXPORT_SYMBOL(mnt_unpin);
695 863
696static inline void mangle(struct seq_file *m, const char *s) 864static inline void mangle(struct seq_file *m, const char *s)
@@ -985,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt)
985 int minimum_refs = 0; 1153 int minimum_refs = 0;
986 struct vfsmount *p; 1154 struct vfsmount *p;
987 1155
988 br_read_lock(vfsmount_lock); 1156 /* write lock needed for mnt_get_count */
1157 br_write_lock(vfsmount_lock);
989 for (p = mnt; p; p = next_mnt(p, mnt)) { 1158 for (p = mnt; p; p = next_mnt(p, mnt)) {
990 actual_refs += atomic_read(&p->mnt_count); 1159 actual_refs += mnt_get_count(p);
991 minimum_refs += 2; 1160 minimum_refs += 2;
992 } 1161 }
993 br_read_unlock(vfsmount_lock); 1162 br_write_unlock(vfsmount_lock);
994 1163
995 if (actual_refs > minimum_refs) 1164 if (actual_refs > minimum_refs)
996 return 0; 1165 return 0;
@@ -1017,10 +1186,10 @@ int may_umount(struct vfsmount *mnt)
1017{ 1186{
1018 int ret = 1; 1187 int ret = 1;
1019 down_read(&namespace_sem); 1188 down_read(&namespace_sem);
1020 br_read_lock(vfsmount_lock); 1189 br_write_lock(vfsmount_lock);
1021 if (propagate_mount_busy(mnt, 2)) 1190 if (propagate_mount_busy(mnt, 2))
1022 ret = 0; 1191 ret = 0;
1023 br_read_unlock(vfsmount_lock); 1192 br_write_unlock(vfsmount_lock);
1024 up_read(&namespace_sem); 1193 up_read(&namespace_sem);
1025 return ret; 1194 return ret;
1026} 1195}
@@ -1047,7 +1216,7 @@ void release_mounts(struct list_head *head)
1047 dput(dentry); 1216 dput(dentry);
1048 mntput(m); 1217 mntput(m);
1049 } 1218 }
1050 mntput(mnt); 1219 mntput_long(mnt);
1051 } 1220 }
1052} 1221}
1053 1222
@@ -1073,7 +1242,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1073 list_del_init(&p->mnt_child); 1242 list_del_init(&p->mnt_child);
1074 if (p->mnt_parent != p) { 1243 if (p->mnt_parent != p) {
1075 p->mnt_parent->mnt_ghosts++; 1244 p->mnt_parent->mnt_ghosts++;
1076 p->mnt_mountpoint->d_mounted--; 1245 dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint);
1077 } 1246 }
1078 change_mnt_propagation(p, MS_PRIVATE); 1247 change_mnt_propagation(p, MS_PRIVATE);
1079 } 1248 }
@@ -1102,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags)
1102 flags & (MNT_FORCE | MNT_DETACH)) 1271 flags & (MNT_FORCE | MNT_DETACH))
1103 return -EINVAL; 1272 return -EINVAL;
1104 1273
1105 if (atomic_read(&mnt->mnt_count) != 2) 1274 /*
1275 * probably don't strictly need the lock here if we examined
1276 * all race cases, but it's a slowpath.
1277 */
1278 br_write_lock(vfsmount_lock);
1279 if (mnt_get_count(mnt) != 2) {
1280 br_write_lock(vfsmount_lock);
1106 return -EBUSY; 1281 return -EBUSY;
1282 }
1283 br_write_unlock(vfsmount_lock);
1107 1284
1108 if (!xchg(&mnt->mnt_expiry_mark, 1)) 1285 if (!xchg(&mnt->mnt_expiry_mark, 1))
1109 return -EAGAIN; 1286 return -EAGAIN;
@@ -1792,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1792 1969
1793unlock: 1970unlock:
1794 up_write(&namespace_sem); 1971 up_write(&namespace_sem);
1795 mntput(newmnt); 1972 mntput_long(newmnt);
1796 return err; 1973 return err;
1797} 1974}
1798 1975
@@ -2125,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2125 if (fs) { 2302 if (fs) {
2126 if (p == fs->root.mnt) { 2303 if (p == fs->root.mnt) {
2127 rootmnt = p; 2304 rootmnt = p;
2128 fs->root.mnt = mntget(q); 2305 fs->root.mnt = mntget_long(q);
2129 } 2306 }
2130 if (p == fs->pwd.mnt) { 2307 if (p == fs->pwd.mnt) {
2131 pwdmnt = p; 2308 pwdmnt = p;
2132 fs->pwd.mnt = mntget(q); 2309 fs->pwd.mnt = mntget_long(q);
2133 } 2310 }
2134 } 2311 }
2135 p = next_mnt(p, mnt_ns->root); 2312 p = next_mnt(p, mnt_ns->root);
@@ -2138,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2138 up_write(&namespace_sem); 2315 up_write(&namespace_sem);
2139 2316
2140 if (rootmnt) 2317 if (rootmnt)
2141 mntput(rootmnt); 2318 mntput_long(rootmnt);
2142 if (pwdmnt) 2319 if (pwdmnt)
2143 mntput(pwdmnt); 2320 mntput_long(pwdmnt);
2144 2321
2145 return new_ns; 2322 return new_ns;
2146} 2323}
@@ -2327,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2327 touch_mnt_namespace(current->nsproxy->mnt_ns); 2504 touch_mnt_namespace(current->nsproxy->mnt_ns);
2328 br_write_unlock(vfsmount_lock); 2505 br_write_unlock(vfsmount_lock);
2329 chroot_fs_refs(&root, &new); 2506 chroot_fs_refs(&root, &new);
2507
2330 error = 0; 2508 error = 0;
2331 path_put(&root_parent); 2509 path_put(&root_parent);
2332 path_put(&parent_path); 2510 path_put(&parent_path);
@@ -2353,6 +2531,7 @@ static void __init init_mount_tree(void)
2353 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2531 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
2354 if (IS_ERR(mnt)) 2532 if (IS_ERR(mnt))
2355 panic("Can't create rootfs"); 2533 panic("Can't create rootfs");
2534
2356 ns = create_mnt_ns(mnt); 2535 ns = create_mnt_ns(mnt);
2357 if (IS_ERR(ns)) 2536 if (IS_ERR(ns))
2358 panic("Can't allocate initial namespace"); 2537 panic("Can't allocate initial namespace");
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index f22b12e7d337..28f136d4aaec 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -17,6 +17,7 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/namei.h>
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
21#include <asm/byteorder.h> 22#include <asm/byteorder.h>
22 23
@@ -74,9 +75,12 @@ const struct inode_operations ncp_dir_inode_operations =
74 * Dentry operations routines 75 * Dentry operations routines
75 */ 76 */
76static int ncp_lookup_validate(struct dentry *, struct nameidata *); 77static int ncp_lookup_validate(struct dentry *, struct nameidata *);
77static int ncp_hash_dentry(struct dentry *, struct qstr *); 78static int ncp_hash_dentry(const struct dentry *, const struct inode *,
78static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *); 79 struct qstr *);
79static int ncp_delete_dentry(struct dentry *); 80static int ncp_compare_dentry(const struct dentry *, const struct inode *,
81 const struct dentry *, const struct inode *,
82 unsigned int, const char *, const struct qstr *);
83static int ncp_delete_dentry(const struct dentry *);
80 84
81static const struct dentry_operations ncp_dentry_operations = 85static const struct dentry_operations ncp_dentry_operations =
82{ 86{
@@ -113,10 +117,10 @@ static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator)
113 117
114#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) 118#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS)
115 119
116static inline int ncp_case_sensitive(struct dentry *dentry) 120static inline int ncp_case_sensitive(const struct inode *i)
117{ 121{
118#ifdef CONFIG_NCPFS_NFS_NS 122#ifdef CONFIG_NCPFS_NFS_NS
119 return ncp_namespace(dentry->d_inode) == NW_NS_NFS; 123 return ncp_namespace(i) == NW_NS_NFS;
120#else 124#else
121 return 0; 125 return 0;
122#endif /* CONFIG_NCPFS_NFS_NS */ 126#endif /* CONFIG_NCPFS_NFS_NS */
@@ -127,14 +131,16 @@ static inline int ncp_case_sensitive(struct dentry *dentry)
127 * is case-sensitive. 131 * is case-sensitive.
128 */ 132 */
129static int 133static int
130ncp_hash_dentry(struct dentry *dentry, struct qstr *this) 134ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode,
135 struct qstr *this)
131{ 136{
132 if (!ncp_case_sensitive(dentry)) { 137 if (!ncp_case_sensitive(inode)) {
138 struct super_block *sb = dentry->d_sb;
133 struct nls_table *t; 139 struct nls_table *t;
134 unsigned long hash; 140 unsigned long hash;
135 int i; 141 int i;
136 142
137 t = NCP_IO_TABLE(dentry); 143 t = NCP_IO_TABLE(sb);
138 hash = init_name_hash(); 144 hash = init_name_hash();
139 for (i=0; i<this->len ; i++) 145 for (i=0; i<this->len ; i++)
140 hash = partial_name_hash(ncp_tolower(t, this->name[i]), 146 hash = partial_name_hash(ncp_tolower(t, this->name[i]),
@@ -145,15 +151,17 @@ ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
145} 151}
146 152
147static int 153static int
148ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 154ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode,
155 const struct dentry *dentry, const struct inode *inode,
156 unsigned int len, const char *str, const struct qstr *name)
149{ 157{
150 if (a->len != b->len) 158 if (len != name->len)
151 return 1; 159 return 1;
152 160
153 if (ncp_case_sensitive(dentry)) 161 if (ncp_case_sensitive(pinode))
154 return strncmp(a->name, b->name, a->len); 162 return strncmp(str, name->name, len);
155 163
156 return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len); 164 return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len);
157} 165}
158 166
159/* 167/*
@@ -162,7 +170,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
162 * Closing files can be safely postponed until iput() - it's done there anyway. 170 * Closing files can be safely postponed until iput() - it's done there anyway.
163 */ 171 */
164static int 172static int
165ncp_delete_dentry(struct dentry * dentry) 173ncp_delete_dentry(const struct dentry * dentry)
166{ 174{
167 struct inode *inode = dentry->d_inode; 175 struct inode *inode = dentry->d_inode;
168 176
@@ -301,6 +309,9 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
301 int res, val = 0, len; 309 int res, val = 0, len;
302 __u8 __name[NCP_MAXPATHLEN + 1]; 310 __u8 __name[NCP_MAXPATHLEN + 1];
303 311
312 if (nd->flags & LOOKUP_RCU)
313 return -ECHILD;
314
304 parent = dget_parent(dentry); 315 parent = dget_parent(dentry);
305 dir = parent->d_inode; 316 dir = parent->d_inode;
306 317
@@ -384,21 +395,21 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
384 } 395 }
385 396
386 /* If a pointer is invalid, we search the dentry. */ 397 /* If a pointer is invalid, we search the dentry. */
387 spin_lock(&dcache_lock); 398 spin_lock(&parent->d_lock);
388 next = parent->d_subdirs.next; 399 next = parent->d_subdirs.next;
389 while (next != &parent->d_subdirs) { 400 while (next != &parent->d_subdirs) {
390 dent = list_entry(next, struct dentry, d_u.d_child); 401 dent = list_entry(next, struct dentry, d_u.d_child);
391 if ((unsigned long)dent->d_fsdata == fpos) { 402 if ((unsigned long)dent->d_fsdata == fpos) {
392 if (dent->d_inode) 403 if (dent->d_inode)
393 dget_locked(dent); 404 dget(dent);
394 else 405 else
395 dent = NULL; 406 dent = NULL;
396 spin_unlock(&dcache_lock); 407 spin_unlock(&parent->d_lock);
397 goto out; 408 goto out;
398 } 409 }
399 next = next->next; 410 next = next->next;
400 } 411 }
401 spin_unlock(&dcache_lock); 412 spin_unlock(&parent->d_lock);
402 return NULL; 413 return NULL;
403 414
404out: 415out:
@@ -592,7 +603,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
592 qname.hash = full_name_hash(qname.name, qname.len); 603 qname.hash = full_name_hash(qname.name, qname.len);
593 604
594 if (dentry->d_op && dentry->d_op->d_hash) 605 if (dentry->d_op && dentry->d_op->d_hash)
595 if (dentry->d_op->d_hash(dentry, &qname) != 0) 606 if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0)
596 goto end_advance; 607 goto end_advance;
597 608
598 newdent = d_lookup(dentry, &qname); 609 newdent = d_lookup(dentry, &qname);
@@ -611,35 +622,12 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
611 shrink_dcache_parent(newdent); 622 shrink_dcache_parent(newdent);
612 623
613 /* 624 /*
614 * It is not as dangerous as it looks. NetWare's OS2 namespace is 625 * NetWare's OS2 namespace is case preserving yet case
615 * case preserving yet case insensitive. So we update dentry's name 626 * insensitive. So we update dentry's name as received from
616 * as received from server. We found dentry via d_lookup with our 627 * server. Parent dir's i_mutex is locked because we're in
617 * hash, so we know that hash does not change, and so replacing name 628 * readdir.
618 * should be reasonably safe.
619 */ 629 */
620 if (qname.len == newdent->d_name.len && 630 dentry_update_name_case(newdent, &qname);
621 memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) {
622 struct inode *inode = newdent->d_inode;
623
624 /*
625 * Inside ncpfs all uses of d_name are either for debugging,
626 * or on functions which acquire inode mutex (mknod, creat,
627 * lookup). So grab i_mutex here, to be sure. d_path
628 * uses dcache_lock when generating path, so we should too.
629 * And finally d_compare is protected by dentry's d_lock, so
630 * here we go.
631 */
632 if (inode)
633 mutex_lock(&inode->i_mutex);
634 spin_lock(&dcache_lock);
635 spin_lock(&newdent->d_lock);
636 memcpy((char *) newdent->d_name.name, qname.name,
637 newdent->d_name.len);
638 spin_unlock(&newdent->d_lock);
639 spin_unlock(&dcache_lock);
640 if (inode)
641 mutex_unlock(&inode->i_mutex);
642 }
643 } 631 }
644 632
645 if (!newdent->d_inode) { 633 if (!newdent->d_inode) {
@@ -649,7 +637,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
649 entry->ino = iunique(dir->i_sb, 2); 637 entry->ino = iunique(dir->i_sb, 2);
650 inode = ncp_iget(dir->i_sb, entry); 638 inode = ncp_iget(dir->i_sb, entry);
651 if (inode) { 639 if (inode) {
652 newdent->d_op = &ncp_dentry_operations; 640 d_set_d_op(newdent, &ncp_dentry_operations);
653 d_instantiate(newdent, inode); 641 d_instantiate(newdent, inode);
654 if (!hashed) 642 if (!hashed)
655 d_rehash(newdent); 643 d_rehash(newdent);
@@ -657,7 +645,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
657 } else { 645 } else {
658 struct inode *inode = newdent->d_inode; 646 struct inode *inode = newdent->d_inode;
659 647
660 mutex_lock(&inode->i_mutex); 648 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
661 ncp_update_inode2(inode, entry); 649 ncp_update_inode2(inode, entry);
662 mutex_unlock(&inode->i_mutex); 650 mutex_unlock(&inode->i_mutex);
663 } 651 }
@@ -905,7 +893,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc
905 if (inode) { 893 if (inode) {
906 ncp_new_dentry(dentry); 894 ncp_new_dentry(dentry);
907add_entry: 895add_entry:
908 dentry->d_op = &ncp_dentry_operations; 896 d_set_d_op(dentry, &ncp_dentry_operations);
909 d_add(dentry, inode); 897 d_add(dentry, inode);
910 error = 0; 898 error = 0;
911 } 899 }
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 8fb93b604e73..9531c052d7a4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -29,6 +29,7 @@
29#include <linux/vfs.h> 29#include <linux/vfs.h>
30#include <linux/mount.h> 30#include <linux/mount.h>
31#include <linux/seq_file.h> 31#include <linux/seq_file.h>
32#include <linux/namei.h>
32 33
33#include <linux/ncp_fs.h> 34#include <linux/ncp_fs.h>
34 35
@@ -58,11 +59,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
58 return &ei->vfs_inode; 59 return &ei->vfs_inode;
59} 60}
60 61
61static void ncp_destroy_inode(struct inode *inode) 62static void ncp_i_callback(struct rcu_head *head)
62{ 63{
64 struct inode *inode = container_of(head, struct inode, i_rcu);
65 INIT_LIST_HEAD(&inode->i_dentry);
63 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); 66 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
64} 67}
65 68
69static void ncp_destroy_inode(struct inode *inode)
70{
71 call_rcu(&inode->i_rcu, ncp_i_callback);
72}
73
66static void init_once(void *foo) 74static void init_once(void *foo)
67{ 75{
68 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; 76 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
@@ -710,7 +718,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
710 sb->s_root = d_alloc_root(root_inode); 718 sb->s_root = d_alloc_root(root_inode);
711 if (!sb->s_root) 719 if (!sb->s_root)
712 goto out_no_root; 720 goto out_no_root;
713 sb->s_root->d_op = &ncp_root_dentry_operations; 721 d_set_d_op(sb->s_root, &ncp_root_dentry_operations);
714 return 0; 722 return 0;
715 723
716out_no_root: 724out_no_root:
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 3c57eca634ce..1220df75ff22 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -135,7 +135,7 @@ int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *,
135 const unsigned char *, unsigned int, int); 135 const unsigned char *, unsigned int, int);
136 136
137#define NCP_ESC ':' 137#define NCP_ESC ':'
138#define NCP_IO_TABLE(dentry) (NCP_SERVER((dentry)->d_inode)->nls_io) 138#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io)
139#define ncp_tolower(t, c) nls_tolower(t, c) 139#define ncp_tolower(t, c) nls_tolower(t, c)
140#define ncp_toupper(t, c) nls_toupper(t, c) 140#define ncp_toupper(t, c) nls_toupper(t, c)
141#define ncp_strnicmp(t, s1, s2, len) \ 141#define ncp_strnicmp(t, s1, s2, len) \
@@ -150,15 +150,15 @@ int ncp__io2vol(unsigned char *, unsigned int *,
150int ncp__vol2io(unsigned char *, unsigned int *, 150int ncp__vol2io(unsigned char *, unsigned int *,
151 const unsigned char *, unsigned int, int); 151 const unsigned char *, unsigned int, int);
152 152
153#define NCP_IO_TABLE(dentry) NULL 153#define NCP_IO_TABLE(sb) NULL
154#define ncp_tolower(t, c) tolower(c) 154#define ncp_tolower(t, c) tolower(c)
155#define ncp_toupper(t, c) toupper(c) 155#define ncp_toupper(t, c) toupper(c)
156#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U) 156#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U)
157#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U) 157#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U)
158 158
159 159
160static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1, 160static inline int ncp_strnicmp(const struct nls_table *t,
161 const unsigned char *s2, int len) 161 const unsigned char *s1, const unsigned char *s2, int len)
162{ 162{
163 while (len--) { 163 while (len--) {
164 if (tolower(*s1++) != tolower(*s2++)) 164 if (tolower(*s1++) != tolower(*s2++))
@@ -193,7 +193,7 @@ ncp_renew_dentries(struct dentry *parent)
193 struct list_head *next; 193 struct list_head *next;
194 struct dentry *dentry; 194 struct dentry *dentry;
195 195
196 spin_lock(&dcache_lock); 196 spin_lock(&parent->d_lock);
197 next = parent->d_subdirs.next; 197 next = parent->d_subdirs.next;
198 while (next != &parent->d_subdirs) { 198 while (next != &parent->d_subdirs) {
199 dentry = list_entry(next, struct dentry, d_u.d_child); 199 dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -205,7 +205,7 @@ ncp_renew_dentries(struct dentry *parent)
205 205
206 next = next->next; 206 next = next->next;
207 } 207 }
208 spin_unlock(&dcache_lock); 208 spin_unlock(&parent->d_lock);
209} 209}
210 210
211static inline void 211static inline void
@@ -215,7 +215,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
215 struct list_head *next; 215 struct list_head *next;
216 struct dentry *dentry; 216 struct dentry *dentry;
217 217
218 spin_lock(&dcache_lock); 218 spin_lock(&parent->d_lock);
219 next = parent->d_subdirs.next; 219 next = parent->d_subdirs.next;
220 while (next != &parent->d_subdirs) { 220 while (next != &parent->d_subdirs) {
221 dentry = list_entry(next, struct dentry, d_u.d_child); 221 dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -223,7 +223,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
223 ncp_age_dentry(server, dentry); 223 ncp_age_dentry(server, dentry);
224 next = next->next; 224 next = next->next;
225 } 225 }
226 spin_unlock(&dcache_lock); 226 spin_unlock(&parent->d_lock);
227} 227}
228 228
229struct ncp_cache_head { 229struct ncp_cache_head {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 996dd8989a91..d33da530097a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
438 if (dentry == NULL) 438 if (dentry == NULL)
439 return; 439 return;
440 440
441 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 441 d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
442 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); 442 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
443 if (IS_ERR(inode)) 443 if (IS_ERR(inode))
444 goto out; 444 goto out;
@@ -938,7 +938,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
938 * component of the path. 938 * component of the path.
939 * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. 939 * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT.
940 */ 940 */
941static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask) 941static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
942 unsigned int mask)
942{ 943{
943 if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT)) 944 if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
944 return 0; 945 return 0;
@@ -1018,7 +1019,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1018 * If the parent directory is seen to have changed, we throw out the 1019 * If the parent directory is seen to have changed, we throw out the
1019 * cached dentry and do a new lookup. 1020 * cached dentry and do a new lookup.
1020 */ 1021 */
1021static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) 1022static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1022{ 1023{
1023 struct inode *dir; 1024 struct inode *dir;
1024 struct inode *inode; 1025 struct inode *inode;
@@ -1027,6 +1028,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
1027 struct nfs_fattr *fattr = NULL; 1028 struct nfs_fattr *fattr = NULL;
1028 int error; 1029 int error;
1029 1030
1031 if (nd->flags & LOOKUP_RCU)
1032 return -ECHILD;
1033
1030 parent = dget_parent(dentry); 1034 parent = dget_parent(dentry);
1031 dir = parent->d_inode; 1035 dir = parent->d_inode;
1032 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); 1036 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
@@ -1117,7 +1121,7 @@ out_error:
1117/* 1121/*
1118 * This is called from dput() when d_count is going to 0. 1122 * This is called from dput() when d_count is going to 0.
1119 */ 1123 */
1120static int nfs_dentry_delete(struct dentry *dentry) 1124static int nfs_dentry_delete(const struct dentry *dentry)
1121{ 1125{
1122 dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", 1126 dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
1123 dentry->d_parent->d_name.name, dentry->d_name.name, 1127 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -1188,7 +1192,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1188 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) 1192 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
1189 goto out; 1193 goto out;
1190 1194
1191 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 1195 d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
1192 1196
1193 /* 1197 /*
1194 * If we're doing an exclusive create, optimize away the lookup 1198 * If we're doing an exclusive create, optimize away the lookup
@@ -1333,7 +1337,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1333 res = ERR_PTR(-ENAMETOOLONG); 1337 res = ERR_PTR(-ENAMETOOLONG);
1334 goto out; 1338 goto out;
1335 } 1339 }
1336 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 1340 d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
1337 1341
1338 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash 1342 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
1339 * the dentry. */ 1343 * the dentry. */
@@ -1718,11 +1722,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1718 dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, 1722 dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
1719 dir->i_ino, dentry->d_name.name); 1723 dir->i_ino, dentry->d_name.name);
1720 1724
1721 spin_lock(&dcache_lock);
1722 spin_lock(&dentry->d_lock); 1725 spin_lock(&dentry->d_lock);
1723 if (atomic_read(&dentry->d_count) > 1) { 1726 if (dentry->d_count > 1) {
1724 spin_unlock(&dentry->d_lock); 1727 spin_unlock(&dentry->d_lock);
1725 spin_unlock(&dcache_lock);
1726 /* Start asynchronous writeout of the inode */ 1728 /* Start asynchronous writeout of the inode */
1727 write_inode_now(dentry->d_inode, 0); 1729 write_inode_now(dentry->d_inode, 0);
1728 error = nfs_sillyrename(dir, dentry); 1730 error = nfs_sillyrename(dir, dentry);
@@ -1733,7 +1735,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1733 need_rehash = 1; 1735 need_rehash = 1;
1734 } 1736 }
1735 spin_unlock(&dentry->d_lock); 1737 spin_unlock(&dentry->d_lock);
1736 spin_unlock(&dcache_lock);
1737 error = nfs_safe_remove(dentry); 1738 error = nfs_safe_remove(dentry);
1738 if (!error || error == -ENOENT) { 1739 if (!error || error == -ENOENT) {
1739 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1740 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1868,7 +1869,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1868 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", 1869 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
1869 old_dentry->d_parent->d_name.name, old_dentry->d_name.name, 1870 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1870 new_dentry->d_parent->d_name.name, new_dentry->d_name.name, 1871 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
1871 atomic_read(&new_dentry->d_count)); 1872 new_dentry->d_count);
1872 1873
1873 /* 1874 /*
1874 * For non-directories, check whether the target is busy and if so, 1875 * For non-directories, check whether the target is busy and if so,
@@ -1886,7 +1887,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1886 rehash = new_dentry; 1887 rehash = new_dentry;
1887 } 1888 }
1888 1889
1889 if (atomic_read(&new_dentry->d_count) > 2) { 1890 if (new_dentry->d_count > 2) {
1890 int err; 1891 int err;
1891 1892
1892 /* copy the target dentry's name */ 1893 /* copy the target dentry's name */
@@ -2188,11 +2189,14 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
2188 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); 2189 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
2189} 2190}
2190 2191
2191int nfs_permission(struct inode *inode, int mask) 2192int nfs_permission(struct inode *inode, int mask, unsigned int flags)
2192{ 2193{
2193 struct rpc_cred *cred; 2194 struct rpc_cred *cred;
2194 int res = 0; 2195 int res = 0;
2195 2196
2197 if (flags & IPERM_FLAG_RCU)
2198 return -ECHILD;
2199
2196 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 2200 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
2197 2201
2198 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) 2202 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2240,7 +2244,7 @@ out:
2240out_notsup: 2244out_notsup:
2241 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 2245 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2242 if (res == 0) 2246 if (res == 0)
2243 res = generic_permission(inode, mask, NULL); 2247 res = generic_permission(inode, mask, flags, NULL);
2244 goto out; 2248 goto out;
2245} 2249}
2246 2250
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index ac7b814ce162..5596c6a2881e 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -63,9 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
63 * This again causes shrink_dcache_for_umount_subtree() to 63 * This again causes shrink_dcache_for_umount_subtree() to
64 * Oops, since the test for IS_ROOT() will fail. 64 * Oops, since the test for IS_ROOT() will fail.
65 */ 65 */
66 spin_lock(&dcache_lock); 66 spin_lock(&sb->s_root->d_inode->i_lock);
67 spin_lock(&sb->s_root->d_lock);
67 list_del_init(&sb->s_root->d_alias); 68 list_del_init(&sb->s_root->d_alias);
68 spin_unlock(&dcache_lock); 69 spin_unlock(&sb->s_root->d_lock);
70 spin_unlock(&sb->s_root->d_inode->i_lock);
69 } 71 }
70 return 0; 72 return 0;
71} 73}
@@ -119,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
119 security_d_instantiate(ret, inode); 121 security_d_instantiate(ret, inode);
120 122
121 if (ret->d_op == NULL) 123 if (ret->d_op == NULL)
122 ret->d_op = server->nfs_client->rpc_ops->dentry_ops; 124 d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
123out: 125out:
124 nfs_free_fattr(fsinfo.fattr); 126 nfs_free_fattr(fsinfo.fattr);
125 return ret; 127 return ret;
@@ -226,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
226 security_d_instantiate(ret, inode); 228 security_d_instantiate(ret, inode);
227 229
228 if (ret->d_op == NULL) 230 if (ret->d_op == NULL)
229 ret->d_op = server->nfs_client->rpc_ops->dentry_ops; 231 d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
230 232
231out: 233out:
232 nfs_free_fattr(fattr); 234 nfs_free_fattr(fattr);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e67e31c73416..017daa3bed38 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1438,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
1438 return &nfsi->vfs_inode; 1438 return &nfsi->vfs_inode;
1439} 1439}
1440 1440
1441void nfs_destroy_inode(struct inode *inode) 1441static void nfs_i_callback(struct rcu_head *head)
1442{ 1442{
1443 struct inode *inode = container_of(head, struct inode, i_rcu);
1444 INIT_LIST_HEAD(&inode->i_dentry);
1443 kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); 1445 kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
1444} 1446}
1445 1447
1448void nfs_destroy_inode(struct inode *inode)
1449{
1450 call_rcu(&inode->i_rcu, nfs_i_callback);
1451}
1452
1446static inline void nfs4_init_once(struct nfs_inode *nfsi) 1453static inline void nfs4_init_once(struct nfs_inode *nfsi)
1447{ 1454{
1448#ifdef CONFIG_NFS_V4 1455#ifdef CONFIG_NFS_V4
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index db6aa3673cf3..74aaf3963c10 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -49,12 +49,17 @@ char *nfs_path(const char *base,
49 const struct dentry *dentry, 49 const struct dentry *dentry,
50 char *buffer, ssize_t buflen) 50 char *buffer, ssize_t buflen)
51{ 51{
52 char *end = buffer+buflen; 52 char *end;
53 int namelen; 53 int namelen;
54 unsigned seq;
54 55
56rename_retry:
57 end = buffer+buflen;
55 *--end = '\0'; 58 *--end = '\0';
56 buflen--; 59 buflen--;
57 spin_lock(&dcache_lock); 60
61 seq = read_seqbegin(&rename_lock);
62 rcu_read_lock();
58 while (!IS_ROOT(dentry) && dentry != droot) { 63 while (!IS_ROOT(dentry) && dentry != droot) {
59 namelen = dentry->d_name.len; 64 namelen = dentry->d_name.len;
60 buflen -= namelen + 1; 65 buflen -= namelen + 1;
@@ -65,7 +70,9 @@ char *nfs_path(const char *base,
65 *--end = '/'; 70 *--end = '/';
66 dentry = dentry->d_parent; 71 dentry = dentry->d_parent;
67 } 72 }
68 spin_unlock(&dcache_lock); 73 rcu_read_unlock();
74 if (read_seqretry(&rename_lock, seq))
75 goto rename_retry;
69 if (*end != '/') { 76 if (*end != '/') {
70 if (--buflen < 0) 77 if (--buflen < 0)
71 goto Elong; 78 goto Elong;
@@ -82,7 +89,9 @@ char *nfs_path(const char *base,
82 memcpy(end, base, namelen); 89 memcpy(end, base, namelen);
83 return end; 90 return end;
84Elong_unlock: 91Elong_unlock:
85 spin_unlock(&dcache_lock); 92 rcu_read_unlock();
93 if (read_seqretry(&rename_lock, seq))
94 goto rename_retry;
86Elong: 95Elong:
87 return ERR_PTR(-ENAMETOOLONG); 96 return ERR_PTR(-ENAMETOOLONG);
88} 97}
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 7bdec8531400..8fe9eb47a97f 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -496,7 +496,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
496 496
497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", 497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
498 dentry->d_parent->d_name.name, dentry->d_name.name, 498 dentry->d_parent->d_name.name, dentry->d_name.name,
499 atomic_read(&dentry->d_count)); 499 dentry->d_count);
500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME); 500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
501 501
502 /* 502 /*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 184938fcff04..3a359023c9f7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1756,8 +1756,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1756 goto out_dput_new; 1756 goto out_dput_new;
1757 1757
1758 if (svc_msnfs(ffhp) && 1758 if (svc_msnfs(ffhp) &&
1759 ((atomic_read(&odentry->d_count) > 1) 1759 ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
1760 || (atomic_read(&ndentry->d_count) > 1))) {
1761 host_err = -EPERM; 1760 host_err = -EPERM;
1762 goto out_dput_new; 1761 goto out_dput_new;
1763 } 1762 }
@@ -1843,7 +1842,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1843 if (type != S_IFDIR) { /* It's UNLINK */ 1842 if (type != S_IFDIR) { /* It's UNLINK */
1844#ifdef MSNFS 1843#ifdef MSNFS
1845 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1844 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1846 (atomic_read(&rdentry->d_count) > 1)) { 1845 (rdentry->d_count > 1)) {
1847 host_err = -EPERM; 1846 host_err = -EPERM;
1848 } else 1847 } else
1849#endif 1848#endif
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 71d4bc8464e0..77b48c8fab17 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -785,15 +785,19 @@ out_err:
785 return err; 785 return err;
786} 786}
787 787
788int nilfs_permission(struct inode *inode, int mask) 788int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
789{ 789{
790 struct nilfs_root *root = NILFS_I(inode)->i_root; 790 struct nilfs_root *root;
791
792 if (flags & IPERM_FLAG_RCU)
793 return -ECHILD;
791 794
795 root = NILFS_I(inode)->i_root;
792 if ((mask & MAY_WRITE) && root && 796 if ((mask & MAY_WRITE) && root &&
793 root->cno != NILFS_CPTREE_CURRENT_CNO) 797 root->cno != NILFS_CPTREE_CURRENT_CNO)
794 return -EROFS; /* snapshot is not writable */ 798 return -EROFS; /* snapshot is not writable */
795 799
796 return generic_permission(inode, mask, NULL); 800 return generic_permission(inode, mask, flags, NULL);
797} 801}
798 802
799int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, 803int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode,
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index f7560da5a567..0ca98823db59 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -256,7 +256,7 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *);
256extern void nilfs_truncate(struct inode *); 256extern void nilfs_truncate(struct inode *);
257extern void nilfs_evict_inode(struct inode *); 257extern void nilfs_evict_inode(struct inode *);
258extern int nilfs_setattr(struct dentry *, struct iattr *); 258extern int nilfs_setattr(struct dentry *, struct iattr *);
259int nilfs_permission(struct inode *inode, int mask); 259int nilfs_permission(struct inode *inode, int mask, unsigned int flags);
260extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, 260extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *,
261 struct buffer_head **); 261 struct buffer_head **);
262extern int nilfs_inode_dirty(struct inode *); 262extern int nilfs_inode_dirty(struct inode *);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index f804d41ec9d3..e2dcc9c733f7 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -162,10 +162,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
162 return &ii->vfs_inode; 162 return &ii->vfs_inode;
163} 163}
164 164
165void nilfs_destroy_inode(struct inode *inode) 165static void nilfs_i_callback(struct rcu_head *head)
166{ 166{
167 struct inode *inode = container_of(head, struct inode, i_rcu);
167 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 168 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
168 169
170 INIT_LIST_HEAD(&inode->i_dentry);
171
169 if (mdi) { 172 if (mdi) {
170 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ 173 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
171 kfree(mdi); 174 kfree(mdi);
@@ -173,6 +176,11 @@ void nilfs_destroy_inode(struct inode *inode)
173 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); 176 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
174} 177}
175 178
179void nilfs_destroy_inode(struct inode *inode)
180{
181 call_rcu(&inode->i_rcu, nilfs_i_callback);
182}
183
176static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) 184static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
177{ 185{
178 struct the_nilfs *nilfs = sbi->s_nilfs; 186 struct the_nilfs *nilfs = sbi->s_nilfs;
@@ -838,7 +846,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
838 846
839static int nilfs_tree_was_touched(struct dentry *root_dentry) 847static int nilfs_tree_was_touched(struct dentry *root_dentry)
840{ 848{
841 return atomic_read(&root_dentry->d_count) > 1; 849 return root_dentry->d_count > 1;
842} 850}
843 851
844/** 852/**
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 20dc218707ca..79b47cbb5cd8 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -59,7 +59,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
59 /* determine if the children should tell inode about their events */ 59 /* determine if the children should tell inode about their events */
60 watched = fsnotify_inode_watches_children(inode); 60 watched = fsnotify_inode_watches_children(inode);
61 61
62 spin_lock(&dcache_lock); 62 spin_lock(&inode->i_lock);
63 /* run all of the dentries associated with this inode. Since this is a 63 /* run all of the dentries associated with this inode. Since this is a
64 * directory, there damn well better only be one item on this list */ 64 * directory, there damn well better only be one item on this list */
65 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 65 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
@@ -68,19 +68,21 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
68 /* run all of the children of the original inode and fix their 68 /* run all of the children of the original inode and fix their
69 * d_flags to indicate parental interest (their parent is the 69 * d_flags to indicate parental interest (their parent is the
70 * original inode) */ 70 * original inode) */
71 spin_lock(&alias->d_lock);
71 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { 72 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
72 if (!child->d_inode) 73 if (!child->d_inode)
73 continue; 74 continue;
74 75
75 spin_lock(&child->d_lock); 76 spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
76 if (watched) 77 if (watched)
77 child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; 78 child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
78 else 79 else
79 child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; 80 child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
80 spin_unlock(&child->d_lock); 81 spin_unlock(&child->d_lock);
81 } 82 }
83 spin_unlock(&alias->d_lock);
82 } 84 }
83 spin_unlock(&dcache_lock); 85 spin_unlock(&inode->i_lock);
84} 86}
85 87
86/* Notify this dentry's parent about a child's events. */ 88/* Notify this dentry's parent about a child's events. */
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 93622b175fc7..a627ed82c0a3 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
332 return NULL; 332 return NULL;
333} 333}
334 334
335static void ntfs_i_callback(struct rcu_head *head)
336{
337 struct inode *inode = container_of(head, struct inode, i_rcu);
338 INIT_LIST_HEAD(&inode->i_dentry);
339 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
340}
341
335void ntfs_destroy_big_inode(struct inode *inode) 342void ntfs_destroy_big_inode(struct inode *inode)
336{ 343{
337 ntfs_inode *ni = NTFS_I(inode); 344 ntfs_inode *ni = NTFS_I(inode);
@@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode)
340 BUG_ON(ni->page); 347 BUG_ON(ni->page);
341 if (!atomic_dec_and_test(&ni->count)) 348 if (!atomic_dec_and_test(&ni->count))
342 BUG(); 349 BUG();
343 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); 350 call_rcu(&inode->i_rcu, ntfs_i_callback);
344} 351}
345 352
346static inline ntfs_inode *ntfs_alloc_extent_inode(void) 353static inline ntfs_inode *ntfs_alloc_extent_inode(void)
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 391915093fe1..704f6b1742f3 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -291,13 +291,17 @@ static int ocfs2_set_acl(handle_t *handle,
291 return ret; 291 return ret;
292} 292}
293 293
294int ocfs2_check_acl(struct inode *inode, int mask) 294int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
295{ 295{
296 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 296 struct ocfs2_super *osb;
297 struct buffer_head *di_bh = NULL; 297 struct buffer_head *di_bh = NULL;
298 struct posix_acl *acl; 298 struct posix_acl *acl;
299 int ret = -EAGAIN; 299 int ret = -EAGAIN;
300 300
301 if (flags & IPERM_FLAG_RCU)
302 return -ECHILD;
303
304 osb = OCFS2_SB(inode->i_sb);
301 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 305 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
302 return ret; 306 return ret;
303 307
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 5c5d31f05853..4fe7c9cf4bfb 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,7 +26,7 @@ struct ocfs2_acl_entry {
26 __le32 e_id; 26 __le32 e_id;
27}; 27};
28 28
29extern int ocfs2_check_acl(struct inode *, int); 29extern int ocfs2_check_acl(struct inode *, int, unsigned int);
30extern int ocfs2_acl_chmod(struct inode *); 30extern int ocfs2_acl_chmod(struct inode *);
31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
32 struct buffer_head *, struct buffer_head *, 32 struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 895532ac4d98..6d80ecc7834f 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -52,9 +52,15 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
52static int ocfs2_dentry_revalidate(struct dentry *dentry, 52static int ocfs2_dentry_revalidate(struct dentry *dentry,
53 struct nameidata *nd) 53 struct nameidata *nd)
54{ 54{
55 struct inode *inode = dentry->d_inode; 55 struct inode *inode;
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 57 struct ocfs2_super *osb;
58
59 if (nd->flags & LOOKUP_RCU)
60 return -ECHILD;
61
62 inode = dentry->d_inode;
63 osb = OCFS2_SB(dentry->d_sb);
58 64
59 mlog_entry("(0x%p, '%.*s')\n", dentry, 65 mlog_entry("(0x%p, '%.*s')\n", dentry,
60 dentry->d_name.len, dentry->d_name.name); 66 dentry->d_name.len, dentry->d_name.name);
@@ -169,23 +175,25 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
169 struct list_head *p; 175 struct list_head *p;
170 struct dentry *dentry = NULL; 176 struct dentry *dentry = NULL;
171 177
172 spin_lock(&dcache_lock); 178 spin_lock(&inode->i_lock);
173
174 list_for_each(p, &inode->i_dentry) { 179 list_for_each(p, &inode->i_dentry) {
175 dentry = list_entry(p, struct dentry, d_alias); 180 dentry = list_entry(p, struct dentry, d_alias);
176 181
182 spin_lock(&dentry->d_lock);
177 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 183 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
178 mlog(0, "dentry found: %.*s\n", 184 mlog(0, "dentry found: %.*s\n",
179 dentry->d_name.len, dentry->d_name.name); 185 dentry->d_name.len, dentry->d_name.name);
180 186
181 dget_locked(dentry); 187 dget_dlock(dentry);
188 spin_unlock(&dentry->d_lock);
182 break; 189 break;
183 } 190 }
191 spin_unlock(&dentry->d_lock);
184 192
185 dentry = NULL; 193 dentry = NULL;
186 } 194 }
187 195
188 spin_unlock(&dcache_lock); 196 spin_unlock(&inode->i_lock);
189 197
190 return dentry; 198 return dentry;
191} 199}
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b2df490a19ed..8c5c0eddc365 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
351 return &ip->ip_vfs_inode; 351 return &ip->ip_vfs_inode;
352} 352}
353 353
354static void dlmfs_destroy_inode(struct inode *inode) 354static void dlmfs_i_callback(struct rcu_head *head)
355{ 355{
356 struct inode *inode = container_of(head, struct inode, i_rcu);
357 INIT_LIST_HEAD(&inode->i_dentry);
356 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); 358 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
357} 359}
358 360
361static void dlmfs_destroy_inode(struct inode *inode)
362{
363 call_rcu(&inode->i_rcu, dlmfs_i_callback);
364}
365
359static void dlmfs_evict_inode(struct inode *inode) 366static void dlmfs_evict_inode(struct inode *inode)
360{ 367{
361 int status; 368 int status;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 19ad145d2af3..6adafa576065 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -138,7 +138,7 @@ check_gen:
138 138
139 result = d_obtain_alias(inode); 139 result = d_obtain_alias(inode);
140 if (!IS_ERR(result)) 140 if (!IS_ERR(result))
141 result->d_op = &ocfs2_dentry_ops; 141 d_set_d_op(result, &ocfs2_dentry_ops);
142 else 142 else
143 mlog_errno(PTR_ERR(result)); 143 mlog_errno(PTR_ERR(result));
144 144
@@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
176 176
177 parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); 177 parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
178 if (!IS_ERR(parent)) 178 if (!IS_ERR(parent))
179 parent->d_op = &ocfs2_dentry_ops; 179 d_set_d_op(parent, &ocfs2_dentry_ops);
180 180
181bail_unlock: 181bail_unlock:
182 ocfs2_inode_unlock(dir, 0); 182 ocfs2_inode_unlock(dir, 0);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f6cba566429d..bdadbae09094 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1307,10 +1307,13 @@ bail:
1307 return err; 1307 return err;
1308} 1308}
1309 1309
1310int ocfs2_permission(struct inode *inode, int mask) 1310int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
1311{ 1311{
1312 int ret; 1312 int ret;
1313 1313
1314 if (flags & IPERM_FLAG_RCU)
1315 return -ECHILD;
1316
1314 mlog_entry_void(); 1317 mlog_entry_void();
1315 1318
1316 ret = ocfs2_inode_lock(inode, NULL, 0); 1319 ret = ocfs2_inode_lock(inode, NULL, 0);
@@ -1320,7 +1323,7 @@ int ocfs2_permission(struct inode *inode, int mask)
1320 goto out; 1323 goto out;
1321 } 1324 }
1322 1325
1323 ret = generic_permission(inode, mask, ocfs2_check_acl); 1326 ret = generic_permission(inode, mask, flags, ocfs2_check_acl);
1324 1327
1325 ocfs2_inode_unlock(inode, 0); 1328 ocfs2_inode_unlock(inode, 0);
1326out: 1329out:
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 97bf761c9e7c..f5afbbef6703 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
63 struct kstat *stat); 63 struct kstat *stat);
64int ocfs2_permission(struct inode *inode, int mask); 64int ocfs2_permission(struct inode *inode, int mask, unsigned int flags);
65 65
66int ocfs2_should_update_atime(struct inode *inode, 66int ocfs2_should_update_atime(struct inode *inode,
67 struct vfsmount *vfsmnt); 67 struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ff5744e1e36f..d14cad6e2e41 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
147 spin_unlock(&oi->ip_lock); 147 spin_unlock(&oi->ip_lock);
148 148
149bail_add: 149bail_add:
150 dentry->d_op = &ocfs2_dentry_ops; 150 d_set_d_op(dentry, &ocfs2_dentry_ops);
151 ret = d_splice_alias(inode, dentry); 151 ret = d_splice_alias(inode, dentry);
152 152
153 if (inode) { 153 if (inode) {
@@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir,
415 mlog_errno(status); 415 mlog_errno(status);
416 goto leave; 416 goto leave;
417 } 417 }
418 dentry->d_op = &ocfs2_dentry_ops; 418 d_set_d_op(dentry, &ocfs2_dentry_ops);
419 419
420 status = ocfs2_add_entry(handle, dentry, inode, 420 status = ocfs2_add_entry(handle, dentry, inode,
421 OCFS2_I(inode)->ip_blkno, parent_fe_bh, 421 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
@@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry,
743 } 743 }
744 744
745 ihold(inode); 745 ihold(inode);
746 dentry->d_op = &ocfs2_dentry_ops; 746 d_set_d_op(dentry, &ocfs2_dentry_ops);
747 d_instantiate(dentry, inode); 747 d_instantiate(dentry, inode);
748 748
749out_commit: 749out_commit:
@@ -1794,7 +1794,7 @@ static int ocfs2_symlink(struct inode *dir,
1794 mlog_errno(status); 1794 mlog_errno(status);
1795 goto bail; 1795 goto bail;
1796 } 1796 }
1797 dentry->d_op = &ocfs2_dentry_ops; 1797 d_set_d_op(dentry, &ocfs2_dentry_ops);
1798 1798
1799 status = ocfs2_add_entry(handle, dentry, inode, 1799 status = ocfs2_add_entry(handle, dentry, inode,
1800 le64_to_cpu(fe->i_blkno), parent_fe_bh, 1800 le64_to_cpu(fe->i_blkno), parent_fe_bh,
@@ -2459,7 +2459,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2459 goto out_commit; 2459 goto out_commit;
2460 } 2460 }
2461 2461
2462 dentry->d_op = &ocfs2_dentry_ops; 2462 d_set_d_op(dentry, &ocfs2_dentry_ops);
2463 d_instantiate(dentry, inode); 2463 d_instantiate(dentry, inode);
2464 status = 0; 2464 status = 0;
2465out_commit: 2465out_commit:
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index cfeab7ce3697..17ff46fa8a10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -569,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
569 return &oi->vfs_inode; 569 return &oi->vfs_inode;
570} 570}
571 571
572static void ocfs2_destroy_inode(struct inode *inode) 572static void ocfs2_i_callback(struct rcu_head *head)
573{ 573{
574 struct inode *inode = container_of(head, struct inode, i_rcu);
575 INIT_LIST_HEAD(&inode->i_dentry);
574 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); 576 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
575} 577}
576 578
579static void ocfs2_destroy_inode(struct inode *inode)
580{
581 call_rcu(&inode->i_rcu, ocfs2_i_callback);
582}
583
577static unsigned long long ocfs2_max_file_offset(unsigned int bbits, 584static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
578 unsigned int cbits) 585 unsigned int cbits)
579{ 586{
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 911e61f348fc..a2a5bff774e3 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
343 return &oi->vfs_inode; 343 return &oi->vfs_inode;
344} 344}
345 345
346static void openprom_destroy_inode(struct inode *inode) 346static void openprom_i_callback(struct rcu_head *head)
347{ 347{
348 struct inode *inode = container_of(head, struct inode, i_rcu);
349 INIT_LIST_HEAD(&inode->i_dentry);
348 kmem_cache_free(op_inode_cachep, OP_I(inode)); 350 kmem_cache_free(op_inode_cachep, OP_I(inode));
349} 351}
350 352
353static void openprom_destroy_inode(struct inode *inode)
354{
355 call_rcu(&inode->i_rcu, openprom_i_callback);
356}
357
351static struct inode *openprom_iget(struct super_block *sb, ino_t ino) 358static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
352{ 359{
353 struct inode *inode; 360 struct inode *inode;
diff --git a/fs/pipe.c b/fs/pipe.c
index 04629f36e397..68f1f8e4e23b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -999,12 +999,12 @@ struct file *create_write_pipe(int flags)
999 goto err; 999 goto err;
1000 1000
1001 err = -ENOMEM; 1001 err = -ENOMEM;
1002 path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 1002 path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
1003 if (!path.dentry) 1003 if (!path.dentry)
1004 goto err_inode; 1004 goto err_inode;
1005 path.mnt = mntget(pipe_mnt); 1005 path.mnt = mntget(pipe_mnt);
1006 1006
1007 path.dentry->d_op = &pipefs_dentry_operations; 1007 d_set_d_op(path.dentry, &pipefs_dentry_operations);
1008 d_instantiate(path.dentry, inode); 1008 d_instantiate(path.dentry, inode);
1009 1009
1010 err = -ENFILE; 1010 err = -ENFILE;
@@ -1253,6 +1253,10 @@ out:
1253 return ret; 1253 return ret;
1254} 1254}
1255 1255
1256static const struct super_operations pipefs_ops = {
1257 .destroy_inode = free_inode_nonrcu,
1258};
1259
1256/* 1260/*
1257 * pipefs should _never_ be mounted by userland - too much of security hassle, 1261 * pipefs should _never_ be mounted by userland - too much of security hassle,
1258 * no real gain from having the whole whorehouse mounted. So we don't need 1262 * no real gain from having the whole whorehouse mounted. So we don't need
@@ -1262,7 +1266,7 @@ out:
1262static struct dentry *pipefs_mount(struct file_system_type *fs_type, 1266static struct dentry *pipefs_mount(struct file_system_type *fs_type,
1263 int flags, const char *dev_name, void *data) 1267 int flags, const char *dev_name, void *data)
1264{ 1268{
1265 return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); 1269 return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC);
1266} 1270}
1267 1271
1268static struct file_system_type pipe_fs_type = { 1272static struct file_system_type pipe_fs_type = {
@@ -1288,7 +1292,7 @@ static int __init init_pipe_fs(void)
1288static void __exit exit_pipe_fs(void) 1292static void __exit exit_pipe_fs(void)
1289{ 1293{
1290 unregister_filesystem(&pipe_fs_type); 1294 unregister_filesystem(&pipe_fs_type);
1291 mntput(pipe_mnt); 1295 mntput_long(pipe_mnt);
1292} 1296}
1293 1297
1294fs_initcall(init_pipe_fs); 1298fs_initcall(init_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index 8066b8dd748f..d42514e32380 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -288,7 +288,7 @@ out:
288 */ 288 */
289static inline int do_refcount_check(struct vfsmount *mnt, int count) 289static inline int do_refcount_check(struct vfsmount *mnt, int count)
290{ 290{
291 int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; 291 int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
292 return (mycount > count); 292 return (mycount > count);
293} 293}
294 294
@@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
300 * Check if any of these mounts that **do not have submounts** 300 * Check if any of these mounts that **do not have submounts**
301 * have more references than 'refcnt'. If so return busy. 301 * have more references than 'refcnt'. If so return busy.
302 * 302 *
303 * vfsmount lock must be held for read or write 303 * vfsmount lock must be held for write
304 */ 304 */
305int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 305int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
306{ 306{
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 08cba2c3b612..b20962c71a52 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1795,10 +1795,16 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
1795 */ 1795 */
1796static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1796static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1797{ 1797{
1798 struct inode *inode = dentry->d_inode; 1798 struct inode *inode;
1799 struct task_struct *task = get_proc_task(inode); 1799 struct task_struct *task;
1800 const struct cred *cred; 1800 const struct cred *cred;
1801 1801
1802 if (nd && nd->flags & LOOKUP_RCU)
1803 return -ECHILD;
1804
1805 inode = dentry->d_inode;
1806 task = get_proc_task(inode);
1807
1802 if (task) { 1808 if (task) {
1803 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1809 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1804 task_dumpable(task)) { 1810 task_dumpable(task)) {
@@ -1820,7 +1826,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1820 return 0; 1826 return 0;
1821} 1827}
1822 1828
1823static int pid_delete_dentry(struct dentry * dentry) 1829static int pid_delete_dentry(const struct dentry * dentry)
1824{ 1830{
1825 /* Is the task we represent dead? 1831 /* Is the task we represent dead?
1826 * If so, then don't put the dentry on the lru list, 1832 * If so, then don't put the dentry on the lru list,
@@ -1964,12 +1970,19 @@ static int proc_fd_link(struct inode *inode, struct path *path)
1964 1970
1965static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1971static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1966{ 1972{
1967 struct inode *inode = dentry->d_inode; 1973 struct inode *inode;
1968 struct task_struct *task = get_proc_task(inode); 1974 struct task_struct *task;
1969 int fd = proc_fd(inode); 1975 int fd;
1970 struct files_struct *files; 1976 struct files_struct *files;
1971 const struct cred *cred; 1977 const struct cred *cred;
1972 1978
1979 if (nd && nd->flags & LOOKUP_RCU)
1980 return -ECHILD;
1981
1982 inode = dentry->d_inode;
1983 task = get_proc_task(inode);
1984 fd = proc_fd(inode);
1985
1973 if (task) { 1986 if (task) {
1974 files = get_files_struct(task); 1987 files = get_files_struct(task);
1975 if (files) { 1988 if (files) {
@@ -2045,7 +2058,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
2045 inode->i_op = &proc_pid_link_inode_operations; 2058 inode->i_op = &proc_pid_link_inode_operations;
2046 inode->i_size = 64; 2059 inode->i_size = 64;
2047 ei->op.proc_get_link = proc_fd_link; 2060 ei->op.proc_get_link = proc_fd_link;
2048 dentry->d_op = &tid_fd_dentry_operations; 2061 d_set_d_op(dentry, &tid_fd_dentry_operations);
2049 d_add(dentry, inode); 2062 d_add(dentry, inode);
2050 /* Close the race of the process dying before we return the dentry */ 2063 /* Close the race of the process dying before we return the dentry */
2051 if (tid_fd_revalidate(dentry, NULL)) 2064 if (tid_fd_revalidate(dentry, NULL))
@@ -2177,11 +2190,13 @@ static const struct file_operations proc_fd_operations = {
2177 * /proc/pid/fd needs a special permission handler so that a process can still 2190 * /proc/pid/fd needs a special permission handler so that a process can still
2178 * access /proc/self/fd after it has executed a setuid(). 2191 * access /proc/self/fd after it has executed a setuid().
2179 */ 2192 */
2180static int proc_fd_permission(struct inode *inode, int mask) 2193static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
2181{ 2194{
2182 int rv; 2195 int rv;
2183 2196
2184 rv = generic_permission(inode, mask, NULL); 2197 if (flags & IPERM_FLAG_RCU)
2198 return -ECHILD;
2199 rv = generic_permission(inode, mask, flags, NULL);
2185 if (rv == 0) 2200 if (rv == 0)
2186 return 0; 2201 return 0;
2187 if (task_pid(current) == proc_pid(inode)) 2202 if (task_pid(current) == proc_pid(inode))
@@ -2213,7 +2228,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2213 ei->fd = fd; 2228 ei->fd = fd;
2214 inode->i_mode = S_IFREG | S_IRUSR; 2229 inode->i_mode = S_IFREG | S_IRUSR;
2215 inode->i_fop = &proc_fdinfo_file_operations; 2230 inode->i_fop = &proc_fdinfo_file_operations;
2216 dentry->d_op = &tid_fd_dentry_operations; 2231 d_set_d_op(dentry, &tid_fd_dentry_operations);
2217 d_add(dentry, inode); 2232 d_add(dentry, inode);
2218 /* Close the race of the process dying before we return the dentry */ 2233 /* Close the race of the process dying before we return the dentry */
2219 if (tid_fd_revalidate(dentry, NULL)) 2234 if (tid_fd_revalidate(dentry, NULL))
@@ -2272,7 +2287,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
2272 if (p->fop) 2287 if (p->fop)
2273 inode->i_fop = p->fop; 2288 inode->i_fop = p->fop;
2274 ei->op = p->op; 2289 ei->op = p->op;
2275 dentry->d_op = &pid_dentry_operations; 2290 d_set_d_op(dentry, &pid_dentry_operations);
2276 d_add(dentry, inode); 2291 d_add(dentry, inode);
2277 /* Close the race of the process dying before we return the dentry */ 2292 /* Close the race of the process dying before we return the dentry */
2278 if (pid_revalidate(dentry, NULL)) 2293 if (pid_revalidate(dentry, NULL))
@@ -2639,8 +2654,14 @@ static const struct pid_entry proc_base_stuff[] = {
2639 */ 2654 */
2640static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) 2655static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2641{ 2656{
2642 struct inode *inode = dentry->d_inode; 2657 struct inode *inode;
2643 struct task_struct *task = get_proc_task(inode); 2658 struct task_struct *task;
2659
2660 if (nd->flags & LOOKUP_RCU)
2661 return -ECHILD;
2662
2663 inode = dentry->d_inode;
2664 task = get_proc_task(inode);
2644 if (task) { 2665 if (task) {
2645 put_task_struct(task); 2666 put_task_struct(task);
2646 return 1; 2667 return 1;
@@ -2691,7 +2712,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2691 if (p->fop) 2712 if (p->fop)
2692 inode->i_fop = p->fop; 2713 inode->i_fop = p->fop;
2693 ei->op = p->op; 2714 ei->op = p->op;
2694 dentry->d_op = &proc_base_dentry_operations; 2715 d_set_d_op(dentry, &proc_base_dentry_operations);
2695 d_add(dentry, inode); 2716 d_add(dentry, inode);
2696 error = NULL; 2717 error = NULL;
2697out: 2718out:
@@ -3005,7 +3026,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
3005 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 3026 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
3006 ARRAY_SIZE(tgid_base_stuff)); 3027 ARRAY_SIZE(tgid_base_stuff));
3007 3028
3008 dentry->d_op = &pid_dentry_operations; 3029 d_set_d_op(dentry, &pid_dentry_operations);
3009 3030
3010 d_add(dentry, inode); 3031 d_add(dentry, inode);
3011 /* Close the race of the process dying before we return the dentry */ 3032 /* Close the race of the process dying before we return the dentry */
@@ -3248,7 +3269,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
3248 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3269 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
3249 ARRAY_SIZE(tid_base_stuff)); 3270 ARRAY_SIZE(tid_base_stuff));
3250 3271
3251 dentry->d_op = &pid_dentry_operations; 3272 d_set_d_op(dentry, &pid_dentry_operations);
3252 3273
3253 d_add(dentry, inode); 3274 d_add(dentry, inode);
3254 /* Close the race of the process dying before we return the dentry */ 3275 /* Close the race of the process dying before we return the dentry */
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index dd29f0337661..f766be29d2c7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = {
400 * smarter: we could keep a "volatile" flag in the 400 * smarter: we could keep a "volatile" flag in the
401 * inode to indicate which ones to keep. 401 * inode to indicate which ones to keep.
402 */ 402 */
403static int proc_delete_dentry(struct dentry * dentry) 403static int proc_delete_dentry(const struct dentry * dentry)
404{ 404{
405 return 1; 405 return 1;
406} 406}
@@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
439out_unlock: 439out_unlock:
440 440
441 if (inode) { 441 if (inode) {
442 dentry->d_op = &proc_dentry_operations; 442 d_set_d_op(dentry, &proc_dentry_operations);
443 d_add(dentry, inode); 443 d_add(dentry, inode);
444 return NULL; 444 return NULL;
445 } 445 }
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3ddb6068177c..6bcb926b101b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -65,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
65 return inode; 65 return inode;
66} 66}
67 67
68static void proc_destroy_inode(struct inode *inode) 68static void proc_i_callback(struct rcu_head *head)
69{ 69{
70 struct inode *inode = container_of(head, struct inode, i_rcu);
71 INIT_LIST_HEAD(&inode->i_dentry);
70 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 72 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
71} 73}
72 74
75static void proc_destroy_inode(struct inode *inode)
76{
77 call_rcu(&inode->i_rcu, proc_i_callback);
78}
79
73static void init_once(void *foo) 80static void init_once(void *foo)
74{ 81{
75 struct proc_inode *ei = (struct proc_inode *) foo; 82 struct proc_inode *ei = (struct proc_inode *) foo;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b652cb00906b..09a1f92a34ef 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -5,6 +5,7 @@
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/proc_fs.h> 6#include <linux/proc_fs.h>
7#include <linux/security.h> 7#include <linux/security.h>
8#include <linux/namei.h>
8#include "internal.h" 9#include "internal.h"
9 10
10static const struct dentry_operations proc_sys_dentry_operations; 11static const struct dentry_operations proc_sys_dentry_operations;
@@ -120,7 +121,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
120 goto out; 121 goto out;
121 122
122 err = NULL; 123 err = NULL;
123 dentry->d_op = &proc_sys_dentry_operations; 124 d_set_d_op(dentry, &proc_sys_dentry_operations);
124 d_add(dentry, inode); 125 d_add(dentry, inode);
125 126
126out: 127out:
@@ -201,7 +202,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
201 dput(child); 202 dput(child);
202 return -ENOMEM; 203 return -ENOMEM;
203 } else { 204 } else {
204 child->d_op = &proc_sys_dentry_operations; 205 d_set_d_op(child, &proc_sys_dentry_operations);
205 d_add(child, inode); 206 d_add(child, inode);
206 } 207 }
207 } else { 208 } else {
@@ -294,7 +295,7 @@ out:
294 return ret; 295 return ret;
295} 296}
296 297
297static int proc_sys_permission(struct inode *inode, int mask) 298static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
298{ 299{
299 /* 300 /*
300 * sysctl entries that are not writeable, 301 * sysctl entries that are not writeable,
@@ -304,6 +305,9 @@ static int proc_sys_permission(struct inode *inode, int mask)
304 struct ctl_table *table; 305 struct ctl_table *table;
305 int error; 306 int error;
306 307
308 if (flags & IPERM_FLAG_RCU)
309 return -ECHILD;
310
307 /* Executable files are not allowed under /proc/sys/ */ 311 /* Executable files are not allowed under /proc/sys/ */
308 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) 312 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
309 return -EACCES; 313 return -EACCES;
@@ -389,23 +393,30 @@ static const struct inode_operations proc_sys_dir_operations = {
389 393
390static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 394static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
391{ 395{
396 if (nd->flags & LOOKUP_RCU)
397 return -ECHILD;
392 return !PROC_I(dentry->d_inode)->sysctl->unregistering; 398 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
393} 399}
394 400
395static int proc_sys_delete(struct dentry *dentry) 401static int proc_sys_delete(const struct dentry *dentry)
396{ 402{
397 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 403 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
398} 404}
399 405
400static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, 406static int proc_sys_compare(const struct dentry *parent,
401 struct qstr *name) 407 const struct inode *pinode,
408 const struct dentry *dentry, const struct inode *inode,
409 unsigned int len, const char *str, const struct qstr *name)
402{ 410{
403 struct dentry *dentry = container_of(qstr, struct dentry, d_name); 411 /* Although proc doesn't have negative dentries, rcu-walk means
404 if (qstr->len != name->len) 412 * that inode here can be NULL */
413 if (!inode)
414 return 0;
415 if (name->len != len)
405 return 1; 416 return 1;
406 if (memcmp(qstr->name, name->name, name->len)) 417 if (memcmp(name->name, str, len))
407 return 1; 418 return 1;
408 return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); 419 return !sysctl_is_seen(PROC_I(inode)->sysctl);
409} 420}
410 421
411static const struct dentry_operations proc_sys_dentry_operations = { 422static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fcada42f1aa3..e63b4171d583 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
425 return &ei->vfs_inode; 425 return &ei->vfs_inode;
426} 426}
427 427
428static void qnx4_destroy_inode(struct inode *inode) 428static void qnx4_i_callback(struct rcu_head *head)
429{ 429{
430 struct inode *inode = container_of(head, struct inode, i_rcu);
431 INIT_LIST_HEAD(&inode->i_dentry);
430 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); 432 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
431} 433}
432 434
435static void qnx4_destroy_inode(struct inode *inode)
436{
437 call_rcu(&inode->i_rcu, qnx4_i_callback);
438}
439
433static void init_once(void *foo) 440static void init_once(void *foo)
434{ 441{
435 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; 442 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b243117b8752..2575682a9ead 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -529,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
529 return &ei->vfs_inode; 529 return &ei->vfs_inode;
530} 530}
531 531
532static void reiserfs_destroy_inode(struct inode *inode) 532static void reiserfs_i_callback(struct rcu_head *head)
533{ 533{
534 struct inode *inode = container_of(head, struct inode, i_rcu);
535 INIT_LIST_HEAD(&inode->i_dentry);
534 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); 536 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
535} 537}
536 538
539static void reiserfs_destroy_inode(struct inode *inode)
540{
541 call_rcu(&inode->i_rcu, reiserfs_i_callback);
542}
543
537static void init_once(void *foo) 544static void init_once(void *foo)
538{ 545{
539 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; 546 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5d04a7828e7a..3cfb2e933644 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -870,11 +870,14 @@ out:
870 return err; 870 return err;
871} 871}
872 872
873static int reiserfs_check_acl(struct inode *inode, int mask) 873static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags)
874{ 874{
875 struct posix_acl *acl; 875 struct posix_acl *acl;
876 int error = -EAGAIN; /* do regular unix permission checks by default */ 876 int error = -EAGAIN; /* do regular unix permission checks by default */
877 877
878 if (flags & IPERM_FLAG_RCU)
879 return -ECHILD;
880
878 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 881 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
879 882
880 if (acl) { 883 if (acl) {
@@ -951,8 +954,10 @@ static int xattr_mount_check(struct super_block *s)
951 return 0; 954 return 0;
952} 955}
953 956
954int reiserfs_permission(struct inode *inode, int mask) 957int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
955{ 958{
959 if (flags & IPERM_FLAG_RCU)
960 return -ECHILD;
956 /* 961 /*
957 * We don't do permission checks on the internal objects. 962 * We don't do permission checks on the internal objects.
958 * Permissions are determined by the "owning" object. 963 * Permissions are determined by the "owning" object.
@@ -965,13 +970,16 @@ int reiserfs_permission(struct inode *inode, int mask)
965 * Stat data v1 doesn't support ACLs. 970 * Stat data v1 doesn't support ACLs.
966 */ 971 */
967 if (get_inode_sd_version(inode) != STAT_DATA_V1) 972 if (get_inode_sd_version(inode) != STAT_DATA_V1)
968 return generic_permission(inode, mask, reiserfs_check_acl); 973 return generic_permission(inode, mask, flags,
974 reiserfs_check_acl);
969#endif 975#endif
970 return generic_permission(inode, mask, NULL); 976 return generic_permission(inode, mask, flags, NULL);
971} 977}
972 978
973static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
974{ 980{
981 if (nd->flags & LOOKUP_RCU)
982 return -ECHILD;
975 return -EPERM; 983 return -EPERM;
976} 984}
977 985
@@ -990,7 +998,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
990 strlen(PRIVROOT_NAME)); 998 strlen(PRIVROOT_NAME));
991 if (!IS_ERR(dentry)) { 999 if (!IS_ERR(dentry)) {
992 REISERFS_SB(s)->priv_root = dentry; 1000 REISERFS_SB(s)->priv_root = dentry;
993 dentry->d_op = &xattr_lookup_poison_ops; 1001 d_set_d_op(dentry, &xattr_lookup_poison_ops);
994 if (dentry->d_inode) 1002 if (dentry->d_inode)
995 dentry->d_inode->i_flags |= S_PRIVATE; 1003 dentry->d_inode->i_flags |= S_PRIVATE;
996 } else 1004 } else
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 6647f90e55cd..2305e3121cb1 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
400/* 400/*
401 * return a spent inode to the slab cache 401 * return a spent inode to the slab cache
402 */ 402 */
403static void romfs_destroy_inode(struct inode *inode) 403static void romfs_i_callback(struct rcu_head *head)
404{ 404{
405 struct inode *inode = container_of(head, struct inode, i_rcu);
406 INIT_LIST_HEAD(&inode->i_dentry);
405 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 407 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
406} 408}
407 409
410static void romfs_destroy_inode(struct inode *inode)
411{
412 call_rcu(&inode->i_rcu, romfs_i_callback);
413}
414
408/* 415/*
409 * get filesystem statistics 416 * get filesystem statistics
410 */ 417 */
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 24de30ba34c1..20700b9f2b4c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -440,11 +440,18 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
440} 440}
441 441
442 442
443static void squashfs_destroy_inode(struct inode *inode) 443static void squashfs_i_callback(struct rcu_head *head)
444{ 444{
445 struct inode *inode = container_of(head, struct inode, i_rcu);
446 INIT_LIST_HEAD(&inode->i_dentry);
445 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); 447 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
446} 448}
447 449
450static void squashfs_destroy_inode(struct inode *inode)
451{
452 call_rcu(&inode->i_rcu, squashfs_i_callback);
453}
454
448 455
449static struct file_system_type squashfs_fs_type = { 456static struct file_system_type squashfs_fs_type = {
450 .owner = THIS_MODULE, 457 .owner = THIS_MODULE,
diff --git a/fs/super.c b/fs/super.c
index ca696155cd9a..823e061faa87 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -30,6 +30,7 @@
30#include <linux/idr.h> 30#include <linux/idr.h>
31#include <linux/mutex.h> 31#include <linux/mutex.h>
32#include <linux/backing-dev.h> 32#include <linux/backing-dev.h>
33#include <linux/rculist_bl.h>
33#include "internal.h" 34#include "internal.h"
34 35
35 36
@@ -71,7 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
71 INIT_LIST_HEAD(&s->s_files); 72 INIT_LIST_HEAD(&s->s_files);
72#endif 73#endif
73 INIT_LIST_HEAD(&s->s_instances); 74 INIT_LIST_HEAD(&s->s_instances);
74 INIT_HLIST_HEAD(&s->s_anon); 75 INIT_HLIST_BL_HEAD(&s->s_anon);
75 INIT_LIST_HEAD(&s->s_inodes); 76 INIT_LIST_HEAD(&s->s_inodes);
76 INIT_LIST_HEAD(&s->s_dentry_lru); 77 INIT_LIST_HEAD(&s->s_dentry_lru);
77 init_rwsem(&s->s_umount); 78 init_rwsem(&s->s_umount);
@@ -1139,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1139 return mnt; 1140 return mnt;
1140 1141
1141 err: 1142 err:
1142 mntput(mnt); 1143 mntput_long(mnt);
1143 return ERR_PTR(err); 1144 return ERR_PTR(err);
1144} 1145}
1145 1146
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7e54bac8c4b0..ea9120a830d8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -231,7 +231,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
231 goto repeat; 231 goto repeat;
232} 232}
233 233
234static int sysfs_dentry_delete(struct dentry *dentry) 234static int sysfs_dentry_delete(const struct dentry *dentry)
235{ 235{
236 struct sysfs_dirent *sd = dentry->d_fsdata; 236 struct sysfs_dirent *sd = dentry->d_fsdata;
237 return !!(sd->s_flags & SYSFS_FLAG_REMOVED); 237 return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
@@ -239,9 +239,13 @@ static int sysfs_dentry_delete(struct dentry *dentry)
239 239
240static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) 240static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
241{ 241{
242 struct sysfs_dirent *sd = dentry->d_fsdata; 242 struct sysfs_dirent *sd;
243 int is_dir; 243 int is_dir;
244 244
245 if (nd->flags & LOOKUP_RCU)
246 return -ECHILD;
247
248 sd = dentry->d_fsdata;
245 mutex_lock(&sysfs_mutex); 249 mutex_lock(&sysfs_mutex);
246 250
247 /* The sysfs dirent has been deleted */ 251 /* The sysfs dirent has been deleted */
@@ -701,7 +705,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
701 /* instantiate and hash dentry */ 705 /* instantiate and hash dentry */
702 ret = d_find_alias(inode); 706 ret = d_find_alias(inode);
703 if (!ret) { 707 if (!ret) {
704 dentry->d_op = &sysfs_dentry_ops; 708 d_set_d_op(dentry, &sysfs_dentry_ops);
705 dentry->d_fsdata = sysfs_get(sd); 709 dentry->d_fsdata = sysfs_get(sd);
706 d_add(dentry, inode); 710 d_add(dentry, inode);
707 } else { 711 } else {
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index cffb1fd8ba33..30ac27345586 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -348,13 +348,18 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
348 return -ENOENT; 348 return -ENOENT;
349} 349}
350 350
351int sysfs_permission(struct inode *inode, int mask) 351int sysfs_permission(struct inode *inode, int mask, unsigned int flags)
352{ 352{
353 struct sysfs_dirent *sd = inode->i_private; 353 struct sysfs_dirent *sd;
354
355 if (flags & IPERM_FLAG_RCU)
356 return -ECHILD;
357
358 sd = inode->i_private;
354 359
355 mutex_lock(&sysfs_mutex); 360 mutex_lock(&sysfs_mutex);
356 sysfs_refresh_inode(sd, inode); 361 sysfs_refresh_inode(sd, inode);
357 mutex_unlock(&sysfs_mutex); 362 mutex_unlock(&sysfs_mutex);
358 363
359 return generic_permission(inode, mask, NULL); 364 return generic_permission(inode, mask, flags, NULL);
360} 365}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d9be60a2e956..ffaaa816bfba 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -200,7 +200,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
200struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); 200struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
201void sysfs_evict_inode(struct inode *inode); 201void sysfs_evict_inode(struct inode *inode);
202int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); 202int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
203int sysfs_permission(struct inode *inode, int mask); 203int sysfs_permission(struct inode *inode, int mask, unsigned int flags);
204int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 204int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
205int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 205int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
206int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 206int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index de44d067b9e6..0630eb969a28 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
333 return &si->vfs_inode; 333 return &si->vfs_inode;
334} 334}
335 335
336static void sysv_destroy_inode(struct inode *inode) 336static void sysv_i_callback(struct rcu_head *head)
337{ 337{
338 struct inode *inode = container_of(head, struct inode, i_rcu);
339 INIT_LIST_HEAD(&inode->i_dentry);
338 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); 340 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
339} 341}
340 342
343static void sysv_destroy_inode(struct inode *inode)
344{
345 call_rcu(&inode->i_rcu, sysv_i_callback);
346}
347
341static void init_once(void *p) 348static void init_once(void *p)
342{ 349{
343 struct sysv_inode_info *si = (struct sysv_inode_info *)p; 350 struct sysv_inode_info *si = (struct sysv_inode_info *)p;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 11e7f7d11cd0..b5e68da2db32 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -27,7 +27,8 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
27 return err; 27 return err;
28} 28}
29 29
30static int sysv_hash(struct dentry *dentry, struct qstr *qstr) 30static int sysv_hash(const struct dentry *dentry, const struct inode *inode,
31 struct qstr *qstr)
31{ 32{
32 /* Truncate the name in place, avoids having to define a compare 33 /* Truncate the name in place, avoids having to define a compare
33 function. */ 34 function. */
@@ -47,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
47 struct inode * inode = NULL; 48 struct inode * inode = NULL;
48 ino_t ino; 49 ino_t ino;
49 50
50 dentry->d_op = dir->i_sb->s_root->d_op; 51 d_set_d_op(dentry, dir->i_sb->s_root->d_op);
51 if (dentry->d_name.len > SYSV_NAMELEN) 52 if (dentry->d_name.len > SYSV_NAMELEN)
52 return ERR_PTR(-ENAMETOOLONG); 53 return ERR_PTR(-ENAMETOOLONG);
53 ino = sysv_inode_by_name(dentry); 54 ino = sysv_inode_by_name(dentry);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 3d9c62be0c10..76712aefc4ab 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
346 if (sbi->s_forced_ro) 346 if (sbi->s_forced_ro)
347 sb->s_flags |= MS_RDONLY; 347 sb->s_flags |= MS_RDONLY;
348 if (sbi->s_truncate) 348 if (sbi->s_truncate)
349 sb->s_root->d_op = &sysv_dentry_operations; 349 d_set_d_op(sb->s_root, &sysv_dentry_operations);
350 return 1; 350 return 1;
351} 351}
352 352
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 91fac54c70e3..6e11c2975dcf 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
272 return &ui->vfs_inode; 272 return &ui->vfs_inode;
273}; 273};
274 274
275static void ubifs_i_callback(struct rcu_head *head)
276{
277 struct inode *inode = container_of(head, struct inode, i_rcu);
278 struct ubifs_inode *ui = ubifs_inode(inode);
279 INIT_LIST_HEAD(&inode->i_dentry);
280 kmem_cache_free(ubifs_inode_slab, ui);
281}
282
275static void ubifs_destroy_inode(struct inode *inode) 283static void ubifs_destroy_inode(struct inode *inode)
276{ 284{
277 struct ubifs_inode *ui = ubifs_inode(inode); 285 struct ubifs_inode *ui = ubifs_inode(inode);
278 286
279 kfree(ui->data); 287 kfree(ui->data);
280 kmem_cache_free(ubifs_inode_slab, inode); 288 call_rcu(&inode->i_rcu, ubifs_i_callback);
281} 289}
282 290
283/* 291/*
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4a5c7c61836a..b539d53320fb 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -139,11 +139,18 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
139 return &ei->vfs_inode; 139 return &ei->vfs_inode;
140} 140}
141 141
142static void udf_destroy_inode(struct inode *inode) 142static void udf_i_callback(struct rcu_head *head)
143{ 143{
144 struct inode *inode = container_of(head, struct inode, i_rcu);
145 INIT_LIST_HEAD(&inode->i_dentry);
144 kmem_cache_free(udf_inode_cachep, UDF_I(inode)); 146 kmem_cache_free(udf_inode_cachep, UDF_I(inode));
145} 147}
146 148
149static void udf_destroy_inode(struct inode *inode)
150{
151 call_rcu(&inode->i_rcu, udf_i_callback);
152}
153
147static void init_once(void *foo) 154static void init_once(void *foo)
148{ 155{
149 struct udf_inode_info *ei = (struct udf_inode_info *)foo; 156 struct udf_inode_info *ei = (struct udf_inode_info *)foo;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 2c47daed56da..2c61ac5d4e48 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
1412 return &ei->vfs_inode; 1412 return &ei->vfs_inode;
1413} 1413}
1414 1414
1415static void ufs_destroy_inode(struct inode *inode) 1415static void ufs_i_callback(struct rcu_head *head)
1416{ 1416{
1417 struct inode *inode = container_of(head, struct inode, i_rcu);
1418 INIT_LIST_HEAD(&inode->i_dentry);
1417 kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); 1419 kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
1418} 1420}
1419 1421
1422static void ufs_destroy_inode(struct inode *inode)
1423{
1424 call_rcu(&inode->i_rcu, ufs_i_callback);
1425}
1426
1420static void init_once(void *foo) 1427static void init_once(void *foo)
1421{ 1428{
1422 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; 1429 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b2771862fd3d..39f4f809bb68 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,12 +219,13 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219} 219}
220 220
221int 221int
222xfs_check_acl(struct inode *inode, int mask) 222xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
223{ 223{
224 struct xfs_inode *ip = XFS_I(inode); 224 struct xfs_inode *ip;
225 struct posix_acl *acl; 225 struct posix_acl *acl;
226 int error = -EAGAIN; 226 int error = -EAGAIN;
227 227
228 ip = XFS_I(inode);
228 trace_xfs_check_acl(ip); 229 trace_xfs_check_acl(ip);
229 230
230 /* 231 /*
@@ -234,6 +235,12 @@ xfs_check_acl(struct inode *inode, int mask)
234 if (!XFS_IFORK_Q(ip)) 235 if (!XFS_IFORK_Q(ip))
235 return -EAGAIN; 236 return -EAGAIN;
236 237
238 if (flags & IPERM_FLAG_RCU) {
239 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
240 return -ECHILD;
241 return -EAGAIN;
242 }
243
237 acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); 244 acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
238 if (IS_ERR(acl)) 245 if (IS_ERR(acl))
239 return PTR_ERR(acl); 246 return PTR_ERR(acl);
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 0135e2a669d7..11dd72070cbb 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@ struct xfs_acl {
42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
43 43
44#ifdef CONFIG_XFS_POSIX_ACL 44#ifdef CONFIG_XFS_POSIX_ACL
45extern int xfs_check_acl(struct inode *inode, int mask); 45extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags);
46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); 46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); 47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
48extern int xfs_acl_chmod(struct inode *inode); 48extern int xfs_acl_chmod(struct inode *inode);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0cdd26932d8e..d7de5a3f7867 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,6 +91,17 @@ xfs_inode_alloc(
91 return ip; 91 return ip;
92} 92}
93 93
94STATIC void
95xfs_inode_free_callback(
96 struct rcu_head *head)
97{
98 struct inode *inode = container_of(head, struct inode, i_rcu);
99 struct xfs_inode *ip = XFS_I(inode);
100
101 INIT_LIST_HEAD(&inode->i_dentry);
102 kmem_zone_free(xfs_inode_zone, ip);
103}
104
94void 105void
95xfs_inode_free( 106xfs_inode_free(
96 struct xfs_inode *ip) 107 struct xfs_inode *ip)
@@ -134,7 +145,7 @@ xfs_inode_free(
134 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 145 ASSERT(!spin_is_locked(&ip->i_flags_lock));
135 ASSERT(completion_done(&ip->i_flush)); 146 ASSERT(completion_done(&ip->i_flush));
136 147
137 kmem_zone_free(xfs_inode_zone, ip); 148 call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback);
138} 149}
139 150
140/* 151/*
diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
index 7113a32a86ea..e612575a2596 100644
--- a/include/linux/bit_spinlock.h
+++ b/include/linux/bit_spinlock.h
@@ -1,6 +1,10 @@
1#ifndef __LINUX_BIT_SPINLOCK_H 1#ifndef __LINUX_BIT_SPINLOCK_H
2#define __LINUX_BIT_SPINLOCK_H 2#define __LINUX_BIT_SPINLOCK_H
3 3
4#include <linux/kernel.h>
5#include <linux/preempt.h>
6#include <asm/atomic.h>
7
4/* 8/*
5 * bit-based spin_lock() 9 * bit-based spin_lock()
6 * 10 *
diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h
index 2e914d0771b9..4ccc59c1ea82 100644
--- a/include/linux/coda_linux.h
+++ b/include/linux/coda_linux.h
@@ -37,7 +37,7 @@ extern const struct file_operations coda_ioctl_operations;
37/* operations shared over more than one file */ 37/* operations shared over more than one file */
38int coda_open(struct inode *i, struct file *f); 38int coda_open(struct inode *i, struct file *f);
39int coda_release(struct inode *i, struct file *f); 39int coda_release(struct inode *i, struct file *f);
40int coda_permission(struct inode *inode, int mask); 40int coda_permission(struct inode *inode, int mask, unsigned int flags);
41int coda_revalidate_inode(struct dentry *); 41int coda_revalidate_inode(struct dentry *);
42int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); 42int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
43int coda_setattr(struct dentry *, struct iattr *); 43int coda_setattr(struct dentry *, struct iattr *);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 6a4aea30aa09..bd07758943e0 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -4,7 +4,9 @@
4#include <asm/atomic.h> 4#include <asm/atomic.h>
5#include <linux/list.h> 5#include <linux/list.h>
6#include <linux/rculist.h> 6#include <linux/rculist.h>
7#include <linux/rculist_bl.h>
7#include <linux/spinlock.h> 8#include <linux/spinlock.h>
9#include <linux/seqlock.h>
8#include <linux/cache.h> 10#include <linux/cache.h>
9#include <linux/rcupdate.h> 11#include <linux/rcupdate.h>
10 12
@@ -45,6 +47,27 @@ struct dentry_stat_t {
45}; 47};
46extern struct dentry_stat_t dentry_stat; 48extern struct dentry_stat_t dentry_stat;
47 49
50/*
51 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
52 * The strings are both count bytes long, and count is non-zero.
53 */
54static inline int dentry_cmp(const unsigned char *cs, size_t scount,
55 const unsigned char *ct, size_t tcount)
56{
57 int ret;
58 if (scount != tcount)
59 return 1;
60 do {
61 ret = (*cs != *ct);
62 if (ret)
63 break;
64 cs++;
65 ct++;
66 tcount--;
67 } while (tcount);
68 return ret;
69}
70
48/* Name hashing routines. Initial hash value */ 71/* Name hashing routines. Initial hash value */
49/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ 72/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
50#define init_name_hash() 0 73#define init_name_hash() 0
@@ -81,25 +104,33 @@ full_name_hash(const unsigned char *name, unsigned int len)
81 * large memory footprint increase). 104 * large memory footprint increase).
82 */ 105 */
83#ifdef CONFIG_64BIT 106#ifdef CONFIG_64BIT
84#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */ 107# define DNAME_INLINE_LEN 32 /* 192 bytes */
85#else 108#else
86#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */ 109# ifdef CONFIG_SMP
110# define DNAME_INLINE_LEN 36 /* 128 bytes */
111# else
112# define DNAME_INLINE_LEN 40 /* 128 bytes */
113# endif
87#endif 114#endif
88 115
89struct dentry { 116struct dentry {
90 atomic_t d_count; 117 /* RCU lookup touched fields */
91 unsigned int d_flags; /* protected by d_lock */ 118 unsigned int d_flags; /* protected by d_lock */
92 spinlock_t d_lock; /* per dentry lock */ 119 seqcount_t d_seq; /* per dentry seqlock */
93 int d_mounted; 120 struct hlist_bl_node d_hash; /* lookup hash list */
94 struct inode *d_inode; /* Where the name belongs to - NULL is
95 * negative */
96 /*
97 * The next three fields are touched by __d_lookup. Place them here
98 * so they all fit in a cache line.
99 */
100 struct hlist_node d_hash; /* lookup hash list */
101 struct dentry *d_parent; /* parent directory */ 121 struct dentry *d_parent; /* parent directory */
102 struct qstr d_name; 122 struct qstr d_name;
123 struct inode *d_inode; /* Where the name belongs to - NULL is
124 * negative */
125 unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
126
127 /* Ref lookup also touches following */
128 unsigned int d_count; /* protected by d_lock */
129 spinlock_t d_lock; /* per dentry lock */
130 const struct dentry_operations *d_op;
131 struct super_block *d_sb; /* The root of the dentry tree */
132 unsigned long d_time; /* used by d_revalidate */
133 void *d_fsdata; /* fs-specific data */
103 134
104 struct list_head d_lru; /* LRU list */ 135 struct list_head d_lru; /* LRU list */
105 /* 136 /*
@@ -111,12 +142,6 @@ struct dentry {
111 } d_u; 142 } d_u;
112 struct list_head d_subdirs; /* our children */ 143 struct list_head d_subdirs; /* our children */
113 struct list_head d_alias; /* inode alias list */ 144 struct list_head d_alias; /* inode alias list */
114 unsigned long d_time; /* used by d_revalidate */
115 const struct dentry_operations *d_op;
116 struct super_block *d_sb; /* The root of the dentry tree */
117 void *d_fsdata; /* fs-specific data */
118
119 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
120}; 145};
121 146
122/* 147/*
@@ -133,96 +158,61 @@ enum dentry_d_lock_class
133 158
134struct dentry_operations { 159struct dentry_operations {
135 int (*d_revalidate)(struct dentry *, struct nameidata *); 160 int (*d_revalidate)(struct dentry *, struct nameidata *);
136 int (*d_hash) (struct dentry *, struct qstr *); 161 int (*d_hash)(const struct dentry *, const struct inode *,
137 int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); 162 struct qstr *);
138 int (*d_delete)(struct dentry *); 163 int (*d_compare)(const struct dentry *, const struct inode *,
164 const struct dentry *, const struct inode *,
165 unsigned int, const char *, const struct qstr *);
166 int (*d_delete)(const struct dentry *);
139 void (*d_release)(struct dentry *); 167 void (*d_release)(struct dentry *);
140 void (*d_iput)(struct dentry *, struct inode *); 168 void (*d_iput)(struct dentry *, struct inode *);
141 char *(*d_dname)(struct dentry *, char *, int); 169 char *(*d_dname)(struct dentry *, char *, int);
142}; 170} ____cacheline_aligned;
143
144/* the dentry parameter passed to d_hash and d_compare is the parent
145 * directory of the entries to be compared. It is used in case these
146 * functions need any directory specific information for determining
147 * equivalency classes. Using the dentry itself might not work, as it
148 * might be a negative dentry which has no information associated with
149 * it */
150 171
151/* 172/*
152locking rules: 173 * Locking rules for dentry_operations callbacks are to be found in
153 big lock dcache_lock d_lock may block 174 * Documentation/filesystems/Locking. Keep it updated!
154d_revalidate: no no no yes 175 *
155d_hash no no no yes 176 * Further descriptions are found in Documentation/filesystems/vfs.txt.
156d_compare: no yes yes no 177 * Keep it updated too!
157d_delete: no yes no no
158d_release: no no no yes
159d_iput: no no no yes
160 */ 178 */
161 179
162/* d_flags entries */ 180/* d_flags entries */
163#define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */ 181#define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */
164#define DCACHE_NFSFS_RENAMED 0x0002 /* this dentry has been "silly 182#define DCACHE_NFSFS_RENAMED 0x0002
165 * renamed" and has to be 183 /* this dentry has been "silly renamed" and has to be deleted on the last
166 * deleted on the last dput() 184 * dput() */
167 */ 185
168#define DCACHE_DISCONNECTED 0x0004 186#define DCACHE_DISCONNECTED 0x0004
169 /* This dentry is possibly not currently connected to the dcache tree, 187 /* This dentry is possibly not currently connected to the dcache tree, in
170 * in which case its parent will either be itself, or will have this 188 * which case its parent will either be itself, or will have this flag as
171 * flag as well. nfsd will not use a dentry with this bit set, but will 189 * well. nfsd will not use a dentry with this bit set, but will first
172 * first endeavour to clear the bit either by discovering that it is 190 * endeavour to clear the bit either by discovering that it is connected,
173 * connected, or by performing lookup operations. Any filesystem which 191 * or by performing lookup operations. Any filesystem which supports
174 * supports nfsd_operations MUST have a lookup function which, if it finds 192 * nfsd_operations MUST have a lookup function which, if it finds a
175 * a directory inode with a DCACHE_DISCONNECTED dentry, will d_move 193 * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that
176 * that dentry into place and return that dentry rather than the passed one, 194 * dentry into place and return that dentry rather than the passed one,
177 * typically using d_splice_alias. 195 * typically using d_splice_alias. */
178 */
179 196
180#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ 197#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
181#define DCACHE_UNHASHED 0x0010 198#define DCACHE_UNHASHED 0x0010
182 199#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020
183#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ 200 /* Parent inode is watched by inotify */
184 201
185#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ 202#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */
186 203#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080
187#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ 204 /* Parent inode is watched by some fsnotify listener */
188 205
189#define DCACHE_CANT_MOUNT 0x0100 206#define DCACHE_CANT_MOUNT 0x0100
207#define DCACHE_GENOCIDE 0x0200
208#define DCACHE_MOUNTED 0x0400 /* is a mountpoint */
190 209
191extern spinlock_t dcache_lock; 210#define DCACHE_OP_HASH 0x1000
192extern seqlock_t rename_lock; 211#define DCACHE_OP_COMPARE 0x2000
193 212#define DCACHE_OP_REVALIDATE 0x4000
194/** 213#define DCACHE_OP_DELETE 0x8000
195 * d_drop - drop a dentry
196 * @dentry: dentry to drop
197 *
198 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
199 * be found through a VFS lookup any more. Note that this is different from
200 * deleting the dentry - d_delete will try to mark the dentry negative if
201 * possible, giving a successful _negative_ lookup, while d_drop will
202 * just make the cache lookup fail.
203 *
204 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
205 * reason (NFS timeouts or autofs deletes).
206 *
207 * __d_drop requires dentry->d_lock.
208 */
209
210static inline void __d_drop(struct dentry *dentry)
211{
212 if (!(dentry->d_flags & DCACHE_UNHASHED)) {
213 dentry->d_flags |= DCACHE_UNHASHED;
214 hlist_del_rcu(&dentry->d_hash);
215 }
216}
217 214
218static inline void d_drop(struct dentry *dentry) 215extern seqlock_t rename_lock;
219{
220 spin_lock(&dcache_lock);
221 spin_lock(&dentry->d_lock);
222 __d_drop(dentry);
223 spin_unlock(&dentry->d_lock);
224 spin_unlock(&dcache_lock);
225}
226 216
227static inline int dname_external(struct dentry *dentry) 217static inline int dname_external(struct dentry *dentry)
228{ 218{
@@ -235,10 +225,14 @@ static inline int dname_external(struct dentry *dentry)
235extern void d_instantiate(struct dentry *, struct inode *); 225extern void d_instantiate(struct dentry *, struct inode *);
236extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); 226extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
237extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); 227extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
228extern void __d_drop(struct dentry *dentry);
229extern void d_drop(struct dentry *dentry);
238extern void d_delete(struct dentry *); 230extern void d_delete(struct dentry *);
231extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op);
239 232
240/* allocate/de-allocate */ 233/* allocate/de-allocate */
241extern struct dentry * d_alloc(struct dentry *, const struct qstr *); 234extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
235extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
242extern struct dentry * d_splice_alias(struct inode *, struct dentry *); 236extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
243extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); 237extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
244extern struct dentry * d_obtain_alias(struct inode *); 238extern struct dentry * d_obtain_alias(struct inode *);
@@ -296,14 +290,40 @@ static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *in
296 return res; 290 return res;
297} 291}
298 292
293extern void dentry_update_name_case(struct dentry *, struct qstr *);
294
299/* used for rename() and baskets */ 295/* used for rename() and baskets */
300extern void d_move(struct dentry *, struct dentry *); 296extern void d_move(struct dentry *, struct dentry *);
301extern struct dentry *d_ancestor(struct dentry *, struct dentry *); 297extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
302 298
303/* appendix may either be NULL or be used for transname suffixes */ 299/* appendix may either be NULL or be used for transname suffixes */
304extern struct dentry * d_lookup(struct dentry *, struct qstr *); 300extern struct dentry *d_lookup(struct dentry *, struct qstr *);
305extern struct dentry * __d_lookup(struct dentry *, struct qstr *); 301extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
306extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *); 302extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
303extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
304 unsigned *seq, struct inode **inode);
305
306/**
307 * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
308 * @dentry: dentry to take a ref on
309 * @seq: seqcount to verify against
310 * @Returns: 0 on failure, else 1.
311 *
312 * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
313 * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
314 */
315static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
316{
317 int ret = 0;
318
319 assert_spin_locked(&dentry->d_lock);
320 if (!read_seqcount_retry(&dentry->d_seq, seq)) {
321 ret = 1;
322 dentry->d_count++;
323 }
324
325 return ret;
326}
307 327
308/* validate "insecure" dentry pointer */ 328/* validate "insecure" dentry pointer */
309extern int d_validate(struct dentry *, struct dentry *); 329extern int d_validate(struct dentry *, struct dentry *);
@@ -316,34 +336,37 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
316extern char *__d_path(const struct path *path, struct path *root, char *, int); 336extern char *__d_path(const struct path *path, struct path *root, char *, int);
317extern char *d_path(const struct path *, char *, int); 337extern char *d_path(const struct path *, char *, int);
318extern char *d_path_with_unreachable(const struct path *, char *, int); 338extern char *d_path_with_unreachable(const struct path *, char *, int);
319extern char *__dentry_path(struct dentry *, char *, int); 339extern char *dentry_path_raw(struct dentry *, char *, int);
320extern char *dentry_path(struct dentry *, char *, int); 340extern char *dentry_path(struct dentry *, char *, int);
321 341
322/* Allocation counts.. */ 342/* Allocation counts.. */
323 343
324/** 344/**
325 * dget, dget_locked - get a reference to a dentry 345 * dget, dget_dlock - get a reference to a dentry
326 * @dentry: dentry to get a reference to 346 * @dentry: dentry to get a reference to
327 * 347 *
328 * Given a dentry or %NULL pointer increment the reference count 348 * Given a dentry or %NULL pointer increment the reference count
329 * if appropriate and return the dentry. A dentry will not be 349 * if appropriate and return the dentry. A dentry will not be
330 * destroyed when it has references. dget() should never be 350 * destroyed when it has references.
331 * called for dentries with zero reference counter. For these cases
332 * (preferably none, functions in dcache.c are sufficient for normal
333 * needs and they take necessary precautions) you should hold dcache_lock
334 * and call dget_locked() instead of dget().
335 */ 351 */
336 352static inline struct dentry *dget_dlock(struct dentry *dentry)
353{
354 if (dentry)
355 dentry->d_count++;
356 return dentry;
357}
358
337static inline struct dentry *dget(struct dentry *dentry) 359static inline struct dentry *dget(struct dentry *dentry)
338{ 360{
339 if (dentry) { 361 if (dentry) {
340 BUG_ON(!atomic_read(&dentry->d_count)); 362 spin_lock(&dentry->d_lock);
341 atomic_inc(&dentry->d_count); 363 dget_dlock(dentry);
364 spin_unlock(&dentry->d_lock);
342 } 365 }
343 return dentry; 366 return dentry;
344} 367}
345 368
346extern struct dentry * dget_locked(struct dentry *); 369extern struct dentry *dget_parent(struct dentry *dentry);
347 370
348/** 371/**
349 * d_unhashed - is dentry hashed 372 * d_unhashed - is dentry hashed
@@ -374,21 +397,11 @@ static inline void dont_mount(struct dentry *dentry)
374 spin_unlock(&dentry->d_lock); 397 spin_unlock(&dentry->d_lock);
375} 398}
376 399
377static inline struct dentry *dget_parent(struct dentry *dentry)
378{
379 struct dentry *ret;
380
381 spin_lock(&dentry->d_lock);
382 ret = dget(dentry->d_parent);
383 spin_unlock(&dentry->d_lock);
384 return ret;
385}
386
387extern void dput(struct dentry *); 400extern void dput(struct dentry *);
388 401
389static inline int d_mountpoint(struct dentry *dentry) 402static inline int d_mountpoint(struct dentry *dentry)
390{ 403{
391 return dentry->d_mounted; 404 return dentry->d_flags & DCACHE_MOUNTED;
392} 405}
393 406
394extern struct vfsmount *lookup_mnt(struct path *); 407extern struct vfsmount *lookup_mnt(struct path *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 090f0eacde29..baf3e556ff0e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -392,6 +392,7 @@ struct inodes_stat_t {
392#include <linux/capability.h> 392#include <linux/capability.h>
393#include <linux/semaphore.h> 393#include <linux/semaphore.h>
394#include <linux/fiemap.h> 394#include <linux/fiemap.h>
395#include <linux/rculist_bl.h>
395 396
396#include <asm/atomic.h> 397#include <asm/atomic.h>
397#include <asm/byteorder.h> 398#include <asm/byteorder.h>
@@ -733,16 +734,31 @@ struct posix_acl;
733#define ACL_NOT_CACHED ((void *)(-1)) 734#define ACL_NOT_CACHED ((void *)(-1))
734 735
735struct inode { 736struct inode {
737 /* RCU path lookup touches following: */
738 umode_t i_mode;
739 uid_t i_uid;
740 gid_t i_gid;
741 const struct inode_operations *i_op;
742 struct super_block *i_sb;
743
744 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
745 unsigned int i_flags;
746 struct mutex i_mutex;
747
748 unsigned long i_state;
749 unsigned long dirtied_when; /* jiffies of first dirtying */
750
736 struct hlist_node i_hash; 751 struct hlist_node i_hash;
737 struct list_head i_wb_list; /* backing dev IO list */ 752 struct list_head i_wb_list; /* backing dev IO list */
738 struct list_head i_lru; /* inode LRU list */ 753 struct list_head i_lru; /* inode LRU list */
739 struct list_head i_sb_list; 754 struct list_head i_sb_list;
740 struct list_head i_dentry; 755 union {
756 struct list_head i_dentry;
757 struct rcu_head i_rcu;
758 };
741 unsigned long i_ino; 759 unsigned long i_ino;
742 atomic_t i_count; 760 atomic_t i_count;
743 unsigned int i_nlink; 761 unsigned int i_nlink;
744 uid_t i_uid;
745 gid_t i_gid;
746 dev_t i_rdev; 762 dev_t i_rdev;
747 unsigned int i_blkbits; 763 unsigned int i_blkbits;
748 u64 i_version; 764 u64 i_version;
@@ -755,13 +771,8 @@ struct inode {
755 struct timespec i_ctime; 771 struct timespec i_ctime;
756 blkcnt_t i_blocks; 772 blkcnt_t i_blocks;
757 unsigned short i_bytes; 773 unsigned short i_bytes;
758 umode_t i_mode;
759 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
760 struct mutex i_mutex;
761 struct rw_semaphore i_alloc_sem; 774 struct rw_semaphore i_alloc_sem;
762 const struct inode_operations *i_op;
763 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 775 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
764 struct super_block *i_sb;
765 struct file_lock *i_flock; 776 struct file_lock *i_flock;
766 struct address_space *i_mapping; 777 struct address_space *i_mapping;
767 struct address_space i_data; 778 struct address_space i_data;
@@ -782,11 +793,6 @@ struct inode {
782 struct hlist_head i_fsnotify_marks; 793 struct hlist_head i_fsnotify_marks;
783#endif 794#endif
784 795
785 unsigned long i_state;
786 unsigned long dirtied_when; /* jiffies of first dirtying */
787
788 unsigned int i_flags;
789
790#ifdef CONFIG_IMA 796#ifdef CONFIG_IMA
791 /* protected by i_lock */ 797 /* protected by i_lock */
792 unsigned int i_readcount; /* struct files open RO */ 798 unsigned int i_readcount; /* struct files open RO */
@@ -1372,13 +1378,13 @@ struct super_block {
1372 const struct xattr_handler **s_xattr; 1378 const struct xattr_handler **s_xattr;
1373 1379
1374 struct list_head s_inodes; /* all inodes */ 1380 struct list_head s_inodes; /* all inodes */
1375 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ 1381 struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
1376#ifdef CONFIG_SMP 1382#ifdef CONFIG_SMP
1377 struct list_head __percpu *s_files; 1383 struct list_head __percpu *s_files;
1378#else 1384#else
1379 struct list_head s_files; 1385 struct list_head s_files;
1380#endif 1386#endif
1381 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ 1387 /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
1382 struct list_head s_dentry_lru; /* unused dentry lru */ 1388 struct list_head s_dentry_lru; /* unused dentry lru */
1383 int s_nr_dentry_unused; /* # of dentry on lru */ 1389 int s_nr_dentry_unused; /* # of dentry on lru */
1384 1390
@@ -1545,9 +1551,18 @@ struct file_operations {
1545 int (*setlease)(struct file *, long, struct file_lock **); 1551 int (*setlease)(struct file *, long, struct file_lock **);
1546}; 1552};
1547 1553
1554#define IPERM_FLAG_RCU 0x0001
1555
1548struct inode_operations { 1556struct inode_operations {
1549 int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
1550 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); 1557 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
1558 void * (*follow_link) (struct dentry *, struct nameidata *);
1559 int (*permission) (struct inode *, int, unsigned int);
1560 int (*check_acl)(struct inode *, int, unsigned int);
1561
1562 int (*readlink) (struct dentry *, char __user *,int);
1563 void (*put_link) (struct dentry *, struct nameidata *, void *);
1564
1565 int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
1551 int (*link) (struct dentry *,struct inode *,struct dentry *); 1566 int (*link) (struct dentry *,struct inode *,struct dentry *);
1552 int (*unlink) (struct inode *,struct dentry *); 1567 int (*unlink) (struct inode *,struct dentry *);
1553 int (*symlink) (struct inode *,struct dentry *,const char *); 1568 int (*symlink) (struct inode *,struct dentry *,const char *);
@@ -1556,12 +1571,7 @@ struct inode_operations {
1556 int (*mknod) (struct inode *,struct dentry *,int,dev_t); 1571 int (*mknod) (struct inode *,struct dentry *,int,dev_t);
1557 int (*rename) (struct inode *, struct dentry *, 1572 int (*rename) (struct inode *, struct dentry *,
1558 struct inode *, struct dentry *); 1573 struct inode *, struct dentry *);
1559 int (*readlink) (struct dentry *, char __user *,int);
1560 void * (*follow_link) (struct dentry *, struct nameidata *);
1561 void (*put_link) (struct dentry *, struct nameidata *, void *);
1562 void (*truncate) (struct inode *); 1574 void (*truncate) (struct inode *);
1563 int (*permission) (struct inode *, int);
1564 int (*check_acl)(struct inode *, int);
1565 int (*setattr) (struct dentry *, struct iattr *); 1575 int (*setattr) (struct dentry *, struct iattr *);
1566 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); 1576 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
1567 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 1577 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -1573,7 +1583,7 @@ struct inode_operations {
1573 loff_t len); 1583 loff_t len);
1574 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, 1584 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1575 u64 len); 1585 u64 len);
1576}; 1586} ____cacheline_aligned;
1577 1587
1578struct seq_file; 1588struct seq_file;
1579 1589
@@ -2158,8 +2168,8 @@ extern sector_t bmap(struct inode *, sector_t);
2158#endif 2168#endif
2159extern int notify_change(struct dentry *, struct iattr *); 2169extern int notify_change(struct dentry *, struct iattr *);
2160extern int inode_permission(struct inode *, int); 2170extern int inode_permission(struct inode *, int);
2161extern int generic_permission(struct inode *, int, 2171extern int generic_permission(struct inode *, int, unsigned int,
2162 int (*check_acl)(struct inode *, int)); 2172 int (*check_acl)(struct inode *, int, unsigned int));
2163 2173
2164static inline bool execute_ok(struct inode *inode) 2174static inline bool execute_ok(struct inode *inode)
2165{ 2175{
@@ -2230,6 +2240,7 @@ extern void iget_failed(struct inode *);
2230extern void end_writeback(struct inode *); 2240extern void end_writeback(struct inode *);
2231extern void __destroy_inode(struct inode *); 2241extern void __destroy_inode(struct inode *);
2232extern struct inode *new_inode(struct super_block *); 2242extern struct inode *new_inode(struct super_block *);
2243extern void free_inode_nonrcu(struct inode *inode);
2233extern int should_remove_suid(struct dentry *); 2244extern int should_remove_suid(struct dentry *);
2234extern int file_remove_suid(struct file *); 2245extern int file_remove_suid(struct file *);
2235 2246
@@ -2446,6 +2457,10 @@ static inline ino_t parent_ino(struct dentry *dentry)
2446{ 2457{
2447 ino_t res; 2458 ino_t res;
2448 2459
2460 /*
2461 * Don't strictly need d_lock here? If the parent ino could change
2462 * then surely we'd have a deeper race in the caller?
2463 */
2449 spin_lock(&dentry->d_lock); 2464 spin_lock(&dentry->d_lock);
2450 res = dentry->d_parent->d_inode->i_ino; 2465 res = dentry->d_parent->d_inode->i_ino;
2451 spin_unlock(&dentry->d_lock); 2466 spin_unlock(&dentry->d_lock);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index a42b5bf02f8b..003dc0fd7347 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -2,10 +2,13 @@
2#define _LINUX_FS_STRUCT_H 2#define _LINUX_FS_STRUCT_H
3 3
4#include <linux/path.h> 4#include <linux/path.h>
5#include <linux/spinlock.h>
6#include <linux/seqlock.h>
5 7
6struct fs_struct { 8struct fs_struct {
7 int users; 9 int users;
8 spinlock_t lock; 10 spinlock_t lock;
11 seqcount_t seq;
9 int umask; 12 int umask;
10 int in_exec; 13 int in_exec;
11 struct path root, pwd; 14 struct path root, pwd;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index b10bcdeaef76..2a53f10712b3 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -17,7 +17,6 @@
17 17
18/* 18/*
19 * fsnotify_d_instantiate - instantiate a dentry for inode 19 * fsnotify_d_instantiate - instantiate a dentry for inode
20 * Called with dcache_lock held.
21 */ 20 */
22static inline void fsnotify_d_instantiate(struct dentry *dentry, 21static inline void fsnotify_d_instantiate(struct dentry *dentry,
23 struct inode *inode) 22 struct inode *inode)
@@ -62,7 +61,6 @@ static inline int fsnotify_perm(struct file *file, int mask)
62 61
63/* 62/*
64 * fsnotify_d_move - dentry has been moved 63 * fsnotify_d_move - dentry has been moved
65 * Called with dcache_lock and dentry->d_lock held.
66 */ 64 */
67static inline void fsnotify_d_move(struct dentry *dentry) 65static inline void fsnotify_d_move(struct dentry *dentry)
68{ 66{
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 7380763595d3..69ad89b50489 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -329,9 +329,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
329{ 329{
330 struct dentry *parent; 330 struct dentry *parent;
331 331
332 assert_spin_locked(&dcache_lock);
333 assert_spin_locked(&dentry->d_lock); 332 assert_spin_locked(&dentry->d_lock);
334 333
334 /*
335 * Serialisation of setting PARENT_WATCHED on the dentries is provided
336 * by d_lock. If inotify_inode_watched changes after we have taken
337 * d_lock, the following __fsnotify_update_child_dentry_flags call will
338 * find our entry, so it will spin until we complete here, and update
339 * us with the new state.
340 */
335 parent = dentry->d_parent; 341 parent = dentry->d_parent;
336 if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) 342 if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode))
337 dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; 343 dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
@@ -341,15 +347,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
341 347
342/* 348/*
343 * fsnotify_d_instantiate - instantiate a dentry for inode 349 * fsnotify_d_instantiate - instantiate a dentry for inode
344 * Called with dcache_lock held.
345 */ 350 */
346static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) 351static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
347{ 352{
348 if (!inode) 353 if (!inode)
349 return; 354 return;
350 355
351 assert_spin_locked(&dcache_lock);
352
353 spin_lock(&dentry->d_lock); 356 spin_lock(&dentry->d_lock);
354 __fsnotify_update_dcache_flags(dentry); 357 __fsnotify_update_dcache_flags(dentry);
355 spin_unlock(&dentry->d_lock); 358 spin_unlock(&dentry->d_lock);
diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h
index 574bea4013b6..0437e377b555 100644
--- a/include/linux/generic_acl.h
+++ b/include/linux/generic_acl.h
@@ -10,6 +10,6 @@ extern const struct xattr_handler generic_acl_default_handler;
10 10
11int generic_acl_init(struct inode *, struct inode *); 11int generic_acl_init(struct inode *, struct inode *);
12int generic_acl_chmod(struct inode *); 12int generic_acl_chmod(struct inode *);
13int generic_check_acl(struct inode *inode, int mask); 13int generic_check_acl(struct inode *inode, int mask, unsigned int flags);
14 14
15#endif /* LINUX_GENERIC_ACL_H */ 15#endif /* LINUX_GENERIC_ACL_H */
diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
new file mode 100644
index 000000000000..9ee97e7f2be4
--- /dev/null
+++ b/include/linux/list_bl.h
@@ -0,0 +1,144 @@
1#ifndef _LINUX_LIST_BL_H
2#define _LINUX_LIST_BL_H
3
4#include <linux/list.h>
5
6/*
7 * Special version of lists, where head of the list has a lock in the lowest
8 * bit. This is useful for scalable hash tables without increasing memory
9 * footprint overhead.
10 *
11 * For modification operations, the 0 bit of hlist_bl_head->first
12 * pointer must be set.
13 *
14 * With some small modifications, this can easily be adapted to store several
15 * arbitrary bits (not just a single lock bit), if the need arises to store
16 * some fast and compact auxiliary data.
17 */
18
19#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
20#define LIST_BL_LOCKMASK 1UL
21#else
22#define LIST_BL_LOCKMASK 0UL
23#endif
24
25#ifdef CONFIG_DEBUG_LIST
26#define LIST_BL_BUG_ON(x) BUG_ON(x)
27#else
28#define LIST_BL_BUG_ON(x)
29#endif
30
31
32struct hlist_bl_head {
33 struct hlist_bl_node *first;
34};
35
36struct hlist_bl_node {
37 struct hlist_bl_node *next, **pprev;
38};
39#define INIT_HLIST_BL_HEAD(ptr) \
40 ((ptr)->first = NULL)
41
42static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
43{
44 h->next = NULL;
45 h->pprev = NULL;
46}
47
48#define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member)
49
50static inline int hlist_bl_unhashed(const struct hlist_bl_node *h)
51{
52 return !h->pprev;
53}
54
55static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
56{
57 return (struct hlist_bl_node *)
58 ((unsigned long)h->first & ~LIST_BL_LOCKMASK);
59}
60
61static inline void hlist_bl_set_first(struct hlist_bl_head *h,
62 struct hlist_bl_node *n)
63{
64 LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
65 LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
66 h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK);
67}
68
69static inline int hlist_bl_empty(const struct hlist_bl_head *h)
70{
71 return !((unsigned long)h->first & ~LIST_BL_LOCKMASK);
72}
73
74static inline void hlist_bl_add_head(struct hlist_bl_node *n,
75 struct hlist_bl_head *h)
76{
77 struct hlist_bl_node *first = hlist_bl_first(h);
78
79 n->next = first;
80 if (first)
81 first->pprev = &n->next;
82 n->pprev = &h->first;
83 hlist_bl_set_first(h, n);
84}
85
86static inline void __hlist_bl_del(struct hlist_bl_node *n)
87{
88 struct hlist_bl_node *next = n->next;
89 struct hlist_bl_node **pprev = n->pprev;
90
91 LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
92
93 /* pprev may be `first`, so be careful not to lose the lock bit */
94 *pprev = (struct hlist_bl_node *)
95 ((unsigned long)next |
96 ((unsigned long)*pprev & LIST_BL_LOCKMASK));
97 if (next)
98 next->pprev = pprev;
99}
100
101static inline void hlist_bl_del(struct hlist_bl_node *n)
102{
103 __hlist_bl_del(n);
104 n->next = LIST_POISON1;
105 n->pprev = LIST_POISON2;
106}
107
108static inline void hlist_bl_del_init(struct hlist_bl_node *n)
109{
110 if (!hlist_bl_unhashed(n)) {
111 __hlist_bl_del(n);
112 INIT_HLIST_BL_NODE(n);
113 }
114}
115
116/**
117 * hlist_bl_for_each_entry - iterate over list of given type
118 * @tpos: the type * to use as a loop cursor.
119 * @pos: the &struct hlist_bl_node to use as a loop cursor.
120 * @head: the head for your list.
121 * @member: the name of the hlist_bl_node within the struct.
122 *
123 */
124#define hlist_bl_for_each_entry(tpos, pos, head, member) \
125 for (pos = hlist_bl_first(head); \
126 pos && \
127 ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \
128 pos = pos->next)
129
130/**
131 * hlist_bl_for_each_entry_safe - iterate over list of given type safe against removal of list entry
132 * @tpos: the type * to use as a loop cursor.
133 * @pos: the &struct hlist_bl_node to use as a loop cursor.
134 * @n: another &struct hlist_bl_node to use as temporary storage
135 * @head: the head for your list.
136 * @member: the name of the hlist_bl_node within the struct.
137 */
138#define hlist_bl_for_each_entry_safe(tpos, pos, n, head, member) \
139 for (pos = hlist_bl_first(head); \
140 pos && ({ n = pos->next; 1; }) && \
141 ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \
142 pos = n)
143
144#endif
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5e7a59408dd4..1869ea24a739 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -13,6 +13,7 @@
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/nodemask.h> 14#include <linux/nodemask.h>
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/seqlock.h>
16#include <asm/atomic.h> 17#include <asm/atomic.h>
17 18
18struct super_block; 19struct super_block;
@@ -46,12 +47,24 @@ struct mnt_namespace;
46 47
47#define MNT_INTERNAL 0x4000 48#define MNT_INTERNAL 0x4000
48 49
50struct mnt_pcp {
51 int mnt_count;
52 int mnt_writers;
53};
54
49struct vfsmount { 55struct vfsmount {
50 struct list_head mnt_hash; 56 struct list_head mnt_hash;
51 struct vfsmount *mnt_parent; /* fs we are mounted on */ 57 struct vfsmount *mnt_parent; /* fs we are mounted on */
52 struct dentry *mnt_mountpoint; /* dentry of mountpoint */ 58 struct dentry *mnt_mountpoint; /* dentry of mountpoint */
53 struct dentry *mnt_root; /* root of the mounted tree */ 59 struct dentry *mnt_root; /* root of the mounted tree */
54 struct super_block *mnt_sb; /* pointer to superblock */ 60 struct super_block *mnt_sb; /* pointer to superblock */
61#ifdef CONFIG_SMP
62 struct mnt_pcp __percpu *mnt_pcp;
63 atomic_t mnt_longrefs;
64#else
65 int mnt_count;
66 int mnt_writers;
67#endif
55 struct list_head mnt_mounts; /* list of children, anchored here */ 68 struct list_head mnt_mounts; /* list of children, anchored here */
56 struct list_head mnt_child; /* and going through their mnt_child */ 69 struct list_head mnt_child; /* and going through their mnt_child */
57 int mnt_flags; 70 int mnt_flags;
@@ -70,57 +83,25 @@ struct vfsmount {
70 struct mnt_namespace *mnt_ns; /* containing namespace */ 83 struct mnt_namespace *mnt_ns; /* containing namespace */
71 int mnt_id; /* mount identifier */ 84 int mnt_id; /* mount identifier */
72 int mnt_group_id; /* peer group identifier */ 85 int mnt_group_id; /* peer group identifier */
73 /*
74 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
75 * to let these frequently modified fields in a separate cache line
76 * (so that reads of mnt_flags wont ping-pong on SMP machines)
77 */
78 atomic_t mnt_count;
79 int mnt_expiry_mark; /* true if marked for expiry */ 86 int mnt_expiry_mark; /* true if marked for expiry */
80 int mnt_pinned; 87 int mnt_pinned;
81 int mnt_ghosts; 88 int mnt_ghosts;
82#ifdef CONFIG_SMP
83 int __percpu *mnt_writers;
84#else
85 int mnt_writers;
86#endif
87}; 89};
88 90
89static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
90{
91#ifdef CONFIG_SMP
92 return mnt->mnt_writers;
93#else
94 return &mnt->mnt_writers;
95#endif
96}
97
98static inline struct vfsmount *mntget(struct vfsmount *mnt)
99{
100 if (mnt)
101 atomic_inc(&mnt->mnt_count);
102 return mnt;
103}
104
105struct file; /* forward dec */ 91struct file; /* forward dec */
106 92
107extern int mnt_want_write(struct vfsmount *mnt); 93extern int mnt_want_write(struct vfsmount *mnt);
108extern int mnt_want_write_file(struct file *file); 94extern int mnt_want_write_file(struct file *file);
109extern int mnt_clone_write(struct vfsmount *mnt); 95extern int mnt_clone_write(struct vfsmount *mnt);
110extern void mnt_drop_write(struct vfsmount *mnt); 96extern void mnt_drop_write(struct vfsmount *mnt);
111extern void mntput_no_expire(struct vfsmount *mnt); 97extern void mntput(struct vfsmount *mnt);
98extern struct vfsmount *mntget(struct vfsmount *mnt);
99extern void mntput_long(struct vfsmount *mnt);
100extern struct vfsmount *mntget_long(struct vfsmount *mnt);
112extern void mnt_pin(struct vfsmount *mnt); 101extern void mnt_pin(struct vfsmount *mnt);
113extern void mnt_unpin(struct vfsmount *mnt); 102extern void mnt_unpin(struct vfsmount *mnt);
114extern int __mnt_is_readonly(struct vfsmount *mnt); 103extern int __mnt_is_readonly(struct vfsmount *mnt);
115 104
116static inline void mntput(struct vfsmount *mnt)
117{
118 if (mnt) {
119 mnt->mnt_expiry_mark = 0;
120 mntput_no_expire(mnt);
121 }
122}
123
124extern struct vfsmount *do_kern_mount(const char *fstype, int flags, 105extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
125 const char *name, void *data); 106 const char *name, void *data);
126 107
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d93642..18d06add0a40 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -19,7 +19,10 @@ struct nameidata {
19 struct path path; 19 struct path path;
20 struct qstr last; 20 struct qstr last;
21 struct path root; 21 struct path root;
22 struct file *file;
23 struct inode *inode; /* path.dentry.d_inode */
22 unsigned int flags; 24 unsigned int flags;
25 unsigned seq;
23 int last_type; 26 int last_type;
24 unsigned depth; 27 unsigned depth;
25 char *saved_names[MAX_NESTED_LINKS + 1]; 28 char *saved_names[MAX_NESTED_LINKS + 1];
@@ -41,14 +44,15 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
41 * - require a directory 44 * - require a directory
42 * - ending slashes ok even for nonexistent files 45 * - ending slashes ok even for nonexistent files
43 * - internal "there are more path components" flag 46 * - internal "there are more path components" flag
44 * - locked when lookup done with dcache_lock held
45 * - dentry cache is untrusted; force a real lookup 47 * - dentry cache is untrusted; force a real lookup
46 */ 48 */
47#define LOOKUP_FOLLOW 1 49#define LOOKUP_FOLLOW 0x0001
48#define LOOKUP_DIRECTORY 2 50#define LOOKUP_DIRECTORY 0x0002
49#define LOOKUP_CONTINUE 4 51#define LOOKUP_CONTINUE 0x0004
50#define LOOKUP_PARENT 16 52
51#define LOOKUP_REVAL 64 53#define LOOKUP_PARENT 0x0010
54#define LOOKUP_REVAL 0x0020
55#define LOOKUP_RCU 0x0040
52/* 56/*
53 * Intent data 57 * Intent data
54 */ 58 */
diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h
index ef663061d5ac..1c27f201c856 100644
--- a/include/linux/ncp_fs.h
+++ b/include/linux/ncp_fs.h
@@ -184,13 +184,13 @@ struct ncp_entry_info {
184 __u8 file_handle[6]; 184 __u8 file_handle[6];
185}; 185};
186 186
187static inline struct ncp_server *NCP_SBP(struct super_block *sb) 187static inline struct ncp_server *NCP_SBP(const struct super_block *sb)
188{ 188{
189 return sb->s_fs_info; 189 return sb->s_fs_info;
190} 190}
191 191
192#define NCP_SERVER(inode) NCP_SBP((inode)->i_sb) 192#define NCP_SERVER(inode) NCP_SBP((inode)->i_sb)
193static inline struct ncp_inode_info *NCP_FINFO(struct inode *inode) 193static inline struct ncp_inode_info *NCP_FINFO(const struct inode *inode)
194{ 194{
195 return container_of(inode, struct ncp_inode_info, vfs_inode); 195 return container_of(inode, struct ncp_inode_info, vfs_inode);
196} 196}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 29d504d5d1c3..0779bb8f95be 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -351,7 +351,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
351extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); 351extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
352extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); 352extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
353extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 353extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
354extern int nfs_permission(struct inode *, int); 354extern int nfs_permission(struct inode *, int, unsigned int);
355extern int nfs_open(struct inode *, struct file *); 355extern int nfs_open(struct inode *, struct file *);
356extern int nfs_release(struct inode *, struct file *); 356extern int nfs_release(struct inode *, struct file *);
357extern int nfs_attribute_timeout(struct inode *inode); 357extern int nfs_attribute_timeout(struct inode *inode);
diff --git a/include/linux/path.h b/include/linux/path.h
index edc98dec6266..a581e8c06533 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -10,7 +10,9 @@ struct path {
10}; 10};
11 11
12extern void path_get(struct path *); 12extern void path_get(struct path *);
13extern void path_get_long(struct path *);
13extern void path_put(struct path *); 14extern void path_put(struct path *);
15extern void path_put_long(struct path *);
14 16
15static inline int path_equal(const struct path *path1, const struct path *path2) 17static inline int path_equal(const struct path *path1, const struct path *path2)
16{ 18{
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 67608161df6b..d68283a898bb 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -108,6 +108,25 @@ static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
108 return acl; 108 return acl;
109} 109}
110 110
111static inline int negative_cached_acl(struct inode *inode, int type)
112{
113 struct posix_acl **p, *acl;
114 switch (type) {
115 case ACL_TYPE_ACCESS:
116 p = &inode->i_acl;
117 break;
118 case ACL_TYPE_DEFAULT:
119 p = &inode->i_default_acl;
120 break;
121 default:
122 BUG();
123 }
124 acl = ACCESS_ONCE(*p);
125 if (acl)
126 return 0;
127 return 1;
128}
129
111static inline void set_cached_acl(struct inode *inode, 130static inline void set_cached_acl(struct inode *inode,
112 int type, 131 int type,
113 struct posix_acl *acl) 132 struct posix_acl *acl)
diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h
new file mode 100644
index 000000000000..b872b493724d
--- /dev/null
+++ b/include/linux/rculist_bl.h
@@ -0,0 +1,127 @@
1#ifndef _LINUX_RCULIST_BL_H
2#define _LINUX_RCULIST_BL_H
3
4/*
5 * RCU-protected bl list version. See include/linux/list_bl.h.
6 */
7#include <linux/list_bl.h>
8#include <linux/rcupdate.h>
9
10static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h,
11 struct hlist_bl_node *n)
12{
13 LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
14 LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
15 rcu_assign_pointer(h->first,
16 (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK));
17}
18
19static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
20{
21 return (struct hlist_bl_node *)
22 ((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK);
23}
24
25/**
26 * hlist_bl_del_init_rcu - deletes entry from hash list with re-initialization
27 * @n: the element to delete from the hash list.
28 *
29 * Note: hlist_bl_unhashed() on the node returns true after this. It is
30 * useful for RCU based read lockfree traversal if the writer side
31 * must know if the list entry is still hashed or already unhashed.
32 *
33 * In particular, it means that we can not poison the forward pointers
34 * that may still be used for walking the hash list and we can only
35 * zero the pprev pointer so hlist_bl_unhashed() will return true after
36 * this.
37 *
38 * The caller must take whatever precautions are necessary (such as
39 * holding appropriate locks) to avoid racing with another
40 * list-mutation primitive, such as hlist_bl_add_head_rcu() or
41 * hlist_bl_del_rcu(), running on this same list. However, it is
42 * perfectly legal to run concurrently with the _rcu list-traversal
43 * primitives, such as hlist_bl_for_each_entry_rcu().
44 */
45static inline void hlist_bl_del_init_rcu(struct hlist_bl_node *n)
46{
47 if (!hlist_bl_unhashed(n)) {
48 __hlist_bl_del(n);
49 n->pprev = NULL;
50 }
51}
52
53/**
54 * hlist_bl_del_rcu - deletes entry from hash list without re-initialization
55 * @n: the element to delete from the hash list.
56 *
57 * Note: hlist_bl_unhashed() on entry does not return true after this,
58 * the entry is in an undefined state. It is useful for RCU based
59 * lockfree traversal.
60 *
61 * In particular, it means that we can not poison the forward
62 * pointers that may still be used for walking the hash list.
63 *
64 * The caller must take whatever precautions are necessary
65 * (such as holding appropriate locks) to avoid racing
66 * with another list-mutation primitive, such as hlist_bl_add_head_rcu()
67 * or hlist_bl_del_rcu(), running on this same list.
68 * However, it is perfectly legal to run concurrently with
69 * the _rcu list-traversal primitives, such as
70 * hlist_bl_for_each_entry_rcu().
71 */
72static inline void hlist_bl_del_rcu(struct hlist_bl_node *n)
73{
74 __hlist_bl_del(n);
75 n->pprev = LIST_POISON2;
76}
77
78/**
79 * hlist_bl_add_head_rcu
80 * @n: the element to add to the hash list.
81 * @h: the list to add to.
82 *
83 * Description:
84 * Adds the specified element to the specified hlist_bl,
85 * while permitting racing traversals.
86 *
87 * The caller must take whatever precautions are necessary
88 * (such as holding appropriate locks) to avoid racing
89 * with another list-mutation primitive, such as hlist_bl_add_head_rcu()
90 * or hlist_bl_del_rcu(), running on this same list.
91 * However, it is perfectly legal to run concurrently with
92 * the _rcu list-traversal primitives, such as
93 * hlist_bl_for_each_entry_rcu(), used to prevent memory-consistency
94 * problems on Alpha CPUs. Regardless of the type of CPU, the
95 * list-traversal primitive must be guarded by rcu_read_lock().
96 */
97static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n,
98 struct hlist_bl_head *h)
99{
100 struct hlist_bl_node *first;
101
102 /* don't need hlist_bl_first_rcu because we're under lock */
103 first = hlist_bl_first(h);
104
105 n->next = first;
106 if (first)
107 first->pprev = &n->next;
108 n->pprev = &h->first;
109
110 /* need _rcu because we can have concurrent lock free readers */
111 hlist_bl_set_first_rcu(h, n);
112}
113/**
114 * hlist_bl_for_each_entry_rcu - iterate over rcu list of given type
115 * @tpos: the type * to use as a loop cursor.
116 * @pos: the &struct hlist_bl_node to use as a loop cursor.
117 * @head: the head for your list.
118 * @member: the name of the hlist_bl_node within the struct.
119 *
120 */
121#define hlist_bl_for_each_entry_rcu(tpos, pos, head, member) \
122 for (pos = hlist_bl_first_rcu(head); \
123 pos && \
124 ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
125 pos = rcu_dereference_raw(pos->next))
126
127#endif
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index b2cf2089769b..3b94c91f20a6 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -41,7 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
41int reiserfs_lookup_privroot(struct super_block *sb); 41int reiserfs_lookup_privroot(struct super_block *sb);
42int reiserfs_delete_xattrs(struct inode *inode); 42int reiserfs_delete_xattrs(struct inode *inode);
43int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); 43int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
44int reiserfs_permission(struct inode *inode, int mask); 44int reiserfs_permission(struct inode *inode, int mask, unsigned int flags);
45 45
46#ifdef CONFIG_REISERFS_FS_XATTR 46#ifdef CONFIG_REISERFS_FS_XATTR
47#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) 47#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
diff --git a/include/linux/security.h b/include/linux/security.h
index d47a4c24b3e4..1ac42475ea08 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -457,7 +457,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
457 * called when the actual read/write operations are performed. 457 * called when the actual read/write operations are performed.
458 * @inode contains the inode structure to check. 458 * @inode contains the inode structure to check.
459 * @mask contains the permission mask. 459 * @mask contains the permission mask.
460 * @nd contains the nameidata (may be NULL).
461 * Return 0 if permission is granted. 460 * Return 0 if permission is granted.
462 * @inode_setattr: 461 * @inode_setattr:
463 * Check permission before setting file attributes. Note that the kernel 462 * Check permission before setting file attributes. Note that the kernel
@@ -1713,6 +1712,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
1713int security_inode_readlink(struct dentry *dentry); 1712int security_inode_readlink(struct dentry *dentry);
1714int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); 1713int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
1715int security_inode_permission(struct inode *inode, int mask); 1714int security_inode_permission(struct inode *inode, int mask);
1715int security_inode_exec_permission(struct inode *inode, unsigned int flags);
1716int security_inode_setattr(struct dentry *dentry, struct iattr *attr); 1716int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
1717int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); 1717int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
1718int security_inode_setxattr(struct dentry *dentry, const char *name, 1718int security_inode_setxattr(struct dentry *dentry, const char *name,
@@ -2102,6 +2102,12 @@ static inline int security_inode_permission(struct inode *inode, int mask)
2102 return 0; 2102 return 0;
2103} 2103}
2104 2104
2105static inline int security_inode_exec_permission(struct inode *inode,
2106 unsigned int flags)
2107{
2108 return 0;
2109}
2110
2105static inline int security_inode_setattr(struct dentry *dentry, 2111static inline int security_inode_setattr(struct dentry *dentry,
2106 struct iattr *attr) 2112 struct iattr *attr)
2107{ 2113{
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 632205ccc25d..e98cd2e57194 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -107,7 +107,7 @@ static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
107{ 107{
108 smp_rmb(); 108 smp_rmb();
109 109
110 return (sl->sequence != start); 110 return unlikely(sl->sequence != start);
111} 111}
112 112
113 113
@@ -125,14 +125,25 @@ typedef struct seqcount {
125#define SEQCNT_ZERO { 0 } 125#define SEQCNT_ZERO { 0 }
126#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0) 126#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0)
127 127
128/* Start of read using pointer to a sequence counter only. */ 128/**
129static inline unsigned read_seqcount_begin(const seqcount_t *s) 129 * __read_seqcount_begin - begin a seq-read critical section (without barrier)
130 * @s: pointer to seqcount_t
131 * Returns: count to be passed to read_seqcount_retry
132 *
133 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
134 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
135 * provided before actually loading any of the variables that are to be
136 * protected in this critical section.
137 *
138 * Use carefully, only in critical code, and comment how the barrier is
139 * provided.
140 */
141static inline unsigned __read_seqcount_begin(const seqcount_t *s)
130{ 142{
131 unsigned ret; 143 unsigned ret;
132 144
133repeat: 145repeat:
134 ret = s->sequence; 146 ret = s->sequence;
135 smp_rmb();
136 if (unlikely(ret & 1)) { 147 if (unlikely(ret & 1)) {
137 cpu_relax(); 148 cpu_relax();
138 goto repeat; 149 goto repeat;
@@ -140,14 +151,56 @@ repeat:
140 return ret; 151 return ret;
141} 152}
142 153
143/* 154/**
144 * Test if reader processed invalid data because sequence number has changed. 155 * read_seqcount_begin - begin a seq-read critical section
156 * @s: pointer to seqcount_t
157 * Returns: count to be passed to read_seqcount_retry
158 *
159 * read_seqcount_begin opens a read critical section of the given seqcount.
160 * Validity of the critical section is tested by checking read_seqcount_retry
161 * function.
162 */
163static inline unsigned read_seqcount_begin(const seqcount_t *s)
164{
165 unsigned ret = __read_seqcount_begin(s);
166 smp_rmb();
167 return ret;
168}
169
170/**
171 * __read_seqcount_retry - end a seq-read critical section (without barrier)
172 * @s: pointer to seqcount_t
173 * @start: count, from read_seqcount_begin
174 * Returns: 1 if retry is required, else 0
175 *
176 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
177 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
178 * provided after loading the variables that are protected in this critical
179 * section and before this check, so those loads precede the sequence re-read.
180 *
181 * Use carefully, only in critical code, and comment how the barrier is
182 * provided.
183 */
184static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
185{
186 return unlikely(s->sequence != start);
187}
188
189/**
190 * read_seqcount_retry - end a seq-read critical section
191 * @s: pointer to seqcount_t
192 * @start: count, from read_seqcount_begin
193 * Returns: 1 if retry is required, else 0
194 *
195 * read_seqcount_retry closes a read critical section of the given seqcount.
196 * If the critical section was invalid, it must be ignored (and typically
197 * retried).
145 */ 198 */
146static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) 199static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
147{ 200{
148 smp_rmb(); 201 smp_rmb();
149 202
150 return s->sequence != start; 203 return __read_seqcount_retry(s, start);
151} 204}
152 205
153 206
@@ -167,6 +220,19 @@ static inline void write_seqcount_end(seqcount_t *s)
167 s->sequence++; 220 s->sequence++;
168} 221}
169 222
223/**
224 * write_seqcount_barrier - invalidate in-progress read-side seq operations
225 * @s: pointer to seqcount_t
226 *
227 * After write_seqcount_barrier, no read-side seq operations will complete
228 * successfully and see data older than this.
229 */
230static inline void write_seqcount_barrier(seqcount_t *s)
231{
232 smp_wmb();
233 s->sequence+=2;
234}
235
170/* 236/*
171 * Possible sw/hw IRQ protected versions of the interfaces. 237 * Possible sw/hw IRQ protected versions of the interfaces.
172 */ 238 */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 59260e21bdf5..fa9086647eb7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -106,8 +106,6 @@ int kmem_cache_shrink(struct kmem_cache *);
106void kmem_cache_free(struct kmem_cache *, void *); 106void kmem_cache_free(struct kmem_cache *, void *);
107unsigned int kmem_cache_size(struct kmem_cache *); 107unsigned int kmem_cache_size(struct kmem_cache *);
108const char *kmem_cache_name(struct kmem_cache *); 108const char *kmem_cache_name(struct kmem_cache *);
109int kern_ptr_validate(const void *ptr, unsigned long size);
110int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
111 109
112/* 110/*
113 * Please use this macro to create slab caches. Simply specify the 111 * Please use this macro to create slab caches. Simply specify the
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 035f4399edbc..14fb6d67e6a3 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -237,11 +237,18 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
237 return &ei->vfs_inode; 237 return &ei->vfs_inode;
238} 238}
239 239
240static void mqueue_destroy_inode(struct inode *inode) 240static void mqueue_i_callback(struct rcu_head *head)
241{ 241{
242 struct inode *inode = container_of(head, struct inode, i_rcu);
243 INIT_LIST_HEAD(&inode->i_dentry);
242 kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); 244 kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
243} 245}
244 246
247static void mqueue_destroy_inode(struct inode *inode)
248{
249 call_rcu(&inode->i_rcu, mqueue_i_callback);
250}
251
245static void mqueue_evict_inode(struct inode *inode) 252static void mqueue_evict_inode(struct inode *inode)
246{ 253{
247 struct mqueue_inode_info *info; 254 struct mqueue_inode_info *info;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 66a416b42c18..51cddc11cd85 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -763,6 +763,8 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
763 * -> cgroup_mkdir. 763 * -> cgroup_mkdir.
764 */ 764 */
765 765
766static struct dentry *cgroup_lookup(struct inode *dir,
767 struct dentry *dentry, struct nameidata *nd);
766static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); 768static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
767static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); 769static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
768static int cgroup_populate_dir(struct cgroup *cgrp); 770static int cgroup_populate_dir(struct cgroup *cgrp);
@@ -874,25 +876,29 @@ static void cgroup_clear_directory(struct dentry *dentry)
874 struct list_head *node; 876 struct list_head *node;
875 877
876 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 878 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
877 spin_lock(&dcache_lock); 879 spin_lock(&dentry->d_lock);
878 node = dentry->d_subdirs.next; 880 node = dentry->d_subdirs.next;
879 while (node != &dentry->d_subdirs) { 881 while (node != &dentry->d_subdirs) {
880 struct dentry *d = list_entry(node, struct dentry, d_u.d_child); 882 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
883
884 spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
881 list_del_init(node); 885 list_del_init(node);
882 if (d->d_inode) { 886 if (d->d_inode) {
883 /* This should never be called on a cgroup 887 /* This should never be called on a cgroup
884 * directory with child cgroups */ 888 * directory with child cgroups */
885 BUG_ON(d->d_inode->i_mode & S_IFDIR); 889 BUG_ON(d->d_inode->i_mode & S_IFDIR);
886 d = dget_locked(d); 890 dget_dlock(d);
887 spin_unlock(&dcache_lock); 891 spin_unlock(&d->d_lock);
892 spin_unlock(&dentry->d_lock);
888 d_delete(d); 893 d_delete(d);
889 simple_unlink(dentry->d_inode, d); 894 simple_unlink(dentry->d_inode, d);
890 dput(d); 895 dput(d);
891 spin_lock(&dcache_lock); 896 spin_lock(&dentry->d_lock);
892 } 897 } else
898 spin_unlock(&d->d_lock);
893 node = dentry->d_subdirs.next; 899 node = dentry->d_subdirs.next;
894 } 900 }
895 spin_unlock(&dcache_lock); 901 spin_unlock(&dentry->d_lock);
896} 902}
897 903
898/* 904/*
@@ -900,11 +906,16 @@ static void cgroup_clear_directory(struct dentry *dentry)
900 */ 906 */
901static void cgroup_d_remove_dir(struct dentry *dentry) 907static void cgroup_d_remove_dir(struct dentry *dentry)
902{ 908{
909 struct dentry *parent;
910
903 cgroup_clear_directory(dentry); 911 cgroup_clear_directory(dentry);
904 912
905 spin_lock(&dcache_lock); 913 parent = dentry->d_parent;
914 spin_lock(&parent->d_lock);
915 spin_lock(&dentry->d_lock);
906 list_del_init(&dentry->d_u.d_child); 916 list_del_init(&dentry->d_u.d_child);
907 spin_unlock(&dcache_lock); 917 spin_unlock(&dentry->d_lock);
918 spin_unlock(&parent->d_lock);
908 remove_dir(dentry); 919 remove_dir(dentry);
909} 920}
910 921
@@ -2180,7 +2191,7 @@ static const struct file_operations cgroup_file_operations = {
2180}; 2191};
2181 2192
2182static const struct inode_operations cgroup_dir_inode_operations = { 2193static const struct inode_operations cgroup_dir_inode_operations = {
2183 .lookup = simple_lookup, 2194 .lookup = cgroup_lookup,
2184 .mkdir = cgroup_mkdir, 2195 .mkdir = cgroup_mkdir,
2185 .rmdir = cgroup_rmdir, 2196 .rmdir = cgroup_rmdir,
2186 .rename = cgroup_rename, 2197 .rename = cgroup_rename,
@@ -2196,13 +2207,29 @@ static inline struct cftype *__file_cft(struct file *file)
2196 return __d_cft(file->f_dentry); 2207 return __d_cft(file->f_dentry);
2197} 2208}
2198 2209
2199static int cgroup_create_file(struct dentry *dentry, mode_t mode, 2210static int cgroup_delete_dentry(const struct dentry *dentry)
2200 struct super_block *sb) 2211{
2212 return 1;
2213}
2214
2215static struct dentry *cgroup_lookup(struct inode *dir,
2216 struct dentry *dentry, struct nameidata *nd)
2201{ 2217{
2202 static const struct dentry_operations cgroup_dops = { 2218 static const struct dentry_operations cgroup_dentry_operations = {
2219 .d_delete = cgroup_delete_dentry,
2203 .d_iput = cgroup_diput, 2220 .d_iput = cgroup_diput,
2204 }; 2221 };
2205 2222
2223 if (dentry->d_name.len > NAME_MAX)
2224 return ERR_PTR(-ENAMETOOLONG);
2225 d_set_d_op(dentry, &cgroup_dentry_operations);
2226 d_add(dentry, NULL);
2227 return NULL;
2228}
2229
2230static int cgroup_create_file(struct dentry *dentry, mode_t mode,
2231 struct super_block *sb)
2232{
2206 struct inode *inode; 2233 struct inode *inode;
2207 2234
2208 if (!dentry) 2235 if (!dentry)
@@ -2228,7 +2255,6 @@ static int cgroup_create_file(struct dentry *dentry, mode_t mode,
2228 inode->i_size = 0; 2255 inode->i_size = 0;
2229 inode->i_fop = &cgroup_file_operations; 2256 inode->i_fop = &cgroup_file_operations;
2230 } 2257 }
2231 dentry->d_op = &cgroup_dops;
2232 d_instantiate(dentry, inode); 2258 d_instantiate(dentry, inode);
2233 dget(dentry); /* Extra count - pin the dentry in core */ 2259 dget(dentry); /* Extra count - pin the dentry in core */
2234 return 0; 2260 return 0;
@@ -3638,9 +3664,7 @@ again:
3638 list_del(&cgrp->sibling); 3664 list_del(&cgrp->sibling);
3639 cgroup_unlock_hierarchy(cgrp->root); 3665 cgroup_unlock_hierarchy(cgrp->root);
3640 3666
3641 spin_lock(&cgrp->dentry->d_lock);
3642 d = dget(cgrp->dentry); 3667 d = dget(cgrp->dentry);
3643 spin_unlock(&d->d_lock);
3644 3668
3645 cgroup_d_remove_dir(d); 3669 cgroup_d_remove_dir(d);
3646 dput(d); 3670 dput(d);
diff --git a/mm/filemap.c b/mm/filemap.c
index 6b9aee20f242..ca389394fa2a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -102,9 +102,6 @@
102 * ->inode_lock (zap_pte_range->set_page_dirty) 102 * ->inode_lock (zap_pte_range->set_page_dirty)
103 * ->private_lock (zap_pte_range->__set_page_dirty_buffers) 103 * ->private_lock (zap_pte_range->__set_page_dirty_buffers)
104 * 104 *
105 * ->task->proc_lock
106 * ->dcache_lock (proc_pid_lookup)
107 *
108 * (code doesn't rely on that order, so you could switch it around) 105 * (code doesn't rely on that order, so you could switch it around)
109 * ->tasklist_lock (memory_failure, collect_procs_ao) 106 * ->tasklist_lock (memory_failure, collect_procs_ao)
110 * ->i_mmap_lock 107 * ->i_mmap_lock
diff --git a/mm/shmem.c b/mm/shmem.c
index 47fdeeb9d636..5ee67c990602 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2415,13 +2415,20 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
2415 return &p->vfs_inode; 2415 return &p->vfs_inode;
2416} 2416}
2417 2417
2418static void shmem_i_callback(struct rcu_head *head)
2419{
2420 struct inode *inode = container_of(head, struct inode, i_rcu);
2421 INIT_LIST_HEAD(&inode->i_dentry);
2422 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
2423}
2424
2418static void shmem_destroy_inode(struct inode *inode) 2425static void shmem_destroy_inode(struct inode *inode)
2419{ 2426{
2420 if ((inode->i_mode & S_IFMT) == S_IFREG) { 2427 if ((inode->i_mode & S_IFMT) == S_IFREG) {
2421 /* only struct inode is valid if it's an inline symlink */ 2428 /* only struct inode is valid if it's an inline symlink */
2422 mpol_free_shared_policy(&SHMEM_I(inode)->policy); 2429 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2423 } 2430 }
2424 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); 2431 call_rcu(&inode->i_rcu, shmem_i_callback);
2425} 2432}
2426 2433
2427static void init_once(void *foo) 2434static void init_once(void *foo)
diff --git a/mm/slab.c b/mm/slab.c
index b1e40dafbab3..6107f2380e08 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2781,7 +2781,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2781/* 2781/*
2782 * Map pages beginning at addr to the given cache and slab. This is required 2782 * Map pages beginning at addr to the given cache and slab. This is required
2783 * for the slab allocator to be able to lookup the cache and slab of a 2783 * for the slab allocator to be able to lookup the cache and slab of a
2784 * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging. 2784 * virtual address for kfree, ksize, and slab debugging.
2785 */ 2785 */
2786static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, 2786static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2787 void *addr) 2787 void *addr)
@@ -3660,36 +3660,6 @@ void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
3660EXPORT_SYMBOL(kmem_cache_alloc_notrace); 3660EXPORT_SYMBOL(kmem_cache_alloc_notrace);
3661#endif 3661#endif
3662 3662
3663/**
3664 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
3665 * @cachep: the cache we're checking against
3666 * @ptr: pointer to validate
3667 *
3668 * This verifies that the untrusted pointer looks sane;
3669 * it is _not_ a guarantee that the pointer is actually
3670 * part of the slab cache in question, but it at least
3671 * validates that the pointer can be dereferenced and
3672 * looks half-way sane.
3673 *
3674 * Currently only used for dentry validation.
3675 */
3676int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
3677{
3678 unsigned long size = cachep->buffer_size;
3679 struct page *page;
3680
3681 if (unlikely(!kern_ptr_validate(ptr, size)))
3682 goto out;
3683 page = virt_to_page(ptr);
3684 if (unlikely(!PageSlab(page)))
3685 goto out;
3686 if (unlikely(page_get_cache(page) != cachep))
3687 goto out;
3688 return 1;
3689out:
3690 return 0;
3691}
3692
3693#ifdef CONFIG_NUMA 3663#ifdef CONFIG_NUMA
3694void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3664void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3695{ 3665{
diff --git a/mm/slob.c b/mm/slob.c
index 617b6d6c42c7..3588eaaef726 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -678,11 +678,6 @@ int kmem_cache_shrink(struct kmem_cache *d)
678} 678}
679EXPORT_SYMBOL(kmem_cache_shrink); 679EXPORT_SYMBOL(kmem_cache_shrink);
680 680
681int kmem_ptr_validate(struct kmem_cache *a, const void *b)
682{
683 return 0;
684}
685
686static unsigned int slob_ready __read_mostly; 681static unsigned int slob_ready __read_mostly;
687 682
688int slab_is_available(void) 683int slab_is_available(void)
diff --git a/mm/slub.c b/mm/slub.c
index bec0e355fbad..a2fe1727ed85 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1917,17 +1917,6 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
1917} 1917}
1918EXPORT_SYMBOL(kmem_cache_free); 1918EXPORT_SYMBOL(kmem_cache_free);
1919 1919
1920/* Figure out on which slab page the object resides */
1921static struct page *get_object_page(const void *x)
1922{
1923 struct page *page = virt_to_head_page(x);
1924
1925 if (!PageSlab(page))
1926 return NULL;
1927
1928 return page;
1929}
1930
1931/* 1920/*
1932 * Object placement in a slab is made very easy because we always start at 1921 * Object placement in a slab is made very easy because we always start at
1933 * offset 0. If we tune the size of the object to the alignment then we can 1922 * offset 0. If we tune the size of the object to the alignment then we can
@@ -2386,35 +2375,6 @@ error:
2386} 2375}
2387 2376
2388/* 2377/*
2389 * Check if a given pointer is valid
2390 */
2391int kmem_ptr_validate(struct kmem_cache *s, const void *object)
2392{
2393 struct page *page;
2394
2395 if (!kern_ptr_validate(object, s->size))
2396 return 0;
2397
2398 page = get_object_page(object);
2399
2400 if (!page || s != page->slab)
2401 /* No slab or wrong slab */
2402 return 0;
2403
2404 if (!check_valid_pointer(s, page, object))
2405 return 0;
2406
2407 /*
2408 * We could also check if the object is on the slabs freelist.
2409 * But this would be too expensive and it seems that the main
2410 * purpose of kmem_ptr_valid() is to check if the object belongs
2411 * to a certain slab.
2412 */
2413 return 1;
2414}
2415EXPORT_SYMBOL(kmem_ptr_validate);
2416
2417/*
2418 * Determine the size of a slab object 2378 * Determine the size of a slab object
2419 */ 2379 */
2420unsigned int kmem_cache_size(struct kmem_cache *s) 2380unsigned int kmem_cache_size(struct kmem_cache *s)
diff --git a/mm/util.c b/mm/util.c
index 73dac81e9f78..f126975ef23e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -186,27 +186,6 @@ void kzfree(const void *p)
186} 186}
187EXPORT_SYMBOL(kzfree); 187EXPORT_SYMBOL(kzfree);
188 188
189int kern_ptr_validate(const void *ptr, unsigned long size)
190{
191 unsigned long addr = (unsigned long)ptr;
192 unsigned long min_addr = PAGE_OFFSET;
193 unsigned long align_mask = sizeof(void *) - 1;
194
195 if (unlikely(addr < min_addr))
196 goto out;
197 if (unlikely(addr > (unsigned long)high_memory - size))
198 goto out;
199 if (unlikely(addr & align_mask))
200 goto out;
201 if (unlikely(!kern_addr_valid(addr)))
202 goto out;
203 if (unlikely(!kern_addr_valid(addr + size - 1)))
204 goto out;
205 return 1;
206out:
207 return 0;
208}
209
210/* 189/*
211 * strndup_user - duplicate an existing string from user space 190 * strndup_user - duplicate an existing string from user space
212 * @s: The string to duplicate 191 * @s: The string to duplicate
diff --git a/net/socket.c b/net/socket.c
index c1663c0ff3d3..ccc576a6a508 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -262,6 +262,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
262} 262}
263 263
264 264
265
265static void wq_free_rcu(struct rcu_head *head) 266static void wq_free_rcu(struct rcu_head *head)
266{ 267{
267 struct socket_wq *wq = container_of(head, struct socket_wq, rcu); 268 struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
@@ -360,14 +361,14 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
360 if (unlikely(fd < 0)) 361 if (unlikely(fd < 0))
361 return fd; 362 return fd;
362 363
363 path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); 364 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
364 if (unlikely(!path.dentry)) { 365 if (unlikely(!path.dentry)) {
365 put_unused_fd(fd); 366 put_unused_fd(fd);
366 return -ENOMEM; 367 return -ENOMEM;
367 } 368 }
368 path.mnt = mntget(sock_mnt); 369 path.mnt = mntget(sock_mnt);
369 370
370 path.dentry->d_op = &sockfs_dentry_operations; 371 d_set_d_op(path.dentry, &sockfs_dentry_operations);
371 d_instantiate(path.dentry, SOCK_INODE(sock)); 372 d_instantiate(path.dentry, SOCK_INODE(sock));
372 SOCK_INODE(sock)->i_fop = &socket_file_ops; 373 SOCK_INODE(sock)->i_fop = &socket_file_ops;
373 374
@@ -2390,6 +2391,8 @@ EXPORT_SYMBOL(sock_unregister);
2390 2391
2391static int __init sock_init(void) 2392static int __init sock_init(void)
2392{ 2393{
2394 int err;
2395
2393 /* 2396 /*
2394 * Initialize sock SLAB cache. 2397 * Initialize sock SLAB cache.
2395 */ 2398 */
@@ -2406,8 +2409,15 @@ static int __init sock_init(void)
2406 */ 2409 */
2407 2410
2408 init_inodecache(); 2411 init_inodecache();
2409 register_filesystem(&sock_fs_type); 2412
2413 err = register_filesystem(&sock_fs_type);
2414 if (err)
2415 goto out_fs;
2410 sock_mnt = kern_mount(&sock_fs_type); 2416 sock_mnt = kern_mount(&sock_fs_type);
2417 if (IS_ERR(sock_mnt)) {
2418 err = PTR_ERR(sock_mnt);
2419 goto out_mount;
2420 }
2411 2421
2412 /* The real protocol initialization is performed in later initcalls. 2422 /* The real protocol initialization is performed in later initcalls.
2413 */ 2423 */
@@ -2420,7 +2430,13 @@ static int __init sock_init(void)
2420 skb_timestamping_init(); 2430 skb_timestamping_init();
2421#endif 2431#endif
2422 2432
2423 return 0; 2433out:
2434 return err;
2435
2436out_mount:
2437 unregister_filesystem(&sock_fs_type);
2438out_fs:
2439 goto out;
2424} 2440}
2425 2441
2426core_initcall(sock_init); /* early initcall */ 2442core_initcall(sock_init); /* early initcall */
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 10a17a37ec4e..09f01f41e55a 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -162,11 +162,19 @@ rpc_alloc_inode(struct super_block *sb)
162} 162}
163 163
164static void 164static void
165rpc_destroy_inode(struct inode *inode) 165rpc_i_callback(struct rcu_head *head)
166{ 166{
167 struct inode *inode = container_of(head, struct inode, i_rcu);
168 INIT_LIST_HEAD(&inode->i_dentry);
167 kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); 169 kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
168} 170}
169 171
172static void
173rpc_destroy_inode(struct inode *inode)
174{
175 call_rcu(&inode->i_rcu, rpc_i_callback);
176}
177
170static int 178static int
171rpc_pipe_open(struct inode *inode, struct file *filp) 179rpc_pipe_open(struct inode *inode, struct file *filp)
172{ 180{
@@ -430,7 +438,7 @@ void rpc_put_mount(void)
430} 438}
431EXPORT_SYMBOL_GPL(rpc_put_mount); 439EXPORT_SYMBOL_GPL(rpc_put_mount);
432 440
433static int rpc_delete_dentry(struct dentry *dentry) 441static int rpc_delete_dentry(const struct dentry *dentry)
434{ 442{
435 return 1; 443 return 1;
436} 444}
@@ -583,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent,
583 } 591 }
584 } 592 }
585 if (!dentry->d_inode) 593 if (!dentry->d_inode)
586 dentry->d_op = &rpc_dentry_operations; 594 d_set_d_op(dentry, &rpc_dentry_operations);
587out_err: 595out_err:
588 return dentry; 596 return dentry;
589} 597}
diff --git a/security/security.c b/security/security.c
index e5fb07a3052d..739e40362f44 100644
--- a/security/security.c
+++ b/security/security.c
@@ -513,6 +513,15 @@ int security_inode_permission(struct inode *inode, int mask)
513 return security_ops->inode_permission(inode, mask); 513 return security_ops->inode_permission(inode, mask);
514} 514}
515 515
516int security_inode_exec_permission(struct inode *inode, unsigned int flags)
517{
518 if (unlikely(IS_PRIVATE(inode)))
519 return 0;
520 if (flags)
521 return -ECHILD;
522 return security_ops->inode_permission(inode, MAY_EXEC);
523}
524
516int security_inode_setattr(struct dentry *dentry, struct iattr *attr) 525int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
517{ 526{
518 if (unlikely(IS_PRIVATE(dentry->d_inode))) 527 if (unlikely(IS_PRIVATE(dentry->d_inode)))
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 073fd5b0a53a..43deac219491 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1145,24 +1145,28 @@ static void sel_remove_entries(struct dentry *de)
1145{ 1145{
1146 struct list_head *node; 1146 struct list_head *node;
1147 1147
1148 spin_lock(&dcache_lock); 1148 spin_lock(&de->d_lock);
1149 node = de->d_subdirs.next; 1149 node = de->d_subdirs.next;
1150 while (node != &de->d_subdirs) { 1150 while (node != &de->d_subdirs) {
1151 struct dentry *d = list_entry(node, struct dentry, d_u.d_child); 1151 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
1152
1153 spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
1152 list_del_init(node); 1154 list_del_init(node);
1153 1155
1154 if (d->d_inode) { 1156 if (d->d_inode) {
1155 d = dget_locked(d); 1157 dget_dlock(d);
1156 spin_unlock(&dcache_lock); 1158 spin_unlock(&de->d_lock);
1159 spin_unlock(&d->d_lock);
1157 d_delete(d); 1160 d_delete(d);
1158 simple_unlink(de->d_inode, d); 1161 simple_unlink(de->d_inode, d);
1159 dput(d); 1162 dput(d);
1160 spin_lock(&dcache_lock); 1163 spin_lock(&de->d_lock);
1161 } 1164 } else
1165 spin_unlock(&d->d_lock);
1162 node = de->d_subdirs.next; 1166 node = de->d_subdirs.next;
1163 } 1167 }
1164 1168
1165 spin_unlock(&dcache_lock); 1169 spin_unlock(&de->d_lock);
1166} 1170}
1167 1171
1168#define BOOL_DIR_NAME "booleans" 1172#define BOOL_DIR_NAME "booleans"
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 1d0bf8fa1922..d1e05b047715 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -14,6 +14,7 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <net/sock.h> 15#include <net/sock.h>
16#include "common.h" 16#include "common.h"
17#include "../../fs/internal.h"
17 18
18/** 19/**
19 * tomoyo_encode: Convert binary string to ascii string. 20 * tomoyo_encode: Convert binary string to ascii string.